Catch broad exception in methods used in FixedIntervalLoopingCall
Unlike other places where it might make sense to catch specific exceptions, methods that are used to check L3 and DHCP agents liveness via FixedIntervalLoopingCall should never allow exceptions to leak to calling method and interrupt the loop. Further improvement of FixedIntervalLoopingCall might be needed, but for the sake of easy backporting it makes sense to fix the issue in neutron before pushing refactoring to 3rd-party library. Change-Id: I6a61e99a6f4e445e26ea4a9923b47e35559e5703 Closes-Bug: #1458119
This commit is contained in:
parent
dee9ffc696
commit
ae8c1c5f80
|
@ -270,40 +270,47 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
|||
agents_db.Agent.admin_state_up))
|
||||
dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP)
|
||||
|
||||
for binding in self._filter_bindings(context, down_bindings):
|
||||
LOG.warn(_LW("Removing network %(network)s from agent %(agent)s "
|
||||
"because the agent did not report to the server in "
|
||||
"the last %(dead_time)s seconds."),
|
||||
{'network': binding.network_id,
|
||||
'agent': binding.dhcp_agent_id,
|
||||
'dead_time': agent_dead_limit})
|
||||
# save binding object to avoid ObjectDeletedError
|
||||
# in case binding is concurrently deleted from the DB
|
||||
saved_binding = {'net': binding.network_id,
|
||||
'agent': binding.dhcp_agent_id}
|
||||
try:
|
||||
# do not notify agent if it considered dead
|
||||
# so when it is restarted it won't see network delete
|
||||
# notifications on its queue
|
||||
self.remove_network_from_dhcp_agent(context,
|
||||
binding.dhcp_agent_id,
|
||||
binding.network_id,
|
||||
notify=False)
|
||||
except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
|
||||
# measures against concurrent operation
|
||||
LOG.debug("Network %(net)s already removed from DHCP agent "
|
||||
"%(agent)s",
|
||||
saved_binding)
|
||||
# still continue and allow concurrent scheduling attempt
|
||||
except Exception:
|
||||
LOG.exception(_LE("Unexpected exception occurred while "
|
||||
"removing network %(net)s from agent "
|
||||
"%(agent)s"),
|
||||
try:
|
||||
for binding in self._filter_bindings(context, down_bindings):
|
||||
LOG.warn(_LW("Removing network %(network)s from agent "
|
||||
"%(agent)s because the agent did not report "
|
||||
"to the server in the last %(dead_time)s "
|
||||
"seconds."),
|
||||
{'network': binding.network_id,
|
||||
'agent': binding.dhcp_agent_id,
|
||||
'dead_time': agent_dead_limit})
|
||||
# save binding object to avoid ObjectDeletedError
|
||||
# in case binding is concurrently deleted from the DB
|
||||
saved_binding = {'net': binding.network_id,
|
||||
'agent': binding.dhcp_agent_id}
|
||||
try:
|
||||
# do not notify agent if it considered dead
|
||||
# so when it is restarted it won't see network delete
|
||||
# notifications on its queue
|
||||
self.remove_network_from_dhcp_agent(context,
|
||||
binding.dhcp_agent_id,
|
||||
binding.network_id,
|
||||
notify=False)
|
||||
except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
|
||||
# measures against concurrent operation
|
||||
LOG.debug("Network %(net)s already removed from DHCP "
|
||||
"agent %(agent)s",
|
||||
saved_binding)
|
||||
# still continue and allow concurrent scheduling attempt
|
||||
except Exception:
|
||||
LOG.exception(_LE("Unexpected exception occurred while "
|
||||
"removing network %(net)s from agent "
|
||||
"%(agent)s"),
|
||||
saved_binding)
|
||||
|
||||
if cfg.CONF.network_auto_schedule:
|
||||
self._schedule_network(
|
||||
context, saved_binding['net'], dhcp_notifier)
|
||||
if cfg.CONF.network_auto_schedule:
|
||||
self._schedule_network(
|
||||
context, saved_binding['net'], dhcp_notifier)
|
||||
except Exception:
|
||||
# we want to be thorough and catch whatever is raised
|
||||
# to avoid loop abortion
|
||||
LOG.exception(_LE("Exception encountered during network "
|
||||
"rescheduling"))
|
||||
|
||||
def get_dhcp_agents_hosting_networks(
|
||||
self, context, network_ids, active=None, admin_state_up=None):
|
||||
|
|
|
@ -116,9 +116,9 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
|
|||
# so one broken one doesn't stop the iteration.
|
||||
LOG.exception(_LE("Failed to reschedule router %s"),
|
||||
binding.router_id)
|
||||
except db_exc.DBError:
|
||||
# Catch DB errors here so a transient DB connectivity issue
|
||||
# doesn't stop the loopingcall.
|
||||
except Exception:
|
||||
# we want to be thorough and catch whatever is raised
|
||||
# to avoid loop abortion
|
||||
LOG.exception(_LE("Exception encountered during router "
|
||||
"rescheduling."))
|
||||
|
||||
|
|
|
@ -674,17 +674,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
|
|||
db_exc.DBError(), oslo_messaging.RemoteError(),
|
||||
l3agentscheduler.RouterReschedulingFailed(router_id='f',
|
||||
agent_id='f'),
|
||||
ValueError('this raises')
|
||||
ValueError('this raises'),
|
||||
Exception()
|
||||
]).start()
|
||||
# these first three should not raise any errors
|
||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # DBError
|
||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # RemoteError
|
||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # schedule err
|
||||
|
||||
# ValueError is not caught so it should raise
|
||||
self.assertRaises(ValueError,
|
||||
self._take_down_agent_and_run_reschedule,
|
||||
L3_HOSTA)
|
||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Value error
|
||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Exception
|
||||
|
||||
def test_router_rescheduler_iterates_after_reschedule_failure(self):
|
||||
plugin = manager.NeutronManager.get_service_plugins().get(
|
||||
|
|
|
@ -248,6 +248,15 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
|
|||
self.assertIn('foo3', res_ids)
|
||||
self.assertIn('foo4', res_ids)
|
||||
|
||||
def test_reschedule_network_from_down_agent_failed_on_unexpected(self):
|
||||
agents = self._create_and_set_agents_down(['host-a'], 1)
|
||||
self._test_schedule_bind_network([agents[0]], self.network_id)
|
||||
with mock.patch.object(
|
||||
self, '_filter_bindings',
|
||||
side_effect=Exception()):
|
||||
# just make sure that no exception is raised
|
||||
self.remove_networks_from_down_agents()
|
||||
|
||||
|
||||
class DHCPAgentWeightSchedulerTestCase(TestDhcpSchedulerBaseTestCase):
|
||||
"""Unit test scenarios for WeightScheduler.schedule."""
|
||||
|
|
Loading…
Reference in New Issue