Catch broad exception in methods used in FixedIntervalLoopingCall
Unlike other places where it might make sense to catch specific exceptions, methods that are used to check L3 and DHCP agents liveness via FixedIntervalLoopingCall should never allow exceptions to leak to calling method and interrupt the loop. Further improvement of FixedIntervalLoopingCall might be needed, but for the sake of easy backporting it makes sense to fix the issue in neutron before pushing refactoring to 3rd-party library. Change-Id: I6a61e99a6f4e445e26ea4a9923b47e35559e5703 Closes-Bug: #1458119
This commit is contained in:
parent
dee9ffc696
commit
ae8c1c5f80
|
@ -270,10 +270,12 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
||||||
agents_db.Agent.admin_state_up))
|
agents_db.Agent.admin_state_up))
|
||||||
dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP)
|
dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP)
|
||||||
|
|
||||||
|
try:
|
||||||
for binding in self._filter_bindings(context, down_bindings):
|
for binding in self._filter_bindings(context, down_bindings):
|
||||||
LOG.warn(_LW("Removing network %(network)s from agent %(agent)s "
|
LOG.warn(_LW("Removing network %(network)s from agent "
|
||||||
"because the agent did not report to the server in "
|
"%(agent)s because the agent did not report "
|
||||||
"the last %(dead_time)s seconds."),
|
"to the server in the last %(dead_time)s "
|
||||||
|
"seconds."),
|
||||||
{'network': binding.network_id,
|
{'network': binding.network_id,
|
||||||
'agent': binding.dhcp_agent_id,
|
'agent': binding.dhcp_agent_id,
|
||||||
'dead_time': agent_dead_limit})
|
'dead_time': agent_dead_limit})
|
||||||
|
@ -291,8 +293,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
||||||
notify=False)
|
notify=False)
|
||||||
except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
|
except dhcpagentscheduler.NetworkNotHostedByDhcpAgent:
|
||||||
# measures against concurrent operation
|
# measures against concurrent operation
|
||||||
LOG.debug("Network %(net)s already removed from DHCP agent "
|
LOG.debug("Network %(net)s already removed from DHCP "
|
||||||
"%(agent)s",
|
"agent %(agent)s",
|
||||||
saved_binding)
|
saved_binding)
|
||||||
# still continue and allow concurrent scheduling attempt
|
# still continue and allow concurrent scheduling attempt
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -304,6 +306,11 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
||||||
if cfg.CONF.network_auto_schedule:
|
if cfg.CONF.network_auto_schedule:
|
||||||
self._schedule_network(
|
self._schedule_network(
|
||||||
context, saved_binding['net'], dhcp_notifier)
|
context, saved_binding['net'], dhcp_notifier)
|
||||||
|
except Exception:
|
||||||
|
# we want to be thorough and catch whatever is raised
|
||||||
|
# to avoid loop abortion
|
||||||
|
LOG.exception(_LE("Exception encountered during network "
|
||||||
|
"rescheduling"))
|
||||||
|
|
||||||
def get_dhcp_agents_hosting_networks(
|
def get_dhcp_agents_hosting_networks(
|
||||||
self, context, network_ids, active=None, admin_state_up=None):
|
self, context, network_ids, active=None, admin_state_up=None):
|
||||||
|
|
|
@ -116,9 +116,9 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
|
||||||
# so one broken one doesn't stop the iteration.
|
# so one broken one doesn't stop the iteration.
|
||||||
LOG.exception(_LE("Failed to reschedule router %s"),
|
LOG.exception(_LE("Failed to reschedule router %s"),
|
||||||
binding.router_id)
|
binding.router_id)
|
||||||
except db_exc.DBError:
|
except Exception:
|
||||||
# Catch DB errors here so a transient DB connectivity issue
|
# we want to be thorough and catch whatever is raised
|
||||||
# doesn't stop the loopingcall.
|
# to avoid loop abortion
|
||||||
LOG.exception(_LE("Exception encountered during router "
|
LOG.exception(_LE("Exception encountered during router "
|
||||||
"rescheduling."))
|
"rescheduling."))
|
||||||
|
|
||||||
|
|
|
@ -674,17 +674,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
|
||||||
db_exc.DBError(), oslo_messaging.RemoteError(),
|
db_exc.DBError(), oslo_messaging.RemoteError(),
|
||||||
l3agentscheduler.RouterReschedulingFailed(router_id='f',
|
l3agentscheduler.RouterReschedulingFailed(router_id='f',
|
||||||
agent_id='f'),
|
agent_id='f'),
|
||||||
ValueError('this raises')
|
ValueError('this raises'),
|
||||||
|
Exception()
|
||||||
]).start()
|
]).start()
|
||||||
# these first three should not raise any errors
|
|
||||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # DBError
|
self._take_down_agent_and_run_reschedule(L3_HOSTA) # DBError
|
||||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # RemoteError
|
self._take_down_agent_and_run_reschedule(L3_HOSTA) # RemoteError
|
||||||
self._take_down_agent_and_run_reschedule(L3_HOSTA) # schedule err
|
self._take_down_agent_and_run_reschedule(L3_HOSTA) # schedule err
|
||||||
|
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Value error
|
||||||
# ValueError is not caught so it should raise
|
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Exception
|
||||||
self.assertRaises(ValueError,
|
|
||||||
self._take_down_agent_and_run_reschedule,
|
|
||||||
L3_HOSTA)
|
|
||||||
|
|
||||||
def test_router_rescheduler_iterates_after_reschedule_failure(self):
|
def test_router_rescheduler_iterates_after_reschedule_failure(self):
|
||||||
plugin = manager.NeutronManager.get_service_plugins().get(
|
plugin = manager.NeutronManager.get_service_plugins().get(
|
||||||
|
|
|
@ -248,6 +248,15 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
|
||||||
self.assertIn('foo3', res_ids)
|
self.assertIn('foo3', res_ids)
|
||||||
self.assertIn('foo4', res_ids)
|
self.assertIn('foo4', res_ids)
|
||||||
|
|
||||||
|
def test_reschedule_network_from_down_agent_failed_on_unexpected(self):
|
||||||
|
agents = self._create_and_set_agents_down(['host-a'], 1)
|
||||||
|
self._test_schedule_bind_network([agents[0]], self.network_id)
|
||||||
|
with mock.patch.object(
|
||||||
|
self, '_filter_bindings',
|
||||||
|
side_effect=Exception()):
|
||||||
|
# just make sure that no exception is raised
|
||||||
|
self.remove_networks_from_down_agents()
|
||||||
|
|
||||||
|
|
||||||
class DHCPAgentWeightSchedulerTestCase(TestDhcpSchedulerBaseTestCase):
|
class DHCPAgentWeightSchedulerTestCase(TestDhcpSchedulerBaseTestCase):
|
||||||
"""Unit test scenarios for WeightScheduler.schedule."""
|
"""Unit test scenarios for WeightScheduler.schedule."""
|
||||||
|
|
Loading…
Reference in New Issue