Merge "Cleanup router for which processing added router failed" into stable/train

This commit is contained in:
Zuul 2021-12-14 18:29:36 +00:00 committed by Gerrit Code Review
commit bbfc806e28
2 changed files with 43 additions and 8 deletions

View File

@ -484,18 +484,22 @@ class L3NATAgent(ha.AgentMixin,
ri.initialize(self.process_monitor) ri.initialize(self.process_monitor)
except Exception: except Exception:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
del self.router_info[router_id]
LOG.exception('Error while initializing router %s', LOG.exception('Error while initializing router %s',
router_id) router_id)
self.namespaces_manager.ensure_router_cleanup(router_id) self._cleanup_failed_router(router_id, delete_router_info=True)
try:
ri.delete()
except Exception:
LOG.exception('Error while deleting router %s',
router_id)
self._resize_process_pool() self._resize_process_pool()
def _cleanup_failed_router(self, router_id, delete_router_info):
ri = self.router_info.pop(router_id)
self.namespaces_manager.ensure_router_cleanup(router_id)
try:
if delete_router_info:
ri.delete()
except Exception:
LOG.exception('Error while deleting router %s',
router_id)
def _safe_router_removed(self, router_id): def _safe_router_removed(self, router_id):
"""Try to delete a router and return True if successful.""" """Try to delete a router and return True if successful."""
# The l3_ext_manager API expects a router dict, look it up # The l3_ext_manager API expects a router dict, look it up
@ -624,8 +628,22 @@ class L3NATAgent(ha.AgentMixin,
self._router_added(router['id'], router) self._router_added(router['id'], router)
ri = self.router_info[router['id']] ri = self.router_info[router['id']]
ri.router = router ri.router = router
ri.process() try:
ri.process()
except Exception:
with excutils.save_and_reraise_exception():
LOG.exception('Error while processing router %s',
router['id'])
# NOTE(slaweq): deleting of the router info in the
# _cleanup_failed_router is avoided as in case of error,
# processing of the router will be retried on next call and
# that may lead to some race conditions e.g. with
# configuration of the DVR router's FIP gateway
self._cleanup_failed_router(router['id'],
delete_router_info=False)
registry.notify(resources.ROUTER, events.AFTER_CREATE, self, router=ri) registry.notify(resources.ROUTER, events.AFTER_CREATE, self, router=ri)
self.l3_ext_manager.add_router(self.context, router) self.l3_ext_manager.add_router(self.context, router)
def _process_updated_router(self, router): def _process_updated_router(self, router):

View File

@ -2728,6 +2728,23 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
def test_disable_metadata_proxy_spawn(self): def test_disable_metadata_proxy_spawn(self):
self._configure_metadata_proxy(enableflag=False) self._configure_metadata_proxy(enableflag=False)
def test__process_router_added_router_not_in_cache_after_failure(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router_id = _uuid()
router = {'id': router_id}
ri_mock = mock.Mock()
class TestRouterProcessingException(Exception):
pass
with mock.patch.object(agent, "_router_added"), \
mock.patch.dict(agent.router_info, {router_id: ri_mock}):
ri_mock.process.side_effect = TestRouterProcessingException()
self.assertRaises(
TestRouterProcessingException,
agent._process_added_router, router)
self.assertNotIn(router_id, agent.router_info)
def _test_process_routers_update_rpc_timeout(self, ext_net_call=False, def _test_process_routers_update_rpc_timeout(self, ext_net_call=False,
ext_net_call_failed=False): ext_net_call_failed=False):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)