diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py index b1154f30527..e2802e10768 100644 --- a/neutron/agent/l3/agent.py +++ b/neutron/agent/l3/agent.py @@ -492,18 +492,22 @@ class L3NATAgent(ha.AgentMixin, ri.initialize(self.process_monitor) except Exception: with excutils.save_and_reraise_exception(): - del self.router_info[router_id] LOG.exception('Error while initializing router %s', router_id) - self.namespaces_manager.ensure_router_cleanup(router_id) - try: - ri.delete() - except Exception: - LOG.exception('Error while deleting router %s', - router_id) + self._cleanup_failed_router(router_id, delete_router_info=True) self._resize_process_pool() + def _cleanup_failed_router(self, router_id, delete_router_info): + ri = self.router_info.pop(router_id) + self.namespaces_manager.ensure_router_cleanup(router_id) + try: + if delete_router_info: + ri.delete() + except Exception: + LOG.exception('Error while deleting router %s', + router_id) + def _safe_router_removed(self, router_id): """Try to delete a router and return True if successful.""" # The l3_ext_manager API expects a router dict, look it up @@ -632,8 +636,22 @@ class L3NATAgent(ha.AgentMixin, self._router_added(router['id'], router) ri = self.router_info[router['id']] ri.router = router - ri.process() + try: + ri.process() + except Exception: + with excutils.save_and_reraise_exception(): + LOG.exception('Error while processing router %s', + router['id']) + # NOTE(slaweq): deleting of the router info in the + # _cleanup_failed_router is avoided as in case of error, + # processing of the router will be retried on next call and + # that may lead to some race conditions e.g. with + # configuration of the DVR router's FIP gateway + self._cleanup_failed_router(router['id'], + delete_router_info=False) + registry.notify(resources.ROUTER, events.AFTER_CREATE, self, router=ri) + self.l3_ext_manager.add_router(self.context, router) def _process_updated_router(self, router): diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index b3a4d100fba..f1db475f399 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -2732,6 +2732,23 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): def test_disable_metadata_proxy_spawn(self): self._configure_metadata_proxy(enableflag=False) + def test__process_router_added_router_not_in_cache_after_failure(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router_id = _uuid() + router = {'id': router_id} + ri_mock = mock.Mock() + + class TestRouterProcessingException(Exception): + pass + + with mock.patch.object(agent, "_router_added"), \ + mock.patch.dict(agent.router_info, {router_id: ri_mock}): + ri_mock.process.side_effect = TestRouterProcessingException() + self.assertRaises( + TestRouterProcessingException, + agent._process_added_router, router) + self.assertNotIn(router_id, agent.router_info) + def _test_process_routers_update_rpc_timeout(self, ext_net_call=False, ext_net_call_failed=False): agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)