From 9efa1fdeed86d249b2d3dde987a1fb98290140f0 Mon Sep 17 00:00:00 2001
From: Oleg Bondarev
Date: Thu, 11 Jun 2015 15:40:33 +0300
Subject: [PATCH] l3 agent: do router cleanup for unknown routers

The patch adds cleanup on router delete for routers that are unknown
to the agent. This covers the case where a router is deleted during a
resync on agent init.

Functional tests were updated and now handle three cases for L3 sync:
- no routers were deleted during agent downtime
- some routers were deleted during agent downtime
- some routers were deleted during agent resync

Closes-Bug: #1464238
Change-Id: Id98111849fa88d6807f757864187b059c491aaac
---
 neutron/agent/l3/agent.py                    |  3 +-
 neutron/agent/l3/namespace_manager.py        | 41 +++++----
 .../tests/functional/agent/test_l3_agent.py  | 89 +++++++++++++------
 .../unit/agent/l3/test_namespace_manager.py  | 38 +++++---
 4 files changed, 113 insertions(+), 58 deletions(-)

diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py
index d9e4db8de21..8e54d9617ec 100644
--- a/neutron/agent/l3/agent.py
+++ b/neutron/agent/l3/agent.py
@@ -335,7 +335,8 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
         ri = self.router_info.get(router_id)
         if ri is None:
             LOG.warn(_LW("Info for router %s was not found. "
-                         "Skipping router removal"), router_id)
+                         "Performing router cleanup"), router_id)
+            self.namespaces_manager.ensure_router_cleanup(router_id)
             return
 
         registry.notify(resources.ROUTER, events.BEFORE_DELETE,
diff --git a/neutron/agent/l3/namespace_manager.py b/neutron/agent/l3/namespace_manager.py
index e7d029fcdca..51464e4e5bd 100644
--- a/neutron/agent/l3/namespace_manager.py
+++ b/neutron/agent/l3/namespace_manager.py
@@ -81,24 +81,7 @@ class NamespaceManager(object):
             _ns_prefix, ns_id = self.get_prefix_and_id(ns)
             if ns_id in self._ids_to_keep:
                 continue
-            if _ns_prefix == namespaces.NS_PREFIX:
-                ns = namespaces.RouterNamespace(ns_id,
-                                                self.agent_conf,
-                                                self.driver,
-                                                use_ipv6=False)
-            else:
-                ns = dvr_snat_ns.SnatNamespace(ns_id,
-                                               self.agent_conf,
-                                               self.driver,
-                                               use_ipv6=False)
-            try:
-                if self.metadata_driver:
-                    # cleanup stale metadata proxy processes first
-                    self.metadata_driver.destroy_monitored_metadata_proxy(
-                        self.process_monitor, ns_id, self.agent_conf)
-                ns.delete()
-            except RuntimeError:
-                LOG.exception(_LE('Failed to destroy stale namespace %s'), ns)
+            self._cleanup(_ns_prefix, ns_id)
 
         return True
 
@@ -131,3 +114,25 @@
             LOG.exception(_LE('RuntimeError in obtaining namespace list for '
                               'namespace cleanup.'))
             return set()
+
+    def ensure_router_cleanup(self, router_id):
+        """Performs cleanup for a router"""
+        for ns in self.list_all():
+            if ns.endswith(router_id):
+                ns_prefix, ns_id = self.get_prefix_and_id(ns)
+                self._cleanup(ns_prefix, ns_id)
+
+    def _cleanup(self, ns_prefix, ns_id):
+        if ns_prefix == namespaces.NS_PREFIX:
+            ns_class = namespaces.RouterNamespace
+        else:
+            ns_class = dvr_snat_ns.SnatNamespace
+        ns = ns_class(ns_id, self.agent_conf, self.driver, use_ipv6=False)
+        try:
+            if self.metadata_driver:
+                # cleanup stale metadata proxy processes first
+                self.metadata_driver.destroy_monitored_metadata_proxy(
+                    self.process_monitor, ns_id, self.agent_conf)
+            ns.delete()
+        except RuntimeError:
+            LOG.exception(_LE('Failed to destroy stale namespace %s'), ns)
diff --git a/neutron/tests/functional/agent/test_l3_agent.py b/neutron/tests/functional/agent/test_l3_agent.py
index 7788654bf2c..4c2fd6ea1e2 100644
--- a/neutron/tests/functional/agent/test_l3_agent.py
+++ b/neutron/tests/functional/agent/test_l3_agent.py
@@ -497,26 +497,24 @@ class L3AgentTestCase(L3AgentTestFramework):
                       (new_external_device_ip, external_device_name),
                       new_config)
 
-    def test_periodic_sync_routers_task(self):
-        routers_to_keep = []
-        routers_to_delete = []
+    def _test_periodic_sync_routers_task(self,
+                                         routers_to_keep,
+                                         routers_deleted,
+                                         routers_deleted_during_resync):
         ns_names_to_retrieve = set()
-        routers_info_to_delete = []
-        for i in range(2):
-            routers_to_keep.append(self.generate_router_info(False))
-            ri = self.manage_router(self.agent, routers_to_keep[i])
+        deleted_routers_info = []
+        for r in routers_to_keep:
+            ri = self.manage_router(self.agent, r)
             ns_names_to_retrieve.add(ri.ns_name)
-        for i in range(2):
-            routers_to_delete.append(self.generate_router_info(False))
-            ri = self.manage_router(self.agent, routers_to_delete[i])
-            routers_info_to_delete.append(ri)
+        for r in routers_deleted + routers_deleted_during_resync:
+            ri = self.manage_router(self.agent, r)
+            deleted_routers_info.append(ri)
             ns_names_to_retrieve.add(ri.ns_name)
 
-        # Mock the plugin RPC API to Simulate a situation where the agent
-        # was handling the 4 routers created above, it went down and after
-        # starting up again, two of the routers were deleted via the API
-        self.mock_plugin_api.get_routers.return_value = routers_to_keep
-        # also clear agent router_info as it will be after restart
+        mocked_get_routers = self.mock_plugin_api.get_routers
+        mocked_get_routers.return_value = (routers_to_keep +
+                                           routers_deleted_during_resync)
+        # clear agent router_info as it will be after restart
         self.agent.router_info = {}
 
         # Synchonize the agent with the plug-in
@@ -533,23 +531,58 @@
         # Plug external_gateway_info in the routers that are not going to be
         # deleted by the agent when it processes the updates. Otherwise,
         # _process_router_if_compatible in the agent fails
-        for i in range(2):
-            routers_to_keep[i]['external_gateway_info'] = {'network_id':
-                                                           external_network_id}
+        for r in routers_to_keep:
+            r['external_gateway_info'] = {'network_id': external_network_id}
 
-        # Have the agent process the update from the plug-in and verify
-        # expected behavior
-        for _ in routers_to_keep:
+        # while sync updates are still in the queue, higher priority
+        # router_deleted events may be added there as well
+        for r in routers_deleted_during_resync:
+            self.agent.router_deleted(self.agent.context, r['id'])
+
+        # make sure all events are processed
+        while not self.agent._queue._queue.empty():
             self.agent._process_router_update()
 
-        for i in range(2):
-            self.assertIn(routers_to_keep[i]['id'], self.agent.router_info)
+        for r in routers_to_keep:
+            self.assertIn(r['id'], self.agent.router_info)
             self.assertTrue(self._namespace_exists(namespaces.NS_PREFIX +
-                                                   routers_to_keep[i]['id']))
-        for i in range(2):
-            self.assertNotIn(routers_info_to_delete[i].router_id,
+                                                   r['id']))
+        for ri in deleted_routers_info:
+            self.assertNotIn(ri.router_id,
                              self.agent.router_info)
-            self._assert_router_does_not_exist(routers_info_to_delete[i])
+            self._assert_router_does_not_exist(ri)
+
+    def test_periodic_sync_routers_task(self):
+        routers_to_keep = []
+        for i in range(2):
+            routers_to_keep.append(self.generate_router_info(False))
+        self._test_periodic_sync_routers_task(routers_to_keep,
+                                              routers_deleted=[],
+                                              routers_deleted_during_resync=[])
+
+    def test_periodic_sync_routers_task_routers_deleted_while_agent_down(self):
+        routers_to_keep = []
+        routers_deleted = []
+        for i in range(2):
+            routers_to_keep.append(self.generate_router_info(False))
+        for i in range(2):
+            routers_deleted.append(self.generate_router_info(False))
+        self._test_periodic_sync_routers_task(routers_to_keep,
+                                              routers_deleted,
+                                              routers_deleted_during_resync=[])
+
+    def test_periodic_sync_routers_task_routers_deleted_while_agent_sync(self):
+        routers_to_keep = []
+        routers_deleted_during_resync = []
+        for i in range(2):
+            routers_to_keep.append(self.generate_router_info(False))
+        for i in range(2):
+            routers_deleted_during_resync.append(
+                self.generate_router_info(False))
+        self._test_periodic_sync_routers_task(
+            routers_to_keep,
+            routers_deleted=[],
+            routers_deleted_during_resync=routers_deleted_during_resync)
 
     def _router_lifecycle(self, enable_ha, ip_version=4, dual_stack=False,
                           v6_ext_gw_with_sub=True):
diff --git a/neutron/tests/unit/agent/l3/test_namespace_manager.py b/neutron/tests/unit/agent/l3/test_namespace_manager.py
index 4d219ec2c13..fb1b79eeb43 100644
--- a/neutron/tests/unit/agent/l3/test_namespace_manager.py
+++ b/neutron/tests/unit/agent/l3/test_namespace_manager.py
@@ -36,35 +36,36 @@ class NamespaceManagerTestCaseFramework(base.BaseTestCase):
 
 
 class TestNamespaceManager(NamespaceManagerTestCaseFramework):
 
+    def setUp(self):
+        super(TestNamespaceManager, self).setUp()
+        self.ns_manager = self._create_namespace_manager()
+
     def test_get_prefix_and_id(self):
-        ns_manager = self._create_namespace_manager()
         router_id = _uuid()
-        ns_prefix, ns_id = ns_manager.get_prefix_and_id(
+        ns_prefix, ns_id = self.ns_manager.get_prefix_and_id(
             namespaces.NS_PREFIX + router_id)
         self.assertEqual(ns_prefix, namespaces.NS_PREFIX)
         self.assertEqual(ns_id, router_id)
 
-        ns_prefix, ns_id = ns_manager.get_prefix_and_id(
+        ns_prefix, ns_id = self.ns_manager.get_prefix_and_id(
             dvr_snat_ns.SNAT_NS_PREFIX + router_id)
         self.assertEqual(ns_prefix, dvr_snat_ns.SNAT_NS_PREFIX)
         self.assertEqual(ns_id, router_id)
 
         ns_name = 'dhcp-' + router_id
-        self.assertIsNone(ns_manager.get_prefix_and_id(ns_name))
+        self.assertIsNone(self.ns_manager.get_prefix_and_id(ns_name))
 
     def test_is_managed(self):
-        ns_manager = self._create_namespace_manager()
         router_id = _uuid()
 
         router_ns_name = namespaces.NS_PREFIX + router_id
-        self.assertTrue(ns_manager.is_managed(router_ns_name))
+        self.assertTrue(self.ns_manager.is_managed(router_ns_name))
         router_ns_name = dvr_snat_ns.SNAT_NS_PREFIX + router_id
-        self.assertTrue(ns_manager.is_managed(router_ns_name))
-        self.assertFalse(ns_manager.is_managed('dhcp-' + router_id))
+        self.assertTrue(self.ns_manager.is_managed(router_ns_name))
+        self.assertFalse(self.ns_manager.is_managed('dhcp-' + router_id))
 
     def test_list_all(self):
-        ns_manager = self._create_namespace_manager()
         ns_names = [namespaces.NS_PREFIX + _uuid(),
                     dvr_snat_ns.SNAT_NS_PREFIX + _uuid(),
                     'dhcp-' + _uuid(), ]
@@ -72,7 +73,7 @@ class TestNamespaceManager(NamespaceManagerTestCaseFramework):
         # Test the normal path
         with mock.patch.object(ip_lib.IPWrapper, 'get_namespaces',
                                return_value=ns_names):
-            retrieved_ns_names = ns_manager.list_all()
+            retrieved_ns_names = self.ns_manager.list_all()
             self.assertEqual(len(ns_names) - 1, len(retrieved_ns_names))
             for i in range(len(retrieved_ns_names)):
                 self.assertIn(ns_names[i], retrieved_ns_names)
@@ -81,5 +82,20 @@
         # Test path where IPWrapper raises exception
         with mock.patch.object(ip_lib.IPWrapper, 'get_namespaces',
                                side_effect=RuntimeError):
-            retrieved_ns_names = ns_manager.list_all()
+            retrieved_ns_names = self.ns_manager.list_all()
         self.assertFalse(retrieved_ns_names)
+
+    def test_ensure_router_cleanup(self):
+        router_id = _uuid()
+        ns_names = [namespaces.NS_PREFIX + _uuid() for _ in range(5)]
+        ns_names += [dvr_snat_ns.SNAT_NS_PREFIX + _uuid() for _ in range(5)]
+        ns_names += [namespaces.NS_PREFIX + router_id,
+                     dvr_snat_ns.SNAT_NS_PREFIX + router_id]
+        with mock.patch.object(ip_lib.IPWrapper, 'get_namespaces',
+                               return_value=ns_names), \
+                mock.patch.object(self.ns_manager, '_cleanup') as mock_cleanup:
+            self.ns_manager.ensure_router_cleanup(router_id)
+            expected = [mock.call(namespaces.NS_PREFIX, router_id),
+                        mock.call(dvr_snat_ns.SNAT_NS_PREFIX, router_id)]
+            mock_cleanup.assert_has_calls(expected, any_order=True)
+            self.assertEqual(2, mock_cleanup.call_count)
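
For quick reference, the sketch below distills the cleanup path this patch introduces: when the agent is asked to delete a router it has no RouterInfo for, ensure_router_cleanup() walks the existing namespaces, keeps only those whose names end with the router ID, and hands each one to _cleanup() together with its prefix so the right namespace type is torn down. This is a minimal, standalone illustration, not the Neutron module itself: the hard-coded prefix strings stand in for namespaces.NS_PREFIX and dvr_snat_ns.SNAT_NS_PREFIX, and the faked list_all() replaces the ip_lib-backed namespace listing (assumptions made only for this example).

# Standalone sketch of the dispatch in NamespaceManager.ensure_router_cleanup().
# Assumptions for illustration: prefix strings and a faked namespace listing.

NS_PREFIX = 'qrouter-'        # stands in for namespaces.NS_PREFIX
SNAT_NS_PREFIX = 'snat-'      # stands in for dvr_snat_ns.SNAT_NS_PREFIX


def list_all():
    # Stand-in for NamespaceManager.list_all(), which is normally backed by
    # `ip netns` via ip_lib.IPWrapper.get_namespaces().
    return ['qrouter-1111', 'snat-1111', 'qrouter-2222']


def _cleanup(ns_prefix, ns_id):
    # The real _cleanup() builds a RouterNamespace or SnatNamespace, destroys
    # the monitored metadata proxy and deletes the namespace; here we only
    # report which namespace would be handled.
    kind = 'router' if ns_prefix == NS_PREFIX else 'snat'
    print('cleaning up %s namespace %s%s' % (kind, ns_prefix, ns_id))


def ensure_router_cleanup(router_id):
    """Clean up every namespace that belongs to the given router."""
    for ns in list_all():
        if ns.endswith(router_id):
            prefix = ns[:-len(router_id)]
            _cleanup(prefix, router_id)


if __name__ == '__main__':
    # An unknown router '1111' is deleted: both its qrouter- and snat-
    # namespaces are cleaned up, while router '2222' is left alone.
    ensure_router_cleanup('1111')

In the patched agent, _router_removed() falls through to this path only when self.router_info has no entry for the router, which is exactly the situation after a delete that raced with the initial full sync.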