From acd04d668bd414cd21f2715adc6a35a0eaed59a3 Mon Sep 17 00:00:00 2001 From: Swaminathan Vasudevan Date: Tue, 7 Jun 2016 13:31:56 -0700 Subject: [PATCH] DVR: Clean stale snat-ns by checking its existence when agent restarts At present there is no clear way to distinguish when the snat_namespace object is initialized and when the actual namespace is created. There is no way to check if the namespace already existed. The code was only checking at the snat_namespace object instead of its existence. This patch addresses the issue by adding in an exists method to the namespace object to identify the existence of the namespace in the given agent. This would allow us to check for the existence of the namespace, and also allow us to identify the stale snat namespace and delete the namespace when the gateway is cleared as the agent restarts. This also applies for conditions when the router is manually moved from one agent to another agent while the agent is dead. When the agent wakes up it would clean up the stale snat namespace. Change-Id: Icb00297208813436c2a9e9a003275462293ad643 Closes-Bug: #1557909 --- neutron/agent/l3/agent.py | 4 ++ neutron/agent/l3/dvr_edge_ha_router.py | 1 - neutron/agent/l3/dvr_edge_router.py | 32 ++++----- neutron/agent/l3/dvr_router_base.py | 1 + neutron/agent/l3/namespace_manager.py | 4 ++ neutron/agent/l3/namespaces.py | 3 + .../functional/agent/l3/test_dvr_router.py | 17 +++++ neutron/tests/unit/agent/l3/test_agent.py | 65 ++++++++++++++++++- .../unit/agent/l3/test_namespace_manager.py | 7 ++ 9 files changed, 115 insertions(+), 19 deletions(-) diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py index 7476a06b0fc..8f4ee9e4b57 100644 --- a/neutron/agent/l3/agent.py +++ b/neutron/agent/l3/agent.py @@ -546,8 +546,12 @@ class L3NATAgent(ha.AgentMixin, # need to keep fip namespaces as well ext_net_id = (r['external_gateway_info'] or {}).get( 'network_id') + is_snat_agent = (self.conf.agent_mode == + l3_constants.L3_AGENT_MODE_DVR_SNAT) if ext_net_id: ns_manager.keep_ext_net(ext_net_id) + elif is_snat_agent: + ns_manager.ensure_snat_cleanup(r['id']) update = queue.RouterUpdate( r['id'], queue.PRIORITY_SYNC_ROUTERS_TASK, diff --git a/neutron/agent/l3/dvr_edge_ha_router.py b/neutron/agent/l3/dvr_edge_ha_router.py index 06a124930c9..9b00a70b68d 100644 --- a/neutron/agent/l3/dvr_edge_ha_router.py +++ b/neutron/agent/l3/dvr_edge_ha_router.py @@ -31,7 +31,6 @@ class DvrEdgeHaRouter(DvrEdgeRouter, HaRouter): super(DvrEdgeHaRouter, self).__init__(agent, host, *args, **kwargs) self.enable_snat = None - self.snat_ports = None @property def ha_namespace(self): diff --git a/neutron/agent/l3/dvr_edge_router.py b/neutron/agent/l3/dvr_edge_router.py index 6e2f4ed1c2e..56b7ca87cf1 100644 --- a/neutron/agent/l3/dvr_edge_router.py +++ b/neutron/agent/l3/dvr_edge_router.py @@ -29,7 +29,8 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): def __init__(self, agent, host, *args, **kwargs): super(DvrEdgeRouter, self).__init__(agent, host, *args, **kwargs) - self.snat_namespace = None + self.snat_namespace = dvr_snat_ns.SnatNamespace( + self.router_id, self.agent_conf, self.driver, self.use_ipv6) self.snat_iptables_manager = None def external_gateway_added(self, ex_gw_port, interface_name): @@ -42,19 +43,27 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): # the same routes to the snat namespace after the gateway port # is added, we need to call routes_updated here. self.routes_updated([], self.router['routes']) + elif self.snat_namespace.exists(): + # This is the case where the snat was moved manually or + # rescheduled to a different agent when the agent was dead. + LOG.debug("SNAT was moved or rescheduled to a different host " + "and does not match with the current host. This is " + "a stale namespace %s and will be cleared from the " + "current dvr_snat host.", self.snat_namespace.name) + self.external_gateway_removed(ex_gw_port, interface_name) def external_gateway_updated(self, ex_gw_port, interface_name): if not self._is_this_snat_host(): # no centralized SNAT gateway for this node/agent LOG.debug("not hosting snat for router: %s", self.router['id']) - if self.snat_namespace: + if self.snat_namespace.exists(): LOG.debug("SNAT was rescheduled to host %s. Clearing snat " "namespace.", self.router.get('gw_port_host')) return self.external_gateway_removed( ex_gw_port, interface_name) return - if not self.snat_namespace: + if not self.snat_namespace.exists(): # SNAT might be rescheduled to this agent; need to process like # newly created gateway return self.external_gateway_added(ex_gw_port, interface_name) @@ -67,7 +76,7 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): def _external_gateway_removed(self, ex_gw_port, interface_name): super(DvrEdgeRouter, self).external_gateway_removed(ex_gw_port, interface_name) - if not self._is_this_snat_host() and not self.snat_namespace: + if not self._is_this_snat_host() and not self.snat_namespace.exists(): # no centralized SNAT gateway for this node/agent LOG.debug("not hosting snat for router: %s", self.router['id']) return @@ -79,9 +88,8 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): def external_gateway_removed(self, ex_gw_port, interface_name): self._external_gateway_removed(ex_gw_port, interface_name) - if self.snat_namespace: + if self.snat_namespace.exists(): self.snat_namespace.delete() - self.snat_namespace = None def internal_network_added(self, port): super(DvrEdgeRouter, self).internal_network_added(port) @@ -155,10 +163,6 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): # TODO(mlavalle): in the near future, this method should contain the # code in the L3 agent that creates a gateway for a dvr. The first step # is to move the creation of the snat namespace here - self.snat_namespace = dvr_snat_ns.SnatNamespace(self.router['id'], - self.agent_conf, - self.driver, - self.use_ipv6) self.snat_namespace.create() return self.snat_namespace @@ -197,12 +201,10 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): def update_routing_table(self, operation, route): if self.get_ex_gw_port() and self._is_this_snat_host(): - ns_name = dvr_snat_ns.SnatNamespace.get_snat_ns_name( - self.router['id']) + ns_name = self.snat_namespace.name # NOTE: For now let us apply the static routes both in SNAT # namespace and Router Namespace, to reduce the complexity. - ip_wrapper = ip_lib.IPWrapper(namespace=ns_name) - if ip_wrapper.netns.exists(ns_name): + if self.snat_namespace.exists(): super(DvrEdgeRouter, self)._update_routing_table( operation, route, namespace=ns_name) else: @@ -212,7 +214,7 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter): def delete(self, agent): super(DvrEdgeRouter, self).delete(agent) - if self.snat_namespace: + if self.snat_namespace.exists(): self.snat_namespace.delete() def process_address_scope(self): diff --git a/neutron/agent/l3/dvr_router_base.py b/neutron/agent/l3/dvr_router_base.py index 781085afa8b..56b26253174 100644 --- a/neutron/agent/l3/dvr_router_base.py +++ b/neutron/agent/l3/dvr_router_base.py @@ -25,6 +25,7 @@ class DvrRouterBase(router.RouterInfo): self.agent = agent self.host = host + self.snat_ports = None def process(self, agent): super(DvrRouterBase, self).process(agent) diff --git a/neutron/agent/l3/namespace_manager.py b/neutron/agent/l3/namespace_manager.py index 7d3aceca8fa..366ddb98ba1 100644 --- a/neutron/agent/l3/namespace_manager.py +++ b/neutron/agent/l3/namespace_manager.py @@ -130,6 +130,10 @@ class NamespaceManager(object): ns_prefix, ns_id = self.get_prefix_and_id(ns) self._cleanup(ns_prefix, ns_id) + def ensure_snat_cleanup(self, router_id): + prefix = dvr_snat_ns.SNAT_NS_PREFIX + self._cleanup(prefix, router_id) + def _cleanup(self, ns_prefix, ns_id): ns_class = self.ns_prefix_to_class_map[ns_prefix] ns = ns_class(ns_id, self.agent_conf, self.driver, use_ipv6=False) diff --git a/neutron/agent/l3/namespaces.py b/neutron/agent/l3/namespaces.py index ccb0bf77d1e..44147e9585c 100644 --- a/neutron/agent/l3/namespaces.py +++ b/neutron/agent/l3/namespaces.py @@ -82,6 +82,9 @@ class Namespace(object): msg = _LE('Failed trying to delete namespace: %s') LOG.exception(msg, self.name) + def exists(self): + return self.ip_wrapper_root.netns.exists(self.name) + class RouterNamespace(Namespace): diff --git a/neutron/tests/functional/agent/l3/test_dvr_router.py b/neutron/tests/functional/agent/l3/test_dvr_router.py index 9f1ca4d7035..f2f4d49a31c 100644 --- a/neutron/tests/functional/agent/l3/test_dvr_router.py +++ b/neutron/tests/functional/agent/l3/test_dvr_router.py @@ -145,6 +145,23 @@ class TestDvrRouter(framework.L3AgentTestFramework): self._delete_router(self.agent, router.router_id) self._assert_fip_namespace_deleted(ext_gateway_port) + def test_dvr_unused_snat_ns_deleted_when_agent_restarts_after_move(self): + """Test to validate the stale snat namespace delete with snat move. + + This test validates the stale snat namespace cleanup when + the agent restarts after the gateway port has been moved + from the agent. + """ + self.agent.conf.agent_mode = 'dvr_snat' + router_info = self.generate_dvr_router_info() + router1 = self.manage_router(self.agent, router_info) + self._assert_snat_namespace_exists(router1) + restarted_agent = neutron_l3_agent.L3NATAgentWithStateReport( + self.agent.host, self.agent.conf) + router1.router['gw_port_host'] = "my-new-host" + restarted_router = self.manage_router(restarted_agent, router1.router) + self._assert_snat_namespace_does_not_exist(restarted_router) + def test_dvr_router_fips_for_multiple_ext_networks(self): agent_mode = 'dvr' # Create the first router fip with external net1 diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index 8f05e298956..0c6d5d87718 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -611,6 +611,68 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): def test_external_gateway_updated_dual_stack(self): self._test_external_gateway_updated(dual_stack=True) + def test_dvr_edge_router_init_for_snat_namespace_object(self): + router = {'id': _uuid()} + ri = dvr_router.DvrEdgeRouter(mock.Mock(), + HOSTNAME, + router['id'], + router, + **self.ri_kwargs) + # Make sure that ri.snat_namespace object is created when the + # router is initialized + self.assertIsNotNone(ri.snat_namespace) + + def test_ext_gw_updated_calling_snat_ns_delete_if_gw_port_host_none( + self): + """Test to check the impact of snat_namespace object. + + This function specifically checks the impact of the snat + namespace object value on external_gateway_removed for deleting + snat_namespace when the gw_port_host mismatches or none. + """ + router = l3_test_common.prepare_router_data(num_internal_ports=2) + ri = dvr_router.DvrEdgeRouter(mock.Mock(), + HOSTNAME, + router['id'], + router, + **self.ri_kwargs) + with mock.patch.object(dvr_snat_ns.SnatNamespace, + 'delete') as snat_ns_delete: + interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test( + self, ri) + router['gw_port_host'] = '' + ri.external_gateway_updated(ex_gw_port, interface_name) + if router['gw_port_host'] != ri.host: + self.assertEqual(1, snat_ns_delete.call_count) + + @mock.patch.object(namespaces.Namespace, 'delete') + def test_snat_ns_delete_not_called_when_snat_namespace_does_not_exist( + self, mock_ns_del): + """Test to check the impact of snat_namespace object. + + This function specifically checks the impact of the snat + namespace object initialization without the actual creation + of snat_namespace. When deletes are issued to the snat + namespace based on the snat namespace object existence, it + should be checking for the valid namespace existence before + it tries to delete. + """ + router = l3_test_common.prepare_router_data(num_internal_ports=2) + ri = dvr_router.DvrEdgeRouter(mock.Mock(), + HOSTNAME, + router['id'], + router, + **self.ri_kwargs) + # Make sure we set a return value to emulate the non existence + # of the namespace. + self.mock_ip.netns.exists.return_value = False + self.assertIsNotNone(ri.snat_namespace) + interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test(self, + ri) + ri._external_gateway_removed = mock.Mock() + ri.external_gateway_removed(ex_gw_port, interface_name) + self.assertFalse(mock_ns_del.called) + def _test_ext_gw_updated_dvr_edge_router(self, host_match, snat_hosted_before=True): """ @@ -629,8 +691,6 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): if snat_hosted_before: ri._create_snat_namespace() snat_ns_name = ri.snat_namespace.name - else: - self.assertIsNone(ri.snat_namespace) interface_name, ex_gw_port = l3_test_common.prepare_ext_gw_test(self, ri) @@ -650,7 +710,6 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): bridge=self.conf.external_network_bridge, namespace=snat_ns_name, prefix=l3_agent.EXTERNAL_DEV_PREFIX) - self.assertIsNone(ri.snat_namespace) else: if not snat_hosted_before: self.assertIsNotNone(ri.snat_namespace) diff --git a/neutron/tests/unit/agent/l3/test_namespace_manager.py b/neutron/tests/unit/agent/l3/test_namespace_manager.py index 62231197e8c..d861bfcde14 100644 --- a/neutron/tests/unit/agent/l3/test_namespace_manager.py +++ b/neutron/tests/unit/agent/l3/test_namespace_manager.py @@ -91,6 +91,13 @@ class TestNamespaceManager(NamespaceManagerTestCaseFramework): retrieved_ns_names = self.ns_manager.list_all() self.assertFalse(retrieved_ns_names) + def test_ensure_snat_cleanup(self): + router_id = _uuid() + with mock.patch.object(self.ns_manager, '_cleanup') as mock_cleanup: + self.ns_manager.ensure_snat_cleanup(router_id) + mock_cleanup.assert_called_once_with(dvr_snat_ns.SNAT_NS_PREFIX, + router_id) + def test_ensure_router_cleanup(self): router_id = _uuid() ns_names = [namespaces.NS_PREFIX + _uuid() for _ in range(5)]