From 9902400039018d77aa3034147cfb24ca4b2353f6 Mon Sep 17 00:00:00 2001 From: rajeev Date: Mon, 13 Oct 2014 16:25:36 -0400 Subject: [PATCH] Fix race condition on processing DVR floating IPs Fip namespace and agent gateway port can be shared by multiple dvr routers. This change uses a set as the control variable for these shared resources and ensures that Test and Set operation on the control variable are performed atomically so that race conditions do not occur among multiple threads processing floating IPs. Limitation: The scope of this change is limited to addressing the race condition described in the bug report. It may not address other issues such as pre-existing issue with handling of DVR floatingips on agent restart. closes-bug: #1381238 Change-Id: I6dc2b7bad6e8ddbaa86c1f7a1e2028aeacc3afef --- neutron/agent/l3_agent.py | 28 ++++++++++++------- neutron/tests/unit/test_l3_agent.py | 42 +++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/neutron/agent/l3_agent.py b/neutron/agent/l3_agent.py index 112bf77ad27..2bb784277f8 100644 --- a/neutron/agent/l3_agent.py +++ b/neutron/agent/l3_agent.py @@ -561,7 +561,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, # dvr data self.agent_gateway_port = None - self.agent_fip_count = 0 + self.fip_ns_subscribers = set() self.local_subnets = LinkLocalAllocator( os.path.join(self.conf.state_path, 'fip-linklocal-networks'), FIP_LL_SUBNET) @@ -573,6 +573,15 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, self.target_ex_net_id = None self.use_ipv6 = ipv6_utils.is_enabled() + def _fip_ns_subscribe(self, router_id): + is_first = (len(self.fip_ns_subscribers) == 0) + self.fip_ns_subscribers.add(router_id) + return is_first + + def _fip_ns_unsubscribe(self, router_id): + self.fip_ns_subscribers.discard(router_id) + return len(self.fip_ns_subscribers) == 0 + def _check_config_params(self): """Check items in configuration files. @@ -1096,9 +1105,11 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, if ri.router['distributed']: # filter out only FIPs for this host/agent floating_ips = [i for i in floating_ips if i['host'] == self.host] - if floating_ips and self.agent_gateway_port is None: - self._create_agent_gateway_port(ri, floating_ips[0] - ['floating_network_id']) + if floating_ips: + is_first = self._fip_ns_subscribe(ri.router_id) + if is_first: + self._create_agent_gateway_port(ri, floating_ips[0] + ['floating_network_id']) if self.agent_gateway_port: if floating_ips and ri.dist_fip_count == 0: @@ -1662,7 +1673,6 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, interface_name, floating_ip, distributed=True) # update internal structures - self.agent_fip_count = self.agent_fip_count + 1 ri.dist_fip_count = ri.dist_fip_count + 1 def floating_ip_removed_dist(self, ri, fip_cidr): @@ -1696,10 +1706,10 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, self.local_subnets.release(ri.router_id) ri.rtr_fip_subnet = None ns_ip.del_veth(fip_2_rtr_name) - # clean up fip-namespace if this is the last FIP - self.agent_fip_count = self.agent_fip_count - 1 - if self.agent_fip_count == 0: - self._destroy_fip_namespace(fip_ns_name) + is_last = self._fip_ns_unsubscribe(ri.router_id) + # clean up fip-namespace if this is the last FIP + if is_last: + self._destroy_fip_namespace(fip_ns_name) def floating_forward_rules(self, floating_ip, fixed_ip): return [('PREROUTING', '-d %s -j DNAT --to %s' % diff --git a/neutron/tests/unit/test_l3_agent.py b/neutron/tests/unit/test_l3_agent.py index a8b74ea76f7..ed7f7b611e2 100644 --- a/neutron/tests/unit/test_l3_agent.py +++ b/neutron/tests/unit/test_l3_agent.py @@ -2178,8 +2178,9 @@ vrrp_instance VR_1 { 16, FIP_PRI) # TODO(mrsmith): add more asserts + @mock.patch.object(l3_agent.L3NATAgent, '_fip_ns_unsubscribe') @mock.patch.object(l3_agent.LinkLocalAllocator, '_write') - def test_floating_ip_removed_dist(self, write): + def test_floating_ip_removed_dist(self, write, unsubscribe): agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) router = prepare_router_data() agent_gw_port = {'fixed_ips': [{'ip_address': '20.0.0.30', @@ -2194,6 +2195,7 @@ vrrp_instance VR_1 { ri = l3_agent.RouterInfo(router['id'], self.conf.root_helper, self.conf.use_namespaces, router=router) ri.dist_fip_count = 2 + agent.fip_ns_subscribers.add(ri.router_id) ri.floating_ips_dict['11.22.33.44'] = FIP_PRI ri.fip_2_rtr = '11.22.33.42' ri.rtr_2_fip = '11.22.33.40' @@ -2204,9 +2206,10 @@ vrrp_instance VR_1 { self.mock_rule.delete_rule_priority.assert_called_with(FIP_PRI) self.mock_ip_dev.route.delete_route.assert_called_with(fip_cidr, str(s.ip)) + self.assertFalse(unsubscribe.called, '_fip_ns_unsubscribe called!') + with mock.patch.object(agent, '_destroy_fip_namespace') as f: ri.dist_fip_count = 1 - agent.agent_fip_count = 1 fip_ns_name = agent.get_fip_ns_name( str(agent._fetch_external_net_id())) ri.rtr_fip_subnet = agent.local_subnets.allocate(ri.router_id) @@ -2217,6 +2220,7 @@ vrrp_instance VR_1 { self.mock_ip_dev.route.delete_gateway.assert_called_once_with( str(fip_to_rtr.ip), table=16) f.assert_called_once_with(fip_ns_name) + unsubscribe.assert_called_once_with(ri.router_id) def test_get_service_plugin_list(self): service_plugins = [p_const.L3_ROUTER_NAT] @@ -2252,6 +2256,40 @@ vrrp_instance VR_1 { self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent, HOSTNAME, self.conf) + def test__fip_ns_subscribe_is_first_true(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router_id = _uuid() + is_first = agent._fip_ns_subscribe(router_id) + self.assertTrue(is_first) + self.assertEqual(len(agent.fip_ns_subscribers), 1) + + def test__fip_ns_subscribe_is_first_false(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router_id = _uuid() + router2_id = _uuid() + agent._fip_ns_subscribe(router_id) + is_first = agent._fip_ns_subscribe(router2_id) + self.assertFalse(is_first) + self.assertEqual(len(agent.fip_ns_subscribers), 2) + + def test__fip_ns_unsubscribe_is_last_true(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router_id = _uuid() + agent.fip_ns_subscribers.add(router_id) + is_last = agent._fip_ns_unsubscribe(router_id) + self.assertTrue(is_last) + self.assertEqual(len(agent.fip_ns_subscribers), 0) + + def test__fip_ns_unsubscribe_is_last_false(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router_id = _uuid() + router2_id = _uuid() + agent.fip_ns_subscribers.add(router_id) + agent.fip_ns_subscribers.add(router2_id) + is_last = agent._fip_ns_unsubscribe(router_id) + self.assertFalse(is_last) + self.assertEqual(len(agent.fip_ns_subscribers), 1) + class TestL3AgentEventHandler(base.BaseTestCase):