Fix race condition on processing DVR floating IPs

Fip namespace and agent gateway port can be shared by multiple dvr routers.
This change uses a set as the control variable for these shared resources
and ensures that Test and Set operation on the control variable are
performed atomically so that race conditions do not occur among
multiple threads processing floating IPs.
Limitation: The scope of this change is limited to addressing the race
condition described in the bug report. It may not address other issues
such as pre-existing issue with handling of DVR floatingips on agent
restart.

closes-bug: #1381238

Change-Id: I6dc2b7bad6e8ddbaa86c1f7a1e2028aeacc3afef
This commit is contained in:
rajeev 2014-10-13 16:25:36 -04:00
parent 5ba1dae2d1
commit 9902400039
2 changed files with 59 additions and 11 deletions

View File

@ -561,7 +561,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
# dvr data
self.agent_gateway_port = None
self.agent_fip_count = 0
self.fip_ns_subscribers = set()
self.local_subnets = LinkLocalAllocator(
os.path.join(self.conf.state_path, 'fip-linklocal-networks'),
FIP_LL_SUBNET)
@ -573,6 +573,15 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
self.target_ex_net_id = None
self.use_ipv6 = ipv6_utils.is_enabled()
def _fip_ns_subscribe(self, router_id):
is_first = (len(self.fip_ns_subscribers) == 0)
self.fip_ns_subscribers.add(router_id)
return is_first
def _fip_ns_unsubscribe(self, router_id):
self.fip_ns_subscribers.discard(router_id)
return len(self.fip_ns_subscribers) == 0
def _check_config_params(self):
"""Check items in configuration files.
@ -1096,9 +1105,11 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
if ri.router['distributed']:
# filter out only FIPs for this host/agent
floating_ips = [i for i in floating_ips if i['host'] == self.host]
if floating_ips and self.agent_gateway_port is None:
self._create_agent_gateway_port(ri, floating_ips[0]
['floating_network_id'])
if floating_ips:
is_first = self._fip_ns_subscribe(ri.router_id)
if is_first:
self._create_agent_gateway_port(ri, floating_ips[0]
['floating_network_id'])
if self.agent_gateway_port:
if floating_ips and ri.dist_fip_count == 0:
@ -1662,7 +1673,6 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
interface_name, floating_ip,
distributed=True)
# update internal structures
self.agent_fip_count = self.agent_fip_count + 1
ri.dist_fip_count = ri.dist_fip_count + 1
def floating_ip_removed_dist(self, ri, fip_cidr):
@ -1696,10 +1706,10 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
self.local_subnets.release(ri.router_id)
ri.rtr_fip_subnet = None
ns_ip.del_veth(fip_2_rtr_name)
# clean up fip-namespace if this is the last FIP
self.agent_fip_count = self.agent_fip_count - 1
if self.agent_fip_count == 0:
self._destroy_fip_namespace(fip_ns_name)
is_last = self._fip_ns_unsubscribe(ri.router_id)
# clean up fip-namespace if this is the last FIP
if is_last:
self._destroy_fip_namespace(fip_ns_name)
def floating_forward_rules(self, floating_ip, fixed_ip):
return [('PREROUTING', '-d %s -j DNAT --to %s' %

View File

@ -2178,8 +2178,9 @@ vrrp_instance VR_1 {
16, FIP_PRI)
# TODO(mrsmith): add more asserts
@mock.patch.object(l3_agent.L3NATAgent, '_fip_ns_unsubscribe')
@mock.patch.object(l3_agent.LinkLocalAllocator, '_write')
def test_floating_ip_removed_dist(self, write):
def test_floating_ip_removed_dist(self, write, unsubscribe):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router = prepare_router_data()
agent_gw_port = {'fixed_ips': [{'ip_address': '20.0.0.30',
@ -2194,6 +2195,7 @@ vrrp_instance VR_1 {
ri = l3_agent.RouterInfo(router['id'], self.conf.root_helper,
self.conf.use_namespaces, router=router)
ri.dist_fip_count = 2
agent.fip_ns_subscribers.add(ri.router_id)
ri.floating_ips_dict['11.22.33.44'] = FIP_PRI
ri.fip_2_rtr = '11.22.33.42'
ri.rtr_2_fip = '11.22.33.40'
@ -2204,9 +2206,10 @@ vrrp_instance VR_1 {
self.mock_rule.delete_rule_priority.assert_called_with(FIP_PRI)
self.mock_ip_dev.route.delete_route.assert_called_with(fip_cidr,
str(s.ip))
self.assertFalse(unsubscribe.called, '_fip_ns_unsubscribe called!')
with mock.patch.object(agent, '_destroy_fip_namespace') as f:
ri.dist_fip_count = 1
agent.agent_fip_count = 1
fip_ns_name = agent.get_fip_ns_name(
str(agent._fetch_external_net_id()))
ri.rtr_fip_subnet = agent.local_subnets.allocate(ri.router_id)
@ -2217,6 +2220,7 @@ vrrp_instance VR_1 {
self.mock_ip_dev.route.delete_gateway.assert_called_once_with(
str(fip_to_rtr.ip), table=16)
f.assert_called_once_with(fip_ns_name)
unsubscribe.assert_called_once_with(ri.router_id)
def test_get_service_plugin_list(self):
service_plugins = [p_const.L3_ROUTER_NAT]
@ -2252,6 +2256,40 @@ vrrp_instance VR_1 {
self.assertRaises(messaging.MessagingTimeout, l3_agent.L3NATAgent,
HOSTNAME, self.conf)
def test__fip_ns_subscribe_is_first_true(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router_id = _uuid()
is_first = agent._fip_ns_subscribe(router_id)
self.assertTrue(is_first)
self.assertEqual(len(agent.fip_ns_subscribers), 1)
def test__fip_ns_subscribe_is_first_false(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router_id = _uuid()
router2_id = _uuid()
agent._fip_ns_subscribe(router_id)
is_first = agent._fip_ns_subscribe(router2_id)
self.assertFalse(is_first)
self.assertEqual(len(agent.fip_ns_subscribers), 2)
def test__fip_ns_unsubscribe_is_last_true(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router_id = _uuid()
agent.fip_ns_subscribers.add(router_id)
is_last = agent._fip_ns_unsubscribe(router_id)
self.assertTrue(is_last)
self.assertEqual(len(agent.fip_ns_subscribers), 0)
def test__fip_ns_unsubscribe_is_last_false(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router_id = _uuid()
router2_id = _uuid()
agent.fip_ns_subscribers.add(router_id)
agent.fip_ns_subscribers.add(router2_id)
is_last = agent._fip_ns_unsubscribe(router_id)
self.assertFalse(is_last)
self.assertEqual(len(agent.fip_ns_subscribers), 1)
class TestL3AgentEventHandler(base.BaseTestCase):