DVR: Self recover from the loss of 'fg' ports in FIP Namespace

Sometimes we have seen the 'fg' ports within the fip-namespace
either go down, not get created in time, or get deleted due to
some race conditions.
When this happens, the code tries to recover itself after a couple
of exceptions when there is a router_update message.
However, after recovery we could see that the fip-namespace is
recreated and the 'fg-' port is plugged in and active, while the
'fpr' and the 'rfp' ports are missing, which leads to the
FloatingIP failure.

This patch will fix this issue by checking for the missing devices
in all router_updates.

Change-Id: I78c7ea9f3b6a1cf5b208286eb372da05dc1ba379
Closes-Bug: #1776984
This commit is contained in:
Swaminathan Vasudevan 2018-06-14 13:49:23 -07:00
parent d4a9f56de2
commit 5a7c12f245
3 changed files with 69 additions and 0 deletions

View File

@ -601,7 +601,16 @@ class DvrLocalRouter(dvr_router_base.DvrRouterBase):
self.connect_rtr_2_fip()
super(DvrLocalRouter, self).process_external()
def _check_rtr_2_fip_connect(self):
    """Clear ``rtr_fip_connect`` if the FIP-side link device is gone.

    Looks up the FIP namespace by name. When that namespace exists but
    the device named by ``fip_ns.get_int_device_name(router_id)`` is not
    present inside it, ``self.rtr_fip_connect`` is set to ``False``.
    In every other case (namespace absent, or device present) the flag
    is left untouched.
    """
    namespace = self.fip_ns.get_name()
    if not ip_lib.network_namespace_exists(namespace):
        # No FIP namespace to inspect; keep the current flag value.
        return

    link_device = self.fip_ns.get_int_device_name(self.router_id)
    link_present = ip_lib.device_exists(link_device, namespace=namespace)
    if not link_present:
        self.rtr_fip_connect = False
def connect_rtr_2_fip(self):
self._check_rtr_2_fip_connect()
if self.fip_ns.agent_gateway_port and not self.rtr_fip_connect:
ex_gw_port = self.get_ex_gw_port()
self.fip_ns.create_rtr_2_fip_link(self)

View File

@ -115,6 +115,64 @@ class TestDvrRouter(framework.L3AgentTestFramework):
self.assertEqual(4, default_rules_list_count)
self.assertEqual(2, interface_rules_list_count)
def test_dvr_update_gateway_port_no_fip_fg_port_recovers_itself_with_fpr(
self):
# Scenario: the fg- device is unplugged from the FIP namespace and the
# fpr device's link is set down. The first router update is expected to
# raise FloatingIpSetupException; the second one is expected to succeed,
# after which both the fg- and fpr devices exist again, the default
# route in the router's table points at the new gateway IP, and the
# floating IPs validate against the external network.
self.agent.conf.agent_mode = 'dvr'
# Create the router with external net
router_info = self.generate_dvr_router_info()
external_gw_port = router_info['gw_port']
ext_net_id = router_info['_floatingips'][0]['floating_network_id']
self.mock_plugin_api.get_external_network_id.return_value = ext_net_id
router = self.manage_router(self.agent, router_info)
# Handles for the two FIP-namespace devices this test inspects:
# the external (fg) device and the FIP-to-router (fpr) device.
fg_port = router.fip_ns.agent_gateway_port
fg_port_name = router.fip_ns.get_ext_device_name(fg_port['id'])
fg_device = ip_lib.IPDevice(fg_port_name,
namespace=router.fip_ns.name)
fip_2_rtr_name = router.fip_ns.get_int_device_name(router.router_id)
fpr_device = ip_lib.IPDevice(fip_2_rtr_name,
namespace=router.fip_ns.name)
# Now validate if the gateway is properly configured.
rtr_2_fip, fip_2_rtr = router.rtr_fip_subnet.get_pair()
# Routing table index used for the gateway lookups below, derived from
# the FIP-to-router side of the link subnet pair.
tbl_index = router._get_snat_idx(fip_2_rtr)
tbl_filter = ['table', tbl_index]
self.assertIn('gateway', fg_device.route.get_gateway(
filters=tbl_filter))
self._validate_fips_for_external_network(
router, router.fip_ns.get_name())
# Now delete the fg- port that was created
ext_net_bridge = self.agent.conf.external_network_bridge
router.fip_ns.driver.unplug(fg_port_name,
bridge=ext_net_bridge,
namespace=router.fip_ns.name,
prefix=dvr_fip_ns.FIP_EXT_DEV_PREFIX)
# Now check if the fg- port is missing.
self.assertFalse(fg_device.exists())
# Additionally take the fpr device's link down before the update.
fpr_device.link.set_down()
# Now change the gateway ip for the router and do an update.
# NOTE(review): ex_gw_port is replaced by a deep copy, presumably to
# avoid aliasing with the port data mutated below -- confirm.
router.ex_gw_port = copy.deepcopy(router.ex_gw_port)
new_fg_port = copy.deepcopy(fg_port)
for subnet in new_fg_port['subnets']:
subnet['gateway_ip'] = '19.4.4.2'
router.router[n_const.FLOATINGIP_AGENT_INTF_KEY] = [new_fg_port]
# First update after the fg- port loss is expected to fail.
self.assertRaises(n_exc.FloatingIpSetupException,
self.agent._process_updated_router,
router.router)
# Second update is expected to succeed and restore the devices.
self.agent._process_updated_router(router.router)
self.assertTrue(fg_device.exists())
self.assertTrue(fpr_device.exists())
updated_route = fg_device.route.list_routes(
ip_version=lib_constants.IP_VERSION_4,
table=tbl_index)
# Exactly one route is expected in the table: a default route through
# the fg- device via the new gateway IP set above.
expected_route = [{'cidr': '0.0.0.0/0',
'dev': fg_port_name,
'table': tbl_index,
u'via': u'19.4.4.2'}]
self.assertEqual(expected_route, updated_route)
self._validate_fips_for_external_network(
router, router.fip_ns.get_name())
# Tear down the router and check the FIP namespace is removed with it.
self._delete_router(self.agent, router.router_id)
self._assert_fip_namespace_deleted(external_gw_port)
def test_dvr_update_gateway_port_with_no_gw_port_in_namespace(self):
self.agent.conf.agent_mode = 'dvr'

View File

@ -206,6 +206,7 @@ class TestDvrRouterOperations(base.BaseTestCase):
ri.rtr_fip_connect = False
ex_gw_port = {'network_id': 'fake_net_id'}
ri.create_dvr_external_gateway_on_agent(ex_gw_port)
ri._check_rtr_2_fip_connect = mock.Mock()
ri.connect_rtr_2_fip()
self.assertTrue(ri._check_if_address_scopes_match.called)
if address_scopes_match:
@ -329,6 +330,7 @@ class TestDvrRouterOperations(base.BaseTestCase):
ri.fip_ns = mock.Mock()
ri.fip_ns.agent_gateway_port = agent_gw_port
ri.create_dvr_external_gateway_on_agent(ri.ex_gw_port)
ri._check_rtr_2_fip_connect = mock.Mock()
ri.connect_rtr_2_fip()
self.assertTrue(ri.rtr_fip_connect)
ri.fip_ns.allocate_rule_priority.return_value = FIP_PRI