Fix DNS connectivity issues with DVR+HA routers and DHCP-HA

Before this change, DVR_SNAT agents would get no routers back when
asking for updates caused by the provisioning of DHCP ports on the
node they are running on. As a result, there was no connectivity
between the DHCP port and the network gateway (which may be
hosted on a different node), and therefore things like DNS could
break when a VM attempted name resolution through the affected
DHCP port.

This change relaxes a conditional that prevented the right list of
routers from being compiled and returned from the server to the agent.
The agent, on the other hand, needs to make sure it allocates the
right type of router based on what is returned from the server.

Closes-bug: #1733987

Change-Id: I6124738c3324e0cc3f7998e3a541ff7547f2a8a7
This commit is contained in:
Armando Migliaccio 2017-11-22 10:59:27 -08:00
parent 618ee8658f
commit b24013f569
4 changed files with 39 additions and 10 deletions

View File

@ -322,7 +322,13 @@ class L3NATAgent(ha.AgentMixin,
kwargs['host'] = self.host
if router.get('distributed') and router.get('ha'):
if self.conf.agent_mode == lib_const.L3_AGENT_MODE_DVR_SNAT:
# if the router does not contain information about the HA interface
# this means that this DVR+HA router needs to host only the edge
# side of it, typically because it's landing on a node that needs
# to provision a router namespace because of a DVR service port
# (e.g. DHCP).
if (self.conf.agent_mode == lib_const.L3_AGENT_MODE_DVR_SNAT
and router.get(lib_const.HA_INTERFACE_KEY) is not None):
kwargs['state_change_callback'] = self.enqueue_state_change
return dvr_edge_ha_router.DvrEdgeHaRouter(*args, **kwargs)

View File

@ -585,7 +585,7 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
None)
@log_helpers.log_method_call
def _process_sync_ha_data(self, context, routers, host, agent_mode):
def _process_sync_ha_data(self, context, routers, host, is_any_dvr_agent):
routers_dict = dict((router['id'], router) for router in routers)
bindings = self.get_ha_router_port_bindings(context,
@ -614,12 +614,11 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
self._populate_mtu_and_subnets_for_ports(context, interfaces)
# If this is a DVR+HA router, but the agent in question is in 'dvr'
# or 'dvr_no_external' mode (as opposed to 'dvr_snat'), then we want to
# always return it even though it's missing the '_ha_interface' key.
# If this is a DVR+HA router, then we want to always return it even
# though it's missing the '_ha_interface' key. The agent will have
# to figure out what kind of router setup is needed.
return [r for r in list(routers_dict.values())
if (agent_mode == constants.L3_AGENT_MODE_DVR or
agent_mode == n_const.L3_AGENT_MODE_DVR_NO_EXTERNAL or
if (is_any_dvr_agent or
not r.get('ha') or r.get(constants.HA_INTERFACE_KEY))]
@log_helpers.log_method_call
@ -638,7 +637,8 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
else:
sync_data = super(L3_HA_NAT_db_mixin, self).get_sync_data(context,
router_ids, active)
return self._process_sync_ha_data(context, sync_data, host, agent_mode)
return self._process_sync_ha_data(
context, sync_data, host, dvr_agent_mode)
@classmethod
def _set_router_states(cls, context, bindings, states):

View File

@ -2524,6 +2524,17 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
def test_process_routers_update_router_deleted_error(self):
self._test_process_routers_update_router_deleted(error=True)
# Checks that an agent whose agent_mode is 'dvr_snat' still processes a
# router flagged both 'distributed' and 'ha' when the router dict carries
# no HA interface entry: the router id must end up in agent.router_info.
def test_process_ha_dvr_router_if_compatible_no_ha_interface(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
agent.conf.agent_mode = 'dvr_snat'
# The router dict below contains no HA interface entry.
router = {'id': _uuid(),
'distributed': True, 'ha': True,
'external_gateway_info': {}, 'routes': [],
'admin_state_up': True}
agent._process_router_if_compatible(router)
self.assertIn(router['id'], agent.router_info)
def test_process_router_if_compatible_with_no_ext_net_in_conf(self):
self.conf.set_override('external_network_bridge', 'br-ex')
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)

View File

@ -419,6 +419,19 @@ class L3HATestCase(L3HATestFramework):
self.assertEqual(1, len(subnets))
self.assertEqual(cfg.CONF.l3_ha_net_cidr, subnets[0]['cidr'])
# Checks that the HA sync data requested for host 'a-dvr_snat-host'
# contains exactly one router, that the router reports 'ha' as truthy,
# and that its HA interface entry is None.
def test_l3_agent_routers_query_interface_includes_dvrsnat(self):
# The value bound here is not read again; 'router' is rebound below.
router = self._create_router(distributed=True)
routers = self.plugin.get_ha_sync_data_for_host(self.admin_ctx,
'a-dvr_snat-host',
self.agent2)
self.assertEqual(1, len(routers))
router = routers[0]
self.assertTrue(router.get('ha'))
# dict.get() returns None both when the key is absent and when it is
# explicitly set to None, so either case satisfies the assertion.
interface = router.get(constants.HA_INTERFACE_KEY)
self.assertIsNone(interface)
def test_unique_ha_network_per_tenant(self):
tenant1 = _uuid()
tenant2 = _uuid()
@ -778,8 +791,7 @@ class L3HATestCase(L3HATestFramework):
orig_func = self.plugin._process_sync_ha_data
def process_sync_ha_data(context, routers, host, agent_mode):
return orig_func(context, routers, host,
agent_mode=constants.L3_AGENT_MODE_DVR)
return orig_func(context, routers, host, is_any_dvr_agent=True)
with mock.patch.object(self.plugin, '_process_sync_ha_data',
side_effect=process_sync_ha_data):