Set HA network port to DOWN when l3 agent starts

When an l3 agent node is rebooted and the HA network port status is already
ACTIVE in the DB, the agent gets this status from the server and then spawns
keepalived (even though the l2 agent might not have wired the port yet),
resulting in multiple HA masters being active at the same time.

To fix this, when the L3 agent starts up we can have it explicitly
set the port status to DOWN for all of the HA ports on that node.
Then we are guaranteed that when they go to ACTIVE it will be because
the L2 agent has wired the ports.

Closes-bug: #1597461
Change-Id: Ib0c8a71b6ff97e43a414f3db4882914b12170d53
This commit is contained in:
venkata anil 2017-06-05 09:56:18 +00:00
parent b6394f8520
commit 500b255278
2 changed files with 48 additions and 0 deletions

View File

@ -59,12 +59,32 @@ class L3RpcCallback(object):
self._l3plugin = directory.get_plugin(constants.L3)
return self._l3plugin
def _update_ha_network_port_status(self, context, host_id):
    """Set the ACTIVE HA network ports bound to ``host_id`` to DOWN.

    Asks the core plugin for ports whose device owner is the router HA
    interface and whose status is ACTIVE, keeps the ones whose host
    binding equals ``host_id`` and updates each of them to DOWN status.
    When no port matches, nothing is logged or updated.

    :param context: request context forwarded to the plugin calls
    :param host_id: host compared against each port's binding host
    """
    active_ha_filter = {
        'device_owner': [constants.DEVICE_OWNER_ROUTER_HA_INTF],
        'status': [constants.PORT_STATUS_ACTIVE]}
    port_ids = []
    for port in self.plugin.get_ports(context, filters=active_ha_filter):
        # The filter above carries no host criterion, so the host match
        # is applied here on each returned port.
        if port.get(portbindings.HOST_ID) == host_id:
            port_ids.append(port['id'])
    if port_ids:
        LOG.debug("L3 agent on host %(host)s restarted, so setting "
                  "HA network ports %(ha_ports)s status to DOWN.",
                  {"host": host_id, "ha_ports": port_ids})
        for port_id in port_ids:
            # A fresh update body is built for every call, as before.
            self.plugin.update_port(
                context, port_id,
                {'port': {'status': constants.PORT_STATUS_DOWN}})
def get_router_ids(self, context, host):
"""Returns IDs of routers scheduled to l3 agent on <host>
This will autoschedule unhosted routers to l3 agent on <host> and then
return all ids of routers scheduled to it.
"""
if utils.is_extension_supported(
self.plugin, constants.PORT_BINDING_EXT_ALIAS):
self._update_ha_network_port_status(context, host)
if utils.is_extension_supported(
self.l3plugin, constants.L3_AGENT_SCHEDULER_EXT_ALIAS):
if cfg.CONF.router_auto_schedule:

View File

@ -1043,6 +1043,34 @@ class L3HAModeDbTestCase(L3HATestFramework):
for port in self._get_router_port_bindings(router['id']):
self.assertEqual(self.agent2['host'], port[portbindings.HOST_ID])
def test_get_router_ids_updates_ha_network_port_status(self):
# Verify that calling get_router_ids() for a host flips an ACTIVE HA
# network port bound to that host back to DOWN.
#
# Network segments are not available in this test, so bind_port is
# replaced with a stub to avoid binding failures.
def bind_port(context):
# The stub only stamps a vif_type on the binding carried by the context.
binding = context._binding
binding.vif_type = portbindings.VIF_TYPE_OVS
# Install the stub on the mechanism manager used by the core plugin.
self.core_plugin.mechanism_manager.bind_port = bind_port
router = self._create_router(ha=True)
callback = l3_rpc.L3RpcCallback()
# Inject the test's plugin directly into the callback's _l3plugin
# attribute.
callback._l3plugin = self.plugin
host = self.agent1['host']
ctx = self.admin_ctx
# Pick the HA port binding of this router that belongs to agent1.
bindings = self.plugin.get_ha_router_port_bindings(ctx, [router['id']])
binding = [binding for binding in bindings
if binding.l3_agent_id == self.agent1['id']][0]
port = self.core_plugin.get_port(ctx, binding.port_id)
# NOTE(review): presumably this records `host` as the port's binding
# host so the host match in get_router_ids() succeeds -- confirm against
# _ensure_host_set_on_port.
callback._ensure_host_set_on_port(
ctx, host, port, router_id=router['id'])
# The port status is DOWN by default because no l2 agent runs in this
# test, so force it to ACTIVE to set up the precondition.
self.core_plugin.update_port_status(
ctx, port['id'], constants.PORT_STATUS_ACTIVE, host=host)
# Re-read the port and check the precondition really holds.
port = self.core_plugin.get_port(ctx, port['id'])
self.assertEqual(constants.PORT_STATUS_ACTIVE, port['status'])
# Call under test: expected to reset the HA port status for this host.
callback.get_router_ids(ctx, host)
port = self.core_plugin.get_port(ctx, port['id'])
self.assertEqual(constants.PORT_STATUS_DOWN, port['status'])
def test_ensure_host_set_on_ports_dvr_ha_binds_to_active(self):
agent3 = helpers.register_l3_agent('host_3',
constants.L3_AGENT_MODE_DVR_SNAT)