From d730b1010277138136512eb6efb12ab893ca6793 Mon Sep 17 00:00:00 2001 From: venkata anil <anilvenkata@redhat.com> Date: Mon, 5 Jun 2017 09:56:18 +0000 Subject: [PATCH] Set HA network port to DOWN when l3 agent starts When an l3 agent node is rebooted and the HA network port status is already ACTIVE in the DB, the agent gets this status from the server and spawns keepalived (even though the l2 agent might not have wired the port yet), resulting in multiple HA masters being active at the same time. To fix this, when the L3 agent starts up we have it explicitly set the port status to DOWN for all of the HA ports on that node. We are then guaranteed that when they go to ACTIVE it is because the L2 agent has wired the ports. Closes-bug: #1597461 Change-Id: Ib0c8a71b6ff97e43a414f3db4882914b12170d53 --- neutron/api/rpc/handlers/l3_rpc.py | 29 ++++++++++++++++++++ neutron/tests/unit/db/test_l3_hamode_db.py | 31 ++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/neutron/api/rpc/handlers/l3_rpc.py b/neutron/api/rpc/handlers/l3_rpc.py index a41674c3046..fda7be1ddf3 100644 --- a/neutron/api/rpc/handlers/l3_rpc.py +++ b/neutron/api/rpc/handlers/l3_rpc.py @@ -59,12 +59,41 @@ class L3RpcCallback(object): self._l3plugin = directory.get_plugin(constants.L3) return self._l3plugin + def _update_ha_network_port_status(self, context, host_id): + # set HA network port status to DOWN. 
+ device_filter = { + 'device_owner': [constants.DEVICE_OWNER_ROUTER_HA_INTF], + 'status': [constants.PORT_STATUS_ACTIVE]} + ports = self.plugin.get_ports(context, filters=device_filter) + ha_ports = [p['id'] for p in ports + if p.get(portbindings.HOST_ID) == host_id] + if not ha_ports: + return + LOG.debug("L3 agent on host %(host)s requested for fullsync, so " + "setting HA network ports %(ha_ports)s status to DOWN.", + {"host": host_id, "ha_ports": ha_ports}) + for p in ha_ports: + self.plugin.update_port( + context, p, {'port': {'status': constants.PORT_STATUS_DOWN}}) + def get_router_ids(self, context, host): """Returns IDs of routers scheduled to l3 agent on <host> This will autoschedule unhosted routers to l3 agent on <host> and then return all ids of routers scheduled to it. + This will also update HA network port status to down for all HA routers + hosted on <host>. This is needed to avoid l3 agent spawning keepalived + when l2 agent has not wired the port. This can happen after a system + reboot that has wiped out flows, etc and the L2 agent hasn't started up + yet. The port will still be ACTIVE in the data model and the L3 agent + will use that info to mistakenly think that L2 network is ready. + By forcing into DOWN, we will require the L2 agent to essentially ack + that the port is indeed ACTIVE by reacting to the port update and + calling update_device_up. 
""" + if utils.is_extension_supported( + self.plugin, constants.PORT_BINDING_EXT_ALIAS): + self._update_ha_network_port_status(context, host) if utils.is_extension_supported( self.l3plugin, constants.L3_AGENT_SCHEDULER_EXT_ALIAS): if cfg.CONF.router_auto_schedule: diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index 8b0d4940909..8e96c1d2a1d 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -1043,6 +1043,37 @@ class L3HAModeDbTestCase(L3HATestFramework): for port in self._get_router_port_bindings(router['id']): self.assertEqual(self.agent2['host'], port[portbindings.HOST_ID]) + def test_get_router_ids_updates_ha_network_port_status(self): + router = self._create_router(ha=True) + callback = l3_rpc.L3RpcCallback() + callback._l3plugin = self.plugin + host = self.agent1['host'] + ctx = self.admin_ctx + bindings = self.plugin.get_ha_router_port_bindings( + ctx, [router['id']]) + binding = [binding for binding in bindings + if binding.l3_agent_id == self.agent1['id']][0] + port = self.core_plugin.get_port(ctx, binding.port_id) + + # As network segments are not available, mock bind_port + # to avoid binding failures + def bind_port(context): + binding = context._binding + binding.vif_type = portbindings.VIF_TYPE_OVS + with mock.patch.object(self.core_plugin.mechanism_manager, + 'bind_port', side_effect=bind_port): + callback._ensure_host_set_on_port( + ctx, host, port, router_id=router['id']) + # Port status will be DOWN by default as we are not having + # l2 agent in test, so update it to ACTIVE. 
+ self.core_plugin.update_port_status( + ctx, port['id'], constants.PORT_STATUS_ACTIVE, host=host) + port = self.core_plugin.get_port(ctx, port['id']) + self.assertEqual(constants.PORT_STATUS_ACTIVE, port['status']) + callback.get_router_ids(ctx, host) + port = self.core_plugin.get_port(ctx, port['id']) + self.assertEqual(constants.PORT_STATUS_DOWN, port['status']) + def test_ensure_host_set_on_ports_dvr_ha_binds_to_active(self): agent3 = helpers.register_l3_agent('host_3', constants.L3_AGENT_MODE_DVR_SNAT)