From 647b24288e93d83f7dd1c3db88a0cf5efac0ff3e Mon Sep 17 00:00:00 2001 From: LIU Yulong Date: Thu, 31 Oct 2019 19:06:37 +0800 Subject: [PATCH] Do not link up HA router gateway in backup node L3 router will set its devices link up by default. For HA routers, the gateway device will be pluged in all scheduled hosts. When the gateway deivce is up in backup node, it will send out IPv6 related packets (MLDv2) according to some kernal config. This will cause the physical fabric think that the gateway MAC is now working in the backup node. And finally the master node L3 traffic will be broken. This patch sets the backup gateway device link down by default. When the VRRP sets the master state in one host, the L3 agent state change procedure will do link up action for the gateway device. Conflicts: neutron/agent/l3/router_info.py neutron/agent/linux/interface.py Closes-Bug: #1859832 Change-Id: I8dca2c1a2f8cb467cfb44420f0eea54ca0932b05 (cherry picked from commit c52029c39aa824a67095fbbf9e59eff769d92587) (cherry picked from commit b9a29681003b7f975d7bc9687399cafb5549010a) (cherry picked from commit 41e8689234e87b6bf77a9849776d6a05cfcb1a71) --- neutron/agent/l3/dvr_edge_ha_router.py | 4 +- neutron/agent/l3/ha.py | 9 +++++ neutron/agent/l3/ha_router.py | 38 ++++++++++++++++++- neutron/agent/l3/router_info.py | 19 +++++++--- neutron/agent/linux/interface.py | 35 ++++++++++++----- neutron/agent/linux/keepalived.py | 6 +++ .../unit/agent/l3/test_dvr_local_router.py | 28 ++++++++++++++ 7 files changed, 123 insertions(+), 16 deletions(-) diff --git a/neutron/agent/l3/dvr_edge_ha_router.py b/neutron/agent/l3/dvr_edge_ha_router.py index f0219a71730..bfe267bd3c6 100644 --- a/neutron/agent/l3/dvr_edge_ha_router.py +++ b/neutron/agent/l3/dvr_edge_ha_router.py @@ -114,7 +114,9 @@ class DvrEdgeHaRouter(dvr_edge_router.DvrEdgeRouter, def _external_gateway_added(self, ex_gw_port, interface_name, ns_name, preserve_ips): - self._plug_external_gateway(ex_gw_port, interface_name, ns_name) + link_up = self.external_gateway_link_up() + self._plug_external_gateway(ex_gw_port, interface_name, ns_name, + link_up=link_up) def _is_this_snat_host(self): return (self.agent_conf.agent_mode diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py index 88a23f62a86..be8724d0d20 100644 --- a/neutron/agent/l3/ha.py +++ b/neutron/agent/l3/ha.py @@ -157,6 +157,15 @@ class AgentMixin(object): if ri is None: return + # Set external gateway port link up or down according to state + if state == 'master': + ri.set_external_gw_port_link_status(link_up=True, set_gw=True) + elif state == 'backup': + ri.set_external_gw_port_link_status(link_up=False) + else: + LOG.warning('Router %s has status %s, ' + 'no action to router gateway device.', + router_id, state) # TODO(dalvarez): Fix bug 1677279 by moving the IPv6 parameters # configuration to keepalived-state-change in order to remove the # dependency that currently exists on l3-agent running for the IPv6 diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 678ab539469..b6cb0bfe672 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -88,6 +88,15 @@ class HaRouter(router.RouterInfo): def ha_vr_id(self): return self.router.get('ha_vr_id') + def _check_and_set_real_state(self): + # When the physical host was down/up, the 'master' router may still + # have its original state in the _ha_state_path file. We directly + # reset it to 'backup'. + if (not self.keepalived_manager.check_processes() and + os.path.exists(self.ha_state_path) and + self.ha_state == 'master'): + self.ha_state = 'backup' + @property def ha_state(self): if self._ha_state: @@ -133,6 +142,7 @@ class HaRouter(router.RouterInfo): self.set_ha_port() self._init_keepalived_manager(process_monitor) + self._check_and_set_real_state() self.ha_network_added() self.update_initial_state(self.state_change_callback) self.spawn_state_change_monitor(process_monitor) @@ -427,7 +437,9 @@ class HaRouter(router.RouterInfo): return port1_filtered == port2_filtered def external_gateway_added(self, ex_gw_port, interface_name): - self._plug_external_gateway(ex_gw_port, interface_name, self.ns_name) + link_up = self.external_gateway_link_up() + self._plug_external_gateway(ex_gw_port, interface_name, + self.ns_name, link_up=link_up) self._add_gateway_vip(ex_gw_port, interface_name) self._disable_ipv6_addressing_on_interface(interface_name) @@ -492,3 +504,27 @@ class HaRouter(router.RouterInfo): if (self.keepalived_manager.get_process().active and self.ha_state == 'master'): super(HaRouter, self).enable_radvd(internal_ports) + + def external_gateway_link_up(self): + # Check HA router ha_state for its gateway port link state. + # 'backup' instance will not link up the gateway port. + return self.ha_state == 'master' + + def set_external_gw_port_link_status(self, link_up, set_gw=False): + link_state = "up" if link_up else "down" + LOG.info('Set router %s gateway device link state to %s.', + self.router_id, link_state) + + ex_gw_port = self.get_ex_gw_port() + ex_gw_port_id = (ex_gw_port and ex_gw_port['id'] or + self.ex_gw_port and self.ex_gw_port['id']) + if ex_gw_port_id: + interface_name = self.get_external_device_name(ex_gw_port_id) + ns_name = self.get_gw_ns_name() + self.driver.set_link_status(interface_name, ns_name, + link_up=link_up) + if link_up and set_gw: + preserve_ips = self.get_router_preserve_ips() + self._external_gateway_settings(ex_gw_port, interface_name, + ns_name, preserve_ips) + self.routes_updated([], self.routes) diff --git a/neutron/agent/l3/router_info.py b/neutron/agent/l3/router_info.py index 7a72da6318b..3b35049d5f5 100644 --- a/neutron/agent/l3/router_info.py +++ b/neutron/agent/l3/router_info.py @@ -646,7 +646,8 @@ class RouterInfo(object): return [common_utils.ip_to_cidr(ip['floating_ip_address']) for ip in floating_ips] - def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name): + def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name, + link_up=True): self.driver.plug(ex_gw_port['network_id'], ex_gw_port['id'], interface_name, @@ -654,7 +655,8 @@ class RouterInfo(object): bridge=self.agent_conf.external_network_bridge, namespace=ns_name, prefix=EXTERNAL_DEV_PREFIX, - mtu=ex_gw_port.get('mtu')) + mtu=ex_gw_port.get('mtu'), + link_up=link_up) if self.agent_conf.external_network_bridge: # NOTE(slaweq): for OVS implementations remove the DEAD VLAN tag # on ports. DEAD VLAN tag is added to each newly created port @@ -722,7 +724,11 @@ class RouterInfo(object): LOG.debug("External gateway added: port(%s), interface(%s), ns(%s)", ex_gw_port, interface_name, ns_name) self._plug_external_gateway(ex_gw_port, interface_name, ns_name) + self._external_gateway_settings(ex_gw_port, interface_name, + ns_name, preserve_ips) + def _external_gateway_settings(self, ex_gw_port, interface_name, + ns_name, preserve_ips): # Build up the interface and gateway IP addresses that # will be added to the interface. ip_cidrs = common_utils.fixed_ip_cidrs(ex_gw_port['fixed_ips']) @@ -767,15 +773,18 @@ class RouterInfo(object): return any(netaddr.IPAddress(gw_ip).version == 6 for gw_ip in gateway_ips) - def external_gateway_added(self, ex_gw_port, interface_name): + def get_router_preserve_ips(self): preserve_ips = self._list_floating_ip_cidrs() preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id)) + return preserve_ips + + def external_gateway_added(self, ex_gw_port, interface_name): + preserve_ips = self.get_router_preserve_ips() self._external_gateway_added( ex_gw_port, interface_name, self.ns_name, preserve_ips) def external_gateway_updated(self, ex_gw_port, interface_name): - preserve_ips = self._list_floating_ip_cidrs() - preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id)) + preserve_ips = self.get_router_preserve_ips() self._external_gateway_added( ex_gw_port, interface_name, self.ns_name, preserve_ips) diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index eb43302dd8c..de27fd7bbfe 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -266,15 +266,16 @@ class LinuxInterfaceDriver(object): @abc.abstractmethod def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plug in the interface only for new devices that don't exist yet.""" def plug(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, link_up=True): if not ip_lib.device_exists(device_name, namespace=namespace): self.plug_new(network_id, port_id, device_name, mac_address, - bridge, namespace, prefix, mtu) + bridge, namespace, prefix, mtu, link_up) else: LOG.info("Device %s already exists", device_name) if mtu: @@ -310,10 +311,21 @@ class LinuxInterfaceDriver(object): LOG.warning("Interface driver cannot update MTU for ports") self._mtu_update_warn_logged = True + def set_link_status(self, device_name, namespace=None, link_up=True): + ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name) + if not ns_dev.exists(): + LOG.debug("Device %s may concurrently be deleted.", device_name) + return + if link_up: + ns_dev.link.set_up() + else: + ns_dev.link.set_down() + class NullDriver(LinuxInterfaceDriver): def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): pass def unplug(self, device_name, bridge=None, namespace=None, prefix=None): @@ -352,7 +364,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): ovs.clear_db_attribute("Port", interface, "tag") def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plug in the interface.""" if not bridge: bridge = self.conf.ovs_integration_bridge @@ -403,7 +416,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): else: LOG.warning("No MTU configured for port %s", port_id) - ns_dev.link.set_up() + if link_up: + ns_dev.link.set_up() if self.conf.ovs_use_veth: root_dev.link.set_up() @@ -459,7 +473,8 @@ class IVSInterfaceDriver(LinuxInterfaceDriver): utils.execute(cmd, run_as_root=True) def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plug in the interface.""" ip = ip_lib.IPWrapper() tap_name = self._get_tap_name(device_name, prefix) @@ -505,7 +520,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver): DEV_NAME_PREFIX = 'ns-' def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plugin the interface.""" ip = ip_lib.IPWrapper() @@ -524,7 +540,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver): LOG.warning("No MTU configured for port %s", port_id) root_veth.link.set_up() - ns_veth.link.set_up() + if link_up: + ns_veth.link.set_up() def unplug(self, device_name, bridge=None, namespace=None, prefix=None): """Unplug the interface.""" diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index 5619cd76903..74f697badaa 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -452,6 +452,12 @@ class KeepalivedManager(object): pm = self.get_process() pm.disable(sig='15') + def check_processes(self): + keepalived_pm = self.get_process() + vrrp_pm = self._get_vrrp_process( + self.get_vrrp_pid_file_name(keepalived_pm.get_pid_file_name())) + return keepalived_pm.active and vrrp_pm.active + def get_process(self): return external_process.ProcessManager( cfg.CONF, diff --git a/neutron/tests/unit/agent/l3/test_dvr_local_router.py b/neutron/tests/unit/agent/l3/test_dvr_local_router.py index d30865ec92c..34d37e8e8ca 100644 --- a/neutron/tests/unit/agent/l3/test_dvr_local_router.py +++ b/neutron/tests/unit/agent/l3/test_dvr_local_router.py @@ -900,9 +900,37 @@ class TestDvrRouterOperations(base.BaseTestCase): self.mock_driver.unplug.reset_mock() self._set_ri_kwargs(agent, router['id'], router) ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs) + ri._ha_state_path = self.get_temp_file_path('router_ha_state') ri._create_snat_namespace = mock.Mock() ri.update_initial_state = mock.Mock() ri._plug_external_gateway = mock.Mock() ri.initialize(mock.Mock()) ri._create_dvr_gateway(mock.Mock(), mock.Mock()) ri._create_snat_namespace.assert_called_once_with() + + def test_initialize_dvr_ha_router_reset_state(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + agent.conf.agent_mode = lib_constants.L3_AGENT_MODE_DVR_SNAT + router = l3_test_common.prepare_router_data( + num_internal_ports=2, enable_ha=True) + router['gw_port_host'] = HOSTNAME + router[lib_constants.HA_INTERFACE_KEY]['status'] = 'ACTIVE' + self.mock_driver.unplug.reset_mock() + self._set_ri_kwargs(agent, router['id'], router) + + ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs) + ri._ha_state_path = self.get_temp_file_path('router_ha_state') + + with open(ri._ha_state_path, "w") as f: + f.write("master") + + ri._create_snat_namespace = mock.Mock() + ri.update_initial_state = mock.Mock() + ri._plug_external_gateway = mock.Mock() + with mock.patch("neutron.agent.linux.keepalived." + "KeepalivedManager.check_processes", + return_value=False): + ri.initialize(mock.Mock()) + with open(ri._ha_state_path, "r") as f: + state = f.readline() + self.assertEqual("backup", state)