From b9a29681003b7f975d7bc9687399cafb5549010a Mon Sep 17 00:00:00 2001 From: LIU Yulong Date: Thu, 31 Oct 2019 19:06:37 +0800 Subject: [PATCH] Do not link up HA router gateway in backup node L3 router will set its devices link up by default. For HA routers, the gateway device will be pluged in all scheduled hosts. When the gateway deivce is up in backup node, it will send out IPv6 related packets (MLDv2) according to some kernal config. This will cause the physical fabric think that the gateway MAC is now working in the backup node. And finally the master node L3 traffic will be broken. This patch sets the backup gateway device link down by default. When the VRRP sets the master state in one host, the L3 agent state change procedure will do link up action for the gateway device. Closes-Bug: #1859832 Change-Id: I8dca2c1a2f8cb467cfb44420f0eea54ca0932b05 (cherry picked from commit c52029c39aa824a67095fbbf9e59eff769d92587) --- neutron/agent/l3/dvr_edge_ha_router.py | 4 +- neutron/agent/l3/ha.py | 9 +++++ neutron/agent/l3/ha_router.py | 38 ++++++++++++++++++- neutron/agent/l3/router_info.py | 20 +++++++--- neutron/agent/linux/interface.py | 32 ++++++++++++---- neutron/agent/linux/keepalived.py | 6 +++ .../unit/agent/l3/test_dvr_local_router.py | 28 ++++++++++++++ 7 files changed, 121 insertions(+), 16 deletions(-) diff --git a/neutron/agent/l3/dvr_edge_ha_router.py b/neutron/agent/l3/dvr_edge_ha_router.py index b92f70b70f4..71f740bef92 100644 --- a/neutron/agent/l3/dvr_edge_ha_router.py +++ b/neutron/agent/l3/dvr_edge_ha_router.py @@ -114,7 +114,9 @@ class DvrEdgeHaRouter(dvr_edge_router.DvrEdgeRouter, def _external_gateway_added(self, ex_gw_port, interface_name, ns_name, preserve_ips): - self._plug_external_gateway(ex_gw_port, interface_name, ns_name) + link_up = self.external_gateway_link_up() + self._plug_external_gateway(ex_gw_port, interface_name, ns_name, + link_up=link_up) def _is_this_snat_host(self): return self.agent_conf.agent_mode == constants.L3_AGENT_MODE_DVR_SNAT diff --git a/neutron/agent/l3/ha.py b/neutron/agent/l3/ha.py index 0efe666aef2..6755548a16a 100644 --- a/neutron/agent/l3/ha.py +++ b/neutron/agent/l3/ha.py @@ -161,6 +161,15 @@ class AgentMixin(object): if ri is None: return + # Set external gateway port link up or down according to state + if state == 'master': + ri.set_external_gw_port_link_status(link_up=True, set_gw=True) + elif state == 'backup': + ri.set_external_gw_port_link_status(link_up=False) + else: + LOG.warning('Router %s has status %s, ' + 'no action to router gateway device.', + router_id, state) # TODO(dalvarez): Fix bug 1677279 by moving the IPv6 parameters # configuration to keepalived-state-change in order to remove the # dependency that currently exists on l3-agent running for the IPv6 diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 47b0849fe27..a794402f109 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -92,6 +92,15 @@ class HaRouter(router.RouterInfo): def ha_vr_id(self): return self.router.get('ha_vr_id') + def _check_and_set_real_state(self): + # When the physical host was down/up, the 'master' router may still + # have its original state in the _ha_state_path file. We directly + # reset it to 'backup'. + if (not self.keepalived_manager.check_processes() and + os.path.exists(self.ha_state_path) and + self.ha_state == 'master'): + self.ha_state = 'backup' + @property def ha_state(self): if self._ha_state: @@ -137,6 +146,7 @@ class HaRouter(router.RouterInfo): self.set_ha_port() self._init_keepalived_manager(process_monitor) + self._check_and_set_real_state() self.ha_network_added() self.update_initial_state(self.state_change_callback) self.spawn_state_change_monitor(process_monitor) @@ -431,7 +441,9 @@ class HaRouter(router.RouterInfo): return port1_filtered == port2_filtered def external_gateway_added(self, ex_gw_port, interface_name): - self._plug_external_gateway(ex_gw_port, interface_name, self.ns_name) + link_up = self.external_gateway_link_up() + self._plug_external_gateway(ex_gw_port, interface_name, + self.ns_name, link_up=link_up) self._add_gateway_vip(ex_gw_port, interface_name) self._disable_ipv6_addressing_on_interface(interface_name) @@ -495,3 +507,27 @@ class HaRouter(router.RouterInfo): if (self.keepalived_manager.get_process().active and self.ha_state == 'master'): super(HaRouter, self).enable_radvd(internal_ports) + + def external_gateway_link_up(self): + # Check HA router ha_state for its gateway port link state. + # 'backup' instance will not link up the gateway port. + return self.ha_state == 'master' + + def set_external_gw_port_link_status(self, link_up, set_gw=False): + link_state = "up" if link_up else "down" + LOG.info('Set router %s gateway device link state to %s.', + self.router_id, link_state) + + ex_gw_port = self.get_ex_gw_port() + ex_gw_port_id = (ex_gw_port and ex_gw_port['id'] or + self.ex_gw_port and self.ex_gw_port['id']) + if ex_gw_port_id: + interface_name = self.get_external_device_name(ex_gw_port_id) + ns_name = self.get_gw_ns_name() + self.driver.set_link_status(interface_name, ns_name, + link_up=link_up) + if link_up and set_gw: + preserve_ips = self.get_router_preserve_ips() + self._external_gateway_settings(ex_gw_port, interface_name, + ns_name, preserve_ips) + self.routes_updated([], self.routes) diff --git a/neutron/agent/l3/router_info.py b/neutron/agent/l3/router_info.py index 021803b2287..54125c366c4 100644 --- a/neutron/agent/l3/router_info.py +++ b/neutron/agent/l3/router_info.py @@ -660,14 +660,16 @@ class RouterInfo(object): return [common_utils.ip_to_cidr(ip['floating_ip_address']) for ip in floating_ips] - def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name): + def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name, + link_up=True): self.driver.plug(ex_gw_port['network_id'], ex_gw_port['id'], interface_name, ex_gw_port['mac_address'], namespace=ns_name, prefix=EXTERNAL_DEV_PREFIX, - mtu=ex_gw_port.get('mtu')) + mtu=ex_gw_port.get('mtu'), + link_up=link_up) def _get_external_gw_ips(self, ex_gw_port): gateway_ips = [] @@ -726,7 +728,11 @@ class RouterInfo(object): LOG.debug("External gateway added: port(%s), interface(%s), ns(%s)", ex_gw_port, interface_name, ns_name) self._plug_external_gateway(ex_gw_port, interface_name, ns_name) + self._external_gateway_settings(ex_gw_port, interface_name, + ns_name, preserve_ips) + def _external_gateway_settings(self, ex_gw_port, interface_name, + ns_name, preserve_ips): # Build up the interface and gateway IP addresses that # will be added to the interface. ip_cidrs = common_utils.fixed_ip_cidrs(ex_gw_port['fixed_ips']) @@ -771,17 +777,19 @@ class RouterInfo(object): return any(netaddr.IPAddress(gw_ip).version == 6 for gw_ip in gateway_ips) - def external_gateway_added(self, ex_gw_port, interface_name): + def get_router_preserve_ips(self): preserve_ips = self._list_floating_ip_cidrs() + list( self.centralized_port_forwarding_fip_set) preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id)) + return preserve_ips + + def external_gateway_added(self, ex_gw_port, interface_name): + preserve_ips = self.get_router_preserve_ips() self._external_gateway_added( ex_gw_port, interface_name, self.ns_name, preserve_ips) def external_gateway_updated(self, ex_gw_port, interface_name): - preserve_ips = self._list_floating_ip_cidrs() + list( - self.centralized_port_forwarding_fip_set) - preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id)) + preserve_ips = self.get_router_preserve_ips() self._external_gateway_added( ex_gw_port, interface_name, self.ns_name, preserve_ips) diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index ddfe6afb225..9b242933c0f 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -256,15 +256,16 @@ class LinuxInterfaceDriver(object): @abc.abstractmethod def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plug in the interface only for new devices that don't exist yet.""" def plug(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, link_up=True): if not ip_lib.device_exists(device_name, namespace=namespace): self.plug_new(network_id, port_id, device_name, mac_address, - bridge, namespace, prefix, mtu) + bridge, namespace, prefix, mtu, link_up) else: LOG.info("Device %s already exists", device_name) if mtu: @@ -300,10 +301,21 @@ class LinuxInterfaceDriver(object): LOG.warning("Interface driver cannot update MTU for ports") self._mtu_update_warn_logged = True + def set_link_status(self, device_name, namespace=None, link_up=True): + ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name) + if not ns_dev.exists(): + LOG.debug("Device %s may concurrently be deleted.", device_name) + return + if link_up: + ns_dev.link.set_up() + else: + ns_dev.link.set_down() + class NullDriver(LinuxInterfaceDriver): def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): pass def unplug(self, device_name, bridge=None, namespace=None, prefix=None): @@ -338,7 +350,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): ovs.replace_port(device_name, *attrs) def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plug in the interface.""" if not bridge: bridge = self.conf.ovs_integration_bridge @@ -402,7 +415,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): else: LOG.warning("No MTU configured for port %s", port_id) - ns_dev.link.set_up() + if link_up: + ns_dev.link.set_up() if self.conf.ovs_use_veth: root_dev.link.set_up() @@ -442,7 +456,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver): DEV_NAME_PREFIX = 'ns-' def plug_new(self, network_id, port_id, device_name, mac_address, - bridge=None, namespace=None, prefix=None, mtu=None): + bridge=None, namespace=None, prefix=None, mtu=None, + link_up=True): """Plugin the interface.""" ip = ip_lib.IPWrapper() @@ -461,7 +476,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver): LOG.warning("No MTU configured for port %s", port_id) root_veth.link.set_up() - ns_veth.link.set_up() + if link_up: + ns_veth.link.set_up() def unplug(self, device_name, bridge=None, namespace=None, prefix=None): """Unplug the interface.""" diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index c0bf8e84447..6c4c767807d 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -452,6 +452,12 @@ class KeepalivedManager(object): pm = self.get_process() pm.disable(sig='15') + def check_processes(self): + keepalived_pm = self.get_process() + vrrp_pm = self._get_vrrp_process( + self.get_vrrp_pid_file_name(keepalived_pm.get_pid_file_name())) + return keepalived_pm.active and vrrp_pm.active + def get_process(self): return external_process.ProcessManager( cfg.CONF, diff --git a/neutron/tests/unit/agent/l3/test_dvr_local_router.py b/neutron/tests/unit/agent/l3/test_dvr_local_router.py index adb58ffe05c..cdd607ca9fd 100644 --- a/neutron/tests/unit/agent/l3/test_dvr_local_router.py +++ b/neutron/tests/unit/agent/l3/test_dvr_local_router.py @@ -899,9 +899,37 @@ class TestDvrRouterOperations(base.BaseTestCase): self.mock_driver.unplug.reset_mock() self._set_ri_kwargs(agent, router['id'], router) ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs) + ri._ha_state_path = self.get_temp_file_path('router_ha_state') ri._create_snat_namespace = mock.Mock() ri.update_initial_state = mock.Mock() ri._plug_external_gateway = mock.Mock() ri.initialize(mock.Mock()) ri._create_dvr_gateway(mock.Mock(), mock.Mock()) ri._create_snat_namespace.assert_called_once_with() + + def test_initialize_dvr_ha_router_reset_state(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + agent.conf.agent_mode = lib_constants.L3_AGENT_MODE_DVR_SNAT + router = l3_test_common.prepare_router_data( + num_internal_ports=2, enable_ha=True) + router['gw_port_host'] = HOSTNAME + router[lib_constants.HA_INTERFACE_KEY]['status'] = 'ACTIVE' + self.mock_driver.unplug.reset_mock() + self._set_ri_kwargs(agent, router['id'], router) + + ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs) + ri._ha_state_path = self.get_temp_file_path('router_ha_state') + + with open(ri._ha_state_path, "w") as f: + f.write("master") + + ri._create_snat_namespace = mock.Mock() + ri.update_initial_state = mock.Mock() + ri._plug_external_gateway = mock.Mock() + with mock.patch("neutron.agent.linux.keepalived." + "KeepalivedManager.check_processes", + return_value=False): + ri.initialize(mock.Mock()) + with open(ri._ha_state_path, "r") as f: + state = f.readline() + self.assertEqual("backup", state)