Do not link up HA router gateway in backup node

L3 router will set its devices link up by default.
For HA routers, the gateway device will be pluged
in all scheduled hosts. When the gateway deivce is
up in backup node, it will send out IPv6 related
packets (MLDv2) according to some kernal config.
This will cause the physical fabric think that the
gateway MAC is now working in the backup node. And
finally the master node L3 traffic will be broken.

This patch sets the backup gateway device link down
by default. When the VRRP sets the master state in
one host, the L3 agent state change procedure will
do link up action for the gateway device.

Closes-Bug: #1859832
Change-Id: I8dca2c1a2f8cb467cfb44420f0eea54ca0932b05
This commit is contained in:
LIU Yulong 2019-10-31 19:06:37 +08:00
parent e89d158d45
commit c52029c39a
7 changed files with 121 additions and 16 deletions

View File

@ -114,7 +114,9 @@ class DvrEdgeHaRouter(dvr_edge_router.DvrEdgeRouter,
def _external_gateway_added(self, ex_gw_port, interface_name,
ns_name, preserve_ips):
self._plug_external_gateway(ex_gw_port, interface_name, ns_name)
link_up = self.external_gateway_link_up()
self._plug_external_gateway(ex_gw_port, interface_name, ns_name,
link_up=link_up)
def _is_this_snat_host(self):
return self.agent_conf.agent_mode == constants.L3_AGENT_MODE_DVR_SNAT

View File

@ -163,6 +163,15 @@ class AgentMixin(object):
'agent %(host)s',
state_change_data)
# Set external gateway port link up or down according to state
if state == 'master':
ri.set_external_gw_port_link_status(link_up=True, set_gw=True)
elif state == 'backup':
ri.set_external_gw_port_link_status(link_up=False)
else:
LOG.warning('Router %s has status %s, '
'no action to router gateway device.',
router_id, state)
# TODO(dalvarez): Fix bug 1677279 by moving the IPv6 parameters
# configuration to keepalived-state-change in order to remove the
# dependency that currently exists on l3-agent running for the IPv6

View File

@ -94,6 +94,15 @@ class HaRouter(router.RouterInfo):
def ha_vr_id(self):
return self.router.get('ha_vr_id')
def _check_and_set_real_state(self):
# When the physical host was down/up, the 'master' router may still
# have its original state in the _ha_state_path file. We directly
# reset it to 'backup'.
if (not self.keepalived_manager.check_processes() and
os.path.exists(self.ha_state_path) and
self.ha_state == 'master'):
self.ha_state = 'backup'
@property
def ha_state(self):
if self._ha_state:
@ -139,6 +148,7 @@ class HaRouter(router.RouterInfo):
self.set_ha_port()
self._init_keepalived_manager(process_monitor)
self._check_and_set_real_state()
self.ha_network_added()
self.update_initial_state(self.state_change_callback)
self.spawn_state_change_monitor(process_monitor)
@ -443,7 +453,9 @@ class HaRouter(router.RouterInfo):
return port1_filtered == port2_filtered
def external_gateway_added(self, ex_gw_port, interface_name):
self._plug_external_gateway(ex_gw_port, interface_name, self.ns_name)
link_up = self.external_gateway_link_up()
self._plug_external_gateway(ex_gw_port, interface_name,
self.ns_name, link_up=link_up)
self._add_gateway_vip(ex_gw_port, interface_name)
self._disable_ipv6_addressing_on_interface(interface_name)
@ -507,3 +519,27 @@ class HaRouter(router.RouterInfo):
if (self.keepalived_manager.get_process().active and
self.ha_state == 'master'):
super(HaRouter, self).enable_radvd(internal_ports)
def external_gateway_link_up(self):
# Check HA router ha_state for its gateway port link state.
# 'backup' instance will not link up the gateway port.
return self.ha_state == 'master'
def set_external_gw_port_link_status(self, link_up, set_gw=False):
link_state = "up" if link_up else "down"
LOG.info('Set router %s gateway device link state to %s.',
self.router_id, link_state)
ex_gw_port = self.get_ex_gw_port()
ex_gw_port_id = (ex_gw_port and ex_gw_port['id'] or
self.ex_gw_port and self.ex_gw_port['id'])
if ex_gw_port_id:
interface_name = self.get_external_device_name(ex_gw_port_id)
ns_name = self.get_gw_ns_name()
self.driver.set_link_status(interface_name, ns_name,
link_up=link_up)
if link_up and set_gw:
preserve_ips = self.get_router_preserve_ips()
self._external_gateway_settings(ex_gw_port, interface_name,
ns_name, preserve_ips)
self.routes_updated([], self.routes)

View File

@ -696,14 +696,16 @@ class RouterInfo(BaseRouterInfo):
return [common_utils.ip_to_cidr(ip['floating_ip_address'])
for ip in floating_ips]
def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name):
def _plug_external_gateway(self, ex_gw_port, interface_name, ns_name,
link_up=True):
self.driver.plug(ex_gw_port['network_id'],
ex_gw_port['id'],
interface_name,
ex_gw_port['mac_address'],
namespace=ns_name,
prefix=EXTERNAL_DEV_PREFIX,
mtu=ex_gw_port.get('mtu'))
mtu=ex_gw_port.get('mtu'),
link_up=link_up)
def _get_external_gw_ips(self, ex_gw_port):
gateway_ips = []
@ -763,7 +765,11 @@ class RouterInfo(BaseRouterInfo):
LOG.debug("External gateway added: port(%s), interface(%s), ns(%s)",
ex_gw_port, interface_name, ns_name)
self._plug_external_gateway(ex_gw_port, interface_name, ns_name)
self._external_gateway_settings(ex_gw_port, interface_name,
ns_name, preserve_ips)
def _external_gateway_settings(self, ex_gw_port, interface_name,
ns_name, preserve_ips):
# Build up the interface and gateway IP addresses that
# will be added to the interface.
ip_cidrs = common_utils.fixed_ip_cidrs(ex_gw_port['fixed_ips'])
@ -808,17 +814,19 @@ class RouterInfo(BaseRouterInfo):
return any(netaddr.IPAddress(gw_ip).version == 6
for gw_ip in gateway_ips)
def external_gateway_added(self, ex_gw_port, interface_name):
def get_router_preserve_ips(self):
preserve_ips = self._list_floating_ip_cidrs() + list(
self.centralized_port_forwarding_fip_set)
preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id))
return preserve_ips
def external_gateway_added(self, ex_gw_port, interface_name):
preserve_ips = self.get_router_preserve_ips()
self._external_gateway_added(
ex_gw_port, interface_name, self.ns_name, preserve_ips)
def external_gateway_updated(self, ex_gw_port, interface_name):
preserve_ips = self._list_floating_ip_cidrs() + list(
self.centralized_port_forwarding_fip_set)
preserve_ips.extend(self.agent.pd.get_preserve_ips(self.router_id))
preserve_ips = self.get_router_preserve_ips()
self._external_gateway_added(
ex_gw_port, interface_name, self.ns_name, preserve_ips)

View File

@ -256,15 +256,16 @@ class LinuxInterfaceDriver(object):
@abc.abstractmethod
def plug_new(self, network_id, port_id, device_name, mac_address,
bridge=None, namespace=None, prefix=None, mtu=None):
bridge=None, namespace=None, prefix=None, mtu=None,
link_up=True):
"""Plug in the interface only for new devices that don't exist yet."""
def plug(self, network_id, port_id, device_name, mac_address,
bridge=None, namespace=None, prefix=None, mtu=None):
bridge=None, namespace=None, prefix=None, mtu=None, link_up=True):
if not ip_lib.device_exists(device_name,
namespace=namespace):
self.plug_new(network_id, port_id, device_name, mac_address,
bridge, namespace, prefix, mtu)
bridge, namespace, prefix, mtu, link_up)
else:
LOG.info("Device %s already exists", device_name)
if mtu:
@ -300,10 +301,21 @@ class LinuxInterfaceDriver(object):
LOG.warning("Interface driver cannot update MTU for ports")
self._mtu_update_warn_logged = True
def set_link_status(self, device_name, namespace=None, link_up=True):
ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name)
if not ns_dev.exists():
LOG.debug("Device %s may concurrently be deleted.", device_name)
return
if link_up:
ns_dev.link.set_up()
else:
ns_dev.link.set_down()
class NullDriver(LinuxInterfaceDriver):
def plug_new(self, network_id, port_id, device_name, mac_address,
bridge=None, namespace=None, prefix=None, mtu=None):
bridge=None, namespace=None, prefix=None, mtu=None,
link_up=True):
pass
def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
@ -338,7 +350,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver):
ovs.replace_port(device_name, *attrs)
def plug_new(self, network_id, port_id, device_name, mac_address,
bridge=None, namespace=None, prefix=None, mtu=None):
bridge=None, namespace=None, prefix=None, mtu=None,
link_up=True):
"""Plug in the interface."""
if not bridge:
bridge = self.conf.OVS.integration_bridge
@ -400,7 +413,8 @@ class OVSInterfaceDriver(LinuxInterfaceDriver):
else:
LOG.warning("No MTU configured for port %s", port_id)
ns_dev.link.set_up()
if link_up:
ns_dev.link.set_up()
if self.conf.ovs_use_veth:
root_dev.link.set_up()
@ -440,7 +454,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver):
DEV_NAME_PREFIX = 'ns-'
def plug_new(self, network_id, port_id, device_name, mac_address,
bridge=None, namespace=None, prefix=None, mtu=None):
bridge=None, namespace=None, prefix=None, mtu=None,
link_up=True):
"""Plugin the interface."""
ip = ip_lib.IPWrapper()
@ -459,7 +474,8 @@ class BridgeInterfaceDriver(LinuxInterfaceDriver):
LOG.warning("No MTU configured for port %s", port_id)
root_veth.link.set_up()
ns_veth.link.set_up()
if link_up:
ns_veth.link.set_up()
def unplug(self, device_name, bridge=None, namespace=None, prefix=None):
"""Unplug the interface."""

View File

@ -452,6 +452,12 @@ class KeepalivedManager(object):
pm = self.get_process()
pm.disable(sig='15')
def check_processes(self):
keepalived_pm = self.get_process()
vrrp_pm = self._get_vrrp_process(
self.get_vrrp_pid_file_name(keepalived_pm.get_pid_file_name()))
return keepalived_pm.active and vrrp_pm.active
def get_process(self):
return external_process.ProcessManager(
cfg.CONF,

View File

@ -899,9 +899,37 @@ class TestDvrRouterOperations(base.BaseTestCase):
self.mock_driver.unplug.reset_mock()
self._set_ri_kwargs(agent, router['id'], router)
ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs)
ri._ha_state_path = self.get_temp_file_path('router_ha_state')
ri._create_snat_namespace = mock.Mock()
ri.update_initial_state = mock.Mock()
ri._plug_external_gateway = mock.Mock()
ri.initialize(mock.Mock())
ri._create_dvr_gateway(mock.Mock(), mock.Mock())
ri._create_snat_namespace.assert_called_once_with()
def test_initialize_dvr_ha_router_reset_state(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
agent.conf.agent_mode = lib_constants.L3_AGENT_MODE_DVR_SNAT
router = l3_test_common.prepare_router_data(
num_internal_ports=2, enable_ha=True)
router['gw_port_host'] = HOSTNAME
router[lib_constants.HA_INTERFACE_KEY]['status'] = 'ACTIVE'
self.mock_driver.unplug.reset_mock()
self._set_ri_kwargs(agent, router['id'], router)
ri = dvr_edge_ha_rtr.DvrEdgeHaRouter(HOSTNAME, [], **self.ri_kwargs)
ri._ha_state_path = self.get_temp_file_path('router_ha_state')
with open(ri._ha_state_path, "w") as f:
f.write("master")
ri._create_snat_namespace = mock.Mock()
ri.update_initial_state = mock.Mock()
ri._plug_external_gateway = mock.Mock()
with mock.patch("neutron.agent.linux.keepalived."
"KeepalivedManager.check_processes",
return_value=False):
ri.initialize(mock.Mock())
with open(ri._ha_state_path, "r") as f:
state = f.readline()
self.assertEqual("backup", state)