From b9073d7c3bc9d1d9254294d698a6fae319fd22ad Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Thu, 18 Feb 2021 11:57:45 +0000 Subject: [PATCH] [L3][HA] Retry when setting HA router GW status. When a HA router instance changes the state (active, backup), the GW interface is set to up or down. As reported in the bug, while keepalived is configuring the interface, the interface disappears and appears again from the kernel namespace, as seen in the udev messages. This patch is a workaround until the real issue is addressed (if possible), retrying the interface configuration for a small period of time. Related-Bug: #1916024 Change-Id: I8ced69f4f8e7d7c73da130a57e89e9d66590390b (cherry picked from commit 662f483120972a373e19bde52f16392e2ccb9c82) --- neutron/agent/l3/ha_router.py | 7 +++++-- neutron/agent/linux/interface.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index d1971e72ea7..d4beed0306f 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -553,8 +553,11 @@ class HaRouter(router.RouterInfo): if ex_gw_port_id: interface_name = self.get_external_device_name(ex_gw_port_id) ns_name = self.get_gw_ns_name() - self.driver.set_link_status(interface_name, ns_name, - link_up=link_up) + if (not self.driver.set_link_status( + interface_name, namespace=ns_name, link_up=link_up) and + link_up): + LOG.error('Gateway interface for router %s was not set up; ' + 'router will not work properly', self.router_id) if link_up and set_gw: preserve_ips = self.get_router_preserve_ips() self._external_gateway_settings(ex_gw_port, interface_name, diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index 022488c4f56..c4919def686 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -322,14 +322,20 @@ class LinuxInterfaceDriver(object, metaclass=abc.ABCMeta): def set_link_status(self, device_name, namespace=None, link_up=True): ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name) - if not ns_dev.exists(): - LOG.debug("Device %s may concurrently be deleted.", device_name) - return + try: + utils.wait_until_true(ns_dev.exists, timeout=3) + except utils.WaitTimeout: + LOG.debug('Device %s may have been deleted concurrently', + device_name) + return False + if link_up: ns_dev.link.set_up() else: ns_dev.link.set_down() + return True + class NullDriver(LinuxInterfaceDriver): def plug_new(self, network_id, port_id, device_name, mac_address,