From 12c07ba3ea9c6501dd7494561e2920496407c48b Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Thu, 18 Feb 2021 11:57:45 +0000 Subject: [PATCH] [L3][HA] Retry when setting HA router GW status. When a HA router instance changes the state (active, backup), the GW interface is set to up or down. As reported in the bug, while keepalived is configuring the interface, the interface disappears and appears again from the kernel namespace, as seen in the udev messages. This patch is a workaround until the real issue is addressed (if possible), retrying the interface configuration for a small period of time. Related-Bug: #1916024 Change-Id: I8ced69f4f8e7d7c73da130a57e89e9d66590390b (cherry picked from commit 662f483120972a373e19bde52f16392e2ccb9c82) --- neutron/agent/l3/ha_router.py | 7 +++++-- neutron/agent/linux/interface.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 1811123405e..d64d51c209d 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -547,8 +547,11 @@ class HaRouter(router.RouterInfo): if ex_gw_port_id: interface_name = self.get_external_device_name(ex_gw_port_id) ns_name = self.get_gw_ns_name() - self.driver.set_link_status(interface_name, ns_name, - link_up=link_up) + if (not self.driver.set_link_status( + interface_name, namespace=ns_name, link_up=link_up) and + link_up): + LOG.error('Gateway interface for router %s was not set up; ' + 'router will not work properly', self.router_id) if link_up and set_gw: preserve_ips = self.get_router_preserve_ips() self._external_gateway_settings(ex_gw_port, interface_name, diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index 672bf84eded..3ac476d7ba7 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -323,14 +323,20 @@ class LinuxInterfaceDriver(object): def set_link_status(self, device_name, namespace=None, link_up=True): ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name) - if not ns_dev.exists(): - LOG.debug("Device %s may concurrently be deleted.", device_name) - return + try: + utils.wait_until_true(ns_dev.exists, timeout=3) + except utils.WaitTimeout: + LOG.debug('Device %s may have been deleted concurrently', + device_name) + return False + if link_up: ns_dev.link.set_up() else: ns_dev.link.set_down() + return True + class NullDriver(LinuxInterfaceDriver): def plug_new(self, network_id, port_id, device_name, mac_address,