[L3][HA] Retry when setting HA router GW status.

When an HA router instance changes state (active, backup), the
GW interface is set up or down. As reported in the bug, while
keepalived is configuring the interface, the interface disappears
from the kernel namespace and then reappears, as seen in the udev
messages.

This patch is a workaround until the real issue is addressed (if
possible): it retries the interface configuration for a short period
of time.

Related-Bug: #1916024

Change-Id: I8ced69f4f8e7d7c73da130a57e89e9d66590390b
(cherry picked from commit 662f483120)
This commit is contained in:
Rodolfo Alonso Hernandez 2021-02-18 11:57:45 +00:00
parent 820bd67278
commit b9073d7c3b
2 changed files with 14 additions and 5 deletions

View File

@ -553,8 +553,11 @@ class HaRouter(router.RouterInfo):
if ex_gw_port_id:
interface_name = self.get_external_device_name(ex_gw_port_id)
ns_name = self.get_gw_ns_name()
self.driver.set_link_status(interface_name, ns_name,
link_up=link_up)
if (not self.driver.set_link_status(
interface_name, namespace=ns_name, link_up=link_up) and
link_up):
LOG.error('Gateway interface for router %s was not set up; '
'router will not work properly', self.router_id)
if link_up and set_gw:
preserve_ips = self.get_router_preserve_ips()
self._external_gateway_settings(ex_gw_port, interface_name,

View File

@ -322,14 +322,20 @@ class LinuxInterfaceDriver(object, metaclass=abc.ABCMeta):
def set_link_status(self, device_name, namespace=None, link_up=True):
ns_dev = ip_lib.IPWrapper(namespace=namespace).device(device_name)
if not ns_dev.exists():
LOG.debug("Device %s may concurrently be deleted.", device_name)
return
try:
utils.wait_until_true(ns_dev.exists, timeout=3)
except utils.WaitTimeout:
LOG.debug('Device %s may have been deleted concurrently',
device_name)
return False
if link_up:
ns_dev.link.set_up()
else:
ns_dev.link.set_down()
return True
class NullDriver(LinuxInterfaceDriver):
def plug_new(self, network_id, port_id, device_name, mac_address,