From db3f5b3ff26ff7d748a3b04a6c38a47e37448e17 Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Wed, 23 Feb 2022 15:12:37 +0100 Subject: [PATCH] Repeat few times put new interface in the namespace It seems that from time to time we may experience another variant of the shy ovs ports issue (see [1] for details) while trying to put interface, e.g. router's port into the namespace by the interface driver. To avoid that, this patch adds some repeats of the putting interface in the namespace, in the same way like it was done originally with set_address of the device. Additionally, this patch also refactors a bit part which is responsible to set mac address of the device to be able to clean ports in case of the permanent error there. [1] https://bugs.launchpad.net/neutron/+bug/1618987 Closes-Bug: #1961740 Change-Id: I3e0786fd8d0320036b9046746ae579c8ed2ecf27 (cherry picked from commit f7fac03ae1a4b1b1471f6054ba058382a1b9e1a1) --- neutron/agent/linux/interface.py | 70 ++++++++++++++----- .../tests/unit/agent/linux/test_interface.py | 12 +++- 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index 247d8ea8f56..8e658aba0c6 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -371,6 +371,48 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): ovs = ovs_lib.OVSBridge(bridge) ovs.replace_port(device_name, *attrs) + def _set_device_address(self, device, mac_address): + for i in range(9): + # workaround for the OVS shy port syndrome. ports sometimes + # hide for a bit right after they are first created. + # see bug/1618987 + try: + device.link.set_address(mac_address) + break + except RuntimeError as e: + LOG.warning("Got error trying to set mac, retrying: %s", + str(e)) + time.sleep(1) + else: + # didn't break, we give it one last shot without catching + device.link.set_address(mac_address) + + def _add_device_to_namespace(self, ip_wrapper, device, namespace): + namespace_obj = ip_wrapper.ensure_namespace(namespace) + for i in range(9): + try: + namespace_obj.add_device_to_namespace(device) + break + except ip_lib.NetworkInterfaceNotFound: + # NOTE(slaweq): if the exception was NetworkInterfaceNotFound + # then lets try again, otherwise lets simply raise it as this + # is some different issue than retry tries to workaround + LOG.warning("Failed to set interface %s into namespace %s. " + "Interface not found, attempt: %s, retrying.", + device, namespace, i + 1) + time.sleep(1) + except utils.WaitTimeout: + # NOTE(slaweq): if the exception was WaitTimeout then it means + # that probably device wasn't found in the desired namespace + # for 5 seconds, so lets try again too + LOG.warning("Failed to set interface %s into namespace %s. " + "Interface not found in namespace, attempt: %s, " + "retrying.", device, namespace, i + 1) + time.sleep(1) + else: + # didn't break, we give it one last shot without catching + namespace_obj.add_device_to_namespace(device) + def plug_new(self, network_id, port_id, device_name, mac_address, bridge=None, namespace=None, prefix=None, mtu=None, link_up=True): @@ -395,31 +437,25 @@ class OVSInterfaceDriver(LinuxInterfaceDriver): internal = not self.conf.ovs_use_veth self._ovs_add_port(bridge, tap_name, port_id, mac_address, internal=internal) - for i in range(9): - # workaround for the OVS shy port syndrome. ports sometimes - # hide for a bit right after they are first created. - # see bug/1618987 - try: - ns_dev.link.set_address(mac_address) - break - except RuntimeError as e: - LOG.warning("Got error trying to set mac, retrying: %s", - str(e)) - time.sleep(1) - else: - # didn't break, we give it one last shot without catching - ns_dev.link.set_address(mac_address) + try: + self._set_device_address(ns_dev, mac_address) + except Exception: + LOG.warning("Failed to set mac for interface %s", ns_dev) + with excutils.save_and_reraise_exception(): + ovs = ovs_lib.OVSBridge(bridge) + ovs.delete_port(tap_name) # Add an interface created by ovs to the namespace. if not self.conf.ovs_use_veth and namespace: try: - namespace_obj = ip.ensure_namespace(namespace) - namespace_obj.add_device_to_namespace(ns_dev) - except (pyroute2_exc.NetlinkError, OSError): + self._add_device_to_namespace(ip, ns_dev, namespace) + except (pyroute2_exc.NetlinkError, OSError, RuntimeError): # To prevent the namespace failure from blasting OVS, the OVS # port creation should be reverted. Possible exceptions: # - NetlinkError in case of duplicated interface # - OSError in case of corrupted namespace + # - RuntimeError in case of any issue with interface, like e.g. + # Interface not found LOG.warning("Failed to plug interface %s into bridge %s, " "cleaning up", device_name, bridge) with excutils.save_and_reraise_exception(): diff --git a/neutron/tests/unit/agent/linux/test_interface.py b/neutron/tests/unit/agent/linux/test_interface.py index 81855d70a16..c01665d7afa 100644 --- a/neutron/tests/unit/agent/linux/test_interface.py +++ b/neutron/tests/unit/agent/linux/test_interface.py @@ -449,6 +449,11 @@ class TestOVSInterfaceDriver(TestBase): self.device_exists.side_effect = device_exists link = self.ip.return_value.device.return_value.link link.set_address.side_effect = (RuntimeError, None) + namespace_obj = ( + self.ip.return_value.ensure_namespace.return_value) + self.ip.ensure_namespace.return_value = namespace_obj + namespace_obj.add_device_to_namespace.side_effect = ( + ip_lib.NetworkInterfaceNotFound, utils.WaitTimeout, None) ovs.plug('01234567-1234-1234-99', 'port-1234', 'tap0', @@ -472,6 +477,10 @@ class TestOVSInterfaceDriver(TestBase): if namespace: expected.extend( [mock.call().ensure_namespace(namespace), + mock.call().ensure_namespace().add_device_to_namespace( + mock.ANY), + mock.call().ensure_namespace().add_device_to_namespace( + mock.ANY), mock.call().ensure_namespace().add_device_to_namespace( mock.ANY)]) expected.extend([ @@ -496,7 +505,8 @@ class TestOVSInterfaceDriver(TestBase): reraise.start() ip_wrapper = mock.Mock() for exception in (OSError(), - pyroute2_exc.NetlinkError(22)): + pyroute2_exc.NetlinkError(22), + RuntimeError()): ip_wrapper.ensure_namespace.side_effect = exception self.ip.return_value = ip_wrapper delete_port.reset_mock()