diff --git a/kuryr_kubernetes/cni/binding/base.py b/kuryr_kubernetes/cni/binding/base.py index 016b84fa4..85046d60b 100644 --- a/kuryr_kubernetes/cni/binding/base.py +++ b/kuryr_kubernetes/cni/binding/base.py @@ -34,6 +34,22 @@ LOG = logging.getLogger(__name__) class BaseBindingDriver(object): """Interface to attach ports to pods.""" + def _remove_ifaces(self, ipdb, ifnames, netns='host'): + """Check if any of `ifnames` exists and remove it. + + :param ipdb: ipdb of the network namespace to check + :param ifnames: iterable of interface names to remove + :param netns: network namespace name (used for logging) + """ + for ifname in ifnames: + if ifname in ipdb.interfaces: + LOG.warning('Found hanging interface %(ifname)s inside ' + '%(netns)s netns. Most likely it is a leftover ' + 'from a kuryr-daemon restart. Trying to delete ' + 'it.', {'ifname': ifname, 'netns': netns}) + with ipdb.interfaces[ifname] as iface: + iface.remove() + @abc.abstractmethod def connect(self, vif, ifname, netns, container_id): raise NotImplementedError() diff --git a/kuryr_kubernetes/cni/binding/bridge.py b/kuryr_kubernetes/cni/binding/bridge.py index f7b1b99a5..a96838656 100644 --- a/kuryr_kubernetes/cni/binding/bridge.py +++ b/kuryr_kubernetes/cni/binding/bridge.py @@ -32,16 +32,12 @@ class BaseBridgeDriver(health.HealthHandler, b_base.BaseBindingDriver): def connect(self, vif, ifname, netns, container_id): host_ifname = vif.vif_name + # NOTE(dulek): Check if we already run connect for this iface and if + # there's a leftover host-side vif. If so we need to + # remove it, its peer should get deleted automatically by + # the kernel. with b_base.get_ipdb() as h_ipdb: - if host_ifname in h_ipdb.interfaces: - # NOTE(dulek): This most likely means that we already run - # connect for this iface and there's a leftover - # host-side vif. Let's remove it, its peer should - # get deleted automatically by the kernel. - LOG.debug('Found leftover host vif %s. Removing it before ' - 'connecting.', host_ifname) - with h_ipdb.interfaces[host_ifname] as h_iface: - h_iface.remove() + self._remove_ifaces(h_ipdb, (host_ifname,)) if vif.network.mtu: interface_mtu = vif.network.mtu diff --git a/kuryr_kubernetes/cni/binding/nested.py b/kuryr_kubernetes/cni/binding/nested.py index 36a96fc4b..8a0691d9d 100644 --- a/kuryr_kubernetes/cni/binding/nested.py +++ b/kuryr_kubernetes/cni/binding/nested.py @@ -15,6 +15,8 @@ import abc import six +from oslo_log import log as logging + from kuryr_kubernetes.cni.binding import base as b_base from kuryr_kubernetes import config from kuryr_kubernetes.handlers import health @@ -24,6 +26,8 @@ VLAN_KIND = 'vlan' MACVLAN_KIND = 'macvlan' MACVLAN_MODE_BRIDGE = 'bridge' +LOG = logging.getLogger(__name__) + @six.add_metaclass(abc.ABCMeta) class NestedDriver(health.HealthHandler, b_base.BaseBindingDriver): @@ -36,19 +40,29 @@ class NestedDriver(health.HealthHandler, b_base.BaseBindingDriver): raise NotImplementedError() def connect(self, vif, ifname, netns, container_id): - with b_base.get_ipdb() as h_ipdb: - # NOTE(vikasc): Ideally 'ifname' should be used here but instead a - # temporary name is being used while creating the device for - # container in host network namespace. This is because cni expects - # only 'eth0' as interface name and if host already has an - # interface named 'eth0', device creation will fail with 'already - # exists' error. - temp_name = vif.vif_name + # NOTE(vikasc): Ideally 'ifname' should be used here but instead a + # temporary name is being used while creating the device for + # container in host network namespace. This is because cni expects + # only 'eth0' as interface name and if host already has an + # interface named 'eth0', device creation will fail with 'already + # exists' error. + temp_name = vif.vif_name + # First let's take a peek into the pod namespace and try to remove any + # leftover interface in case we got restarted before CNI returned to + # kubelet. + with b_base.get_ipdb(netns) as c_ipdb: + self._remove_ifaces(c_ipdb, (temp_name, ifname), netns) + + with b_base.get_ipdb() as h_ipdb: # TODO(vikasc): evaluate whether we should have stevedore # driver for getting the link device. vm_iface_name = config.CONF.binding.link_iface + # We might also have leftover interface in the host netns, let's + # try to remove it too. + self._remove_ifaces(h_ipdb, (temp_name,)) + args = self._get_iface_create_args(vif) with h_ipdb.create(ifname=temp_name, link=h_ipdb.interfaces[vm_iface_name], diff --git a/kuryr_kubernetes/tests/unit/cni/test_binding.py b/kuryr_kubernetes/tests/unit/cni/test_binding.py index ebdcceb35..bc5d29013 100644 --- a/kuryr_kubernetes/tests/unit/cni/test_binding.py +++ b/kuryr_kubernetes/tests/unit/cni/test_binding.py @@ -180,7 +180,7 @@ class TestNestedVlanDriver(TestDriverMixin, test_base.TestCase): self._test_connect() self.assertEqual(1, self.h_ipdb_exit.call_count) - self.assertEqual(2, self.c_ipdb_exit.call_count) + self.assertEqual(3, self.c_ipdb_exit.call_count) self.assertEqual(self.ifname, self.m_h_iface.ifname) self.assertEqual(1, self.m_h_iface.mtu) @@ -202,7 +202,7 @@ class TestNestedMacvlanDriver(TestDriverMixin, test_base.TestCase): self._test_connect() self.assertEqual(1, self.h_ipdb_exit.call_count) - self.assertEqual(2, self.c_ipdb_exit.call_count) + self.assertEqual(3, self.c_ipdb_exit.call_count) self.assertEqual(self.ifname, self.m_h_iface.ifname) self.assertEqual(1, self.m_h_iface.mtu)