Gather debug info when NetlinkError EEXIST happens
In nested setups, binding the pod interface sometimes fails with: pyroute2.netlink.exceptions.NetlinkError: (17, 'File exists') This is most likely related to a VLAN id conflict, which should be fixed by the code that deletes interfaces possibly left over from a previous run of the binding, but for some reason that does not always help. This commit makes sure that when the error occurs we gather all the information about interfaces in both the host and pod namespaces. That should help to find the guilty interface and understand the reason behind the issue. Change-Id: Ia2d81c0e456a6e66ad0a95ae1f1a601236054e2f Related-Bug: 1854928
This commit is contained in:
parent
8f65c32a64
commit
14dbf5b9ce
|
@ -13,9 +13,11 @@
|
|||
# under the License.
|
||||
|
||||
import abc
|
||||
import errno
|
||||
import six
|
||||
|
||||
from oslo_log import log as logging
|
||||
import pyroute2
|
||||
|
||||
from kuryr_kubernetes.cni.binding import base as b_base
|
||||
from kuryr_kubernetes import config
|
||||
|
@ -54,20 +56,41 @@ class NestedDriver(health.HealthHandler, b_base.BaseBindingDriver):
|
|||
with b_base.get_ipdb(netns) as c_ipdb:
|
||||
self._remove_ifaces(c_ipdb, (temp_name, ifname), netns)
|
||||
|
||||
# We might also have leftover interface in the host netns, let's try to
|
||||
# remove it too. This is outside of the main host's IPDB context
|
||||
# manager to make sure removal is committed before starting next
|
||||
# transaction.
|
||||
with b_base.get_ipdb() as h_ipdb:
|
||||
# TODO(vikasc): evaluate whether we should have stevedore
|
||||
# driver for getting the link device.
|
||||
vm_iface_name = config.CONF.binding.link_iface
|
||||
|
||||
# We might also have leftover interface in the host netns, let's
|
||||
# try to remove it too.
|
||||
self._remove_ifaces(h_ipdb, (temp_name,))
|
||||
|
||||
args = self._get_iface_create_args(vif)
|
||||
with h_ipdb.create(ifname=temp_name,
|
||||
link=h_ipdb.interfaces[vm_iface_name],
|
||||
**args) as iface:
|
||||
iface.net_ns_fd = utils.convert_netns(netns)
|
||||
try:
|
||||
with b_base.get_ipdb() as h_ipdb:
|
||||
# TODO(vikasc): evaluate whether we should have stevedore
|
||||
# driver for getting the link device.
|
||||
vm_iface_name = config.CONF.binding.link_iface
|
||||
|
||||
args = self._get_iface_create_args(vif)
|
||||
with h_ipdb.create(ifname=temp_name,
|
||||
link=h_ipdb.interfaces[vm_iface_name],
|
||||
**args) as iface:
|
||||
iface.net_ns_fd = utils.convert_netns(netns)
|
||||
except pyroute2.NetlinkError as e:
|
||||
if e.code == errno.EEXIST:
|
||||
# NOTE(dulek): This is related to bug 1854928. It's super-rare,
|
||||
# so the aim of this piece is to gather any info useful
|
||||
# for determining when it happens.
|
||||
LOG.exception('Creation of pod interface failed, most likely '
|
||||
'due to duplicated VLAN id. This will probably '
|
||||
'cause kuryr-daemon to crashloop. Trying to '
|
||||
'gather debugging information.')
|
||||
|
||||
with b_base.get_ipdb() as h_ipdb:
|
||||
LOG.error('List of host interfaces: %s', h_ipdb.interfaces)
|
||||
|
||||
with b_base.get_ipdb(netns) as c_ipdb:
|
||||
LOG.error('List of pod namespace interfaces: %s',
|
||||
c_ipdb.interfaces)
|
||||
raise
|
||||
|
||||
with b_base.get_ipdb(netns) as c_ipdb:
|
||||
with c_ipdb.interfaces[temp_name] as iface:
|
||||
|
|
|
@ -179,7 +179,7 @@ class TestNestedVlanDriver(TestDriverMixin, test_base.TestCase):
|
|||
def test_connect(self):
|
||||
self._test_connect()
|
||||
|
||||
self.assertEqual(1, self.h_ipdb_exit.call_count)
|
||||
self.assertEqual(2, self.h_ipdb_exit.call_count)
|
||||
self.assertEqual(3, self.c_ipdb_exit.call_count)
|
||||
|
||||
self.assertEqual(self.ifname, self.m_h_iface.ifname)
|
||||
|
@ -201,7 +201,7 @@ class TestNestedMacvlanDriver(TestDriverMixin, test_base.TestCase):
|
|||
def test_connect(self):
|
||||
self._test_connect()
|
||||
|
||||
self.assertEqual(1, self.h_ipdb_exit.call_count)
|
||||
self.assertEqual(2, self.h_ipdb_exit.call_count)
|
||||
self.assertEqual(3, self.c_ipdb_exit.call_count)
|
||||
|
||||
self.assertEqual(self.ifname, self.m_h_iface.ifname)
|
||||
|
|
Loading…
Reference in New Issue