Merge "CNI: Lookup offending interface on NetlinkError"
This commit is contained in:
commit
9067b45a06
@ -14,6 +14,7 @@
|
|||||||
|
|
||||||
import abc
|
import abc
|
||||||
import errno
|
import errno
|
||||||
|
import os
|
||||||
|
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
import psutil
|
import psutil
|
||||||
@ -103,7 +104,6 @@ class NestedDriver(health.HealthHandler, b_base.BaseBindingDriver,
|
|||||||
with b_base.get_ipdb() as h_ipdb:
|
with b_base.get_ipdb() as h_ipdb:
|
||||||
self._remove_ifaces(h_ipdb, (temp_name,))
|
self._remove_ifaces(h_ipdb, (temp_name,))
|
||||||
|
|
||||||
try:
|
|
||||||
with b_base.get_ipdb() as h_ipdb:
|
with b_base.get_ipdb() as h_ipdb:
|
||||||
# TODO(vikasc): evaluate whether we should have stevedore
|
# TODO(vikasc): evaluate whether we should have stevedore
|
||||||
# driver for getting the link device.
|
# driver for getting the link device.
|
||||||
@ -124,18 +124,6 @@ class NestedDriver(health.HealthHandler, b_base.BaseBindingDriver,
|
|||||||
link=h_ipdb.interfaces[vm_iface_name],
|
link=h_ipdb.interfaces[vm_iface_name],
|
||||||
**args) as iface:
|
**args) as iface:
|
||||||
iface.net_ns_fd = utils.convert_netns(netns)
|
iface.net_ns_fd = utils.convert_netns(netns)
|
||||||
except pyroute2.NetlinkError as e:
|
|
||||||
if e.code == errno.EEXIST:
|
|
||||||
# NOTE(dulek): This is related to bug 1854928. It's super-rare,
|
|
||||||
# so aim of this piece is to gather any info useful
|
|
||||||
# for determining when it happens.
|
|
||||||
LOG.exception(f'Creation of pod interface failed due to VLAN '
|
|
||||||
f'ID (vlan_info={args}) conflict. Probably the '
|
|
||||||
f'CRI had not cleaned up the network namespace '
|
|
||||||
f'of deleted pods. This should not be a '
|
|
||||||
f'permanent issue but may cause restart of '
|
|
||||||
f'kuryr-cni pod.')
|
|
||||||
raise
|
|
||||||
|
|
||||||
with b_base.get_ipdb(netns) as c_ipdb:
|
with b_base.get_ipdb(netns) as c_ipdb:
|
||||||
with c_ipdb.interfaces[temp_name] as iface:
|
with c_ipdb.interfaces[temp_name] as iface:
|
||||||
@ -159,9 +147,43 @@ class VlanDriver(NestedDriver):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(VlanDriver, self).__init__()
|
super(VlanDriver, self).__init__()
|
||||||
|
|
||||||
|
def connect(self, vif, ifname, netns, container_id):
    """Connect the VIF, retrying once after cleaning up a VLAN ID conflict.

    Delegates to the parent class ``connect``. If that fails with a
    ``pyroute2.NetlinkError`` whose code is ``EEXIST``, the failure is
    treated as a VLAN ID conflict: the conflicting interface is looked up
    and removed via ``_cleanup_conflicting_vlan`` and the parent
    ``connect`` is attempted one more time.

    :param vif: VIF object; passed to ``_get_iface_create_args`` to obtain
                the interface creation arguments (including ``vlan_id``).
    :param ifname: interface name, forwarded to the parent ``connect``.
    :param netns: network namespace path, forwarded to the parent
                  ``connect`` and used to locate sibling namespaces.
    :param container_id: container ID, forwarded to the parent ``connect``.
    :raises pyroute2.NetlinkError: for any non-EEXIST error, or if the
                                   retry itself fails.
    """
    try:
        super().connect(vif, ifname, netns, container_id)
    except pyroute2.NetlinkError as e:
        if e.code != errno.EEXIST:
            # Not a conflict we know how to recover from.
            raise

        args = self._get_iface_create_args(vif)
        LOG.warning(
            f'Creation of pod interface failed due to VLAN ID '
            f'(vlan_info={args}) conflict. Probably the CRI had not '
            f'cleaned up the network namespace of deleted pods. '
            f'Attempting to find and delete offending interface and '
            f'retry.')
        self._cleanup_conflicting_vlan(netns, args['vlan_id'])
        # A failure here propagates to the caller; a success must NOT fall
        # through to a re-raise of the original error.
        super().connect(vif, ifname, netns, container_id)
|
||||||
|
|
||||||
def _get_iface_create_args(self, vif):
|
def _get_iface_create_args(self, vif):
|
||||||
return {'kind': VLAN_KIND, 'vlan_id': vif.vlan_id}
|
return {'kind': VLAN_KIND, 'vlan_id': vif.vlan_id}
|
||||||
|
|
||||||
|
def _cleanup_conflicting_vlan(self, netns, vlan_id):
|
||||||
|
if vlan_id is None:
|
||||||
|
# Better to not attempt that, might remove way to much.
|
||||||
|
return
|
||||||
|
|
||||||
|
netns_dir = os.path.dirname(netns)
|
||||||
|
for ns in os.listdir(netns_dir):
|
||||||
|
ns = os.fsdecode(ns)
|
||||||
|
with b_base.get_ipdb(ns) as c_ipdb:
|
||||||
|
for ifname, iface in c_ipdb.interfaces.items():
|
||||||
|
if iface.vlan_id == vlan_id:
|
||||||
|
LOG.warning(
|
||||||
|
f'Found offending interface {ifname} with VLAN ID '
|
||||||
|
f'{vlan_id} in netns {ns}. Trying to remove it.')
|
||||||
|
with c_ipdb.interfaces[ifname] as iface:
|
||||||
|
iface.remove()
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
class MacvlanDriver(NestedDriver):
|
class MacvlanDriver(NestedDriver):
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user