Merge "Cleanup ports belonging to deleted nodes"

This commit is contained in:
Zuul 2020-09-28 22:36:27 +00:00 committed by Gerrit Code Review
commit 5062d398a2
1 changed files with 98 additions and 2 deletions

View File

@ -102,6 +102,8 @@ VIF_TYPE_TO_DRIVER_MAPPING = {
'VIFVHostUser': 'neutron-vif', 'VIFVHostUser': 'neutron-vif',
} }
NODE_PORTS_CLEAN_FREQUENCY = 600 # seconds
class NoopVIFPool(base.VIFPoolDriver): class NoopVIFPool(base.VIFPoolDriver):
"""No pool VIFs for Kubernetes Pods""" """No pool VIFs for Kubernetes Pods"""
@ -161,6 +163,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
# background thread # background thread
self._recovered_pools = False self._recovered_pools = False
eventlet.spawn(self._return_ports_to_pool) eventlet.spawn(self._return_ports_to_pool)
eventlet.spawn(self._cleanup_removed_nodes)
def set_vif_driver(self, driver): def set_vif_driver(self, driver):
self._drv_vif = driver self._drv_vif = driver
@ -460,6 +463,87 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
if not port.binding_host_id: if not port.binding_host_id:
os_net.delete_port(port.id) os_net.delete_port(port.id)
def _cleanup_removed_nodes(self):
    """Periodically purge the ports left behind by removed nodes.

    Runs forever: sleeps NODE_PORTS_CLEAN_FREQUENCY seconds, then performs
    one cleanup pass. An exception raised by a pass is logged and the loop
    simply waits for the next period. The same list object is handed to
    every pass so that a pass can tell which ports an earlier pass has
    already seen.
    """
    failure_msg = ('Error while removing the ports associated to '
                   'deleted nodes. It will be retried in %s '
                   'seconds')
    # Port ids remembered from one pass to the next.
    candidate_port_ids = []
    while True:
        # NOTE(ltomasbo): Nodes are not expected to be removed
        # frequently, so there is no need to execute this frequently
        # either
        eventlet.sleep(NODE_PORTS_CLEAN_FREQUENCY)
        try:
            self._trigger_removed_nodes_ports_cleanup(candidate_port_ids)
        except Exception:
            LOG.exception(failure_msg, NODE_PORTS_CLEAN_FREQUENCY)
def _trigger_removed_nodes_ports_cleanup(self, previous_ports_to_remove):
    """Run one cleanup pass over ports associated to removed nodes.

    There are two types of ports pool, one for neutron and one for nested.

    For the nested one, the ports lose their device_owner after being
    detached, i.e., after the node they belong to got removed. This means
    we cannot find them unless they have been tagged, so they are only
    looked up (and deleted) when resource tags are configured.

    For the neutron one, we rely on the ports having the kuryr device
    owner and not having binding information, thus ensuring they are not
    attached to any node. However, to avoid the case where those ports
    are being created at the same time as the cleanup process, they are
    not deleted unless they have been seen in two iterations.

    Nothing is done until the pools have been recovered.

    :param previous_ports_to_remove: list of port ids seen by the previous
        pass. It is updated in place: newly seen ports are added, deleted
        ports are removed, and ids that are no longer reported by Neutron
        are dropped so the list cannot grow without bound and a stale
        entry cannot shortcut the two-iterations rule later on.
    """
    if not self._recovered_pools:
        LOG.debug("Kuryr-controller not yet ready to perform nodes"
                  " cleanup.")
        return

    os_net = clients.get_network_client()
    tags = config.CONF.neutron_defaults.resource_tags

    def _forget_and_delete(port_id):
        # Drop the port from the local cache (if present) and delete it
        # in Neutron; a failed deletion is only logged (best effort).
        try:
            del self._existing_vifs[port_id]
        except KeyError:
            LOG.debug('Port %s is not in the ports list.', port_id)
        try:
            os_net.delete_port(port_id)
        except os_exc.SDKException:
            LOG.debug("Problem deleting leftover port %s. "
                      "Skipping.", port_id)

    port_filters = {'device_owner': kl_const.DEVICE_OWNER,
                    'status': 'DOWN'}
    if tags:
        # NOTE(ltomasbo): Detached subports gets their device_owner unset
        detached_subports = os_net.ports(
            device_owner='', status='DOWN', tags=tags)
        for subport in detached_subports:
            _forget_and_delete(subport.id)
        port_filters['tags'] = tags

    # normal ports, or subports not yet attached
    existing_ports = os_net.ports(**port_filters)

    seen_port_ids = set()
    for port in existing_ports:
        seen_port_ids.add(port.id)
        # NOTE(ltomasbo): It may be that the port got just created and it
        # is still being attached and/or being tagged.
        if port.id not in previous_ports_to_remove:
            previous_ports_to_remove.append(port.id)
            continue

        if not port.binding_host_id:
            _forget_and_delete(port.id)
            previous_ports_to_remove.remove(port.id)

    # Forget ids that were not reported in this pass (port got attached,
    # became active or was deleted elsewhere). Slice assignment keeps the
    # caller's list object, which is reused on the next pass.
    previous_ports_to_remove[:] = [
        port_id for port_id in previous_ports_to_remove
        if port_id in seen_port_ids]
class NeutronVIFPool(BaseVIFPool): class NeutronVIFPool(BaseVIFPool):
"""Manages VIFs for Bare Metal Kubernetes Pods.""" """Manages VIFs for Bare Metal Kubernetes Pods."""
@ -524,7 +608,13 @@ class NeutronVIFPool(BaseVIFPool):
""" """
while True: while True:
eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency) eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
self._trigger_return_to_pool() try:
self._trigger_return_to_pool()
except Exception:
LOG.exception(
'Error while returning ports to pool. '
'It will be retried in %s seconds',
oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
@lockutils.synchronized('return_to_pool_baremetal') @lockutils.synchronized('return_to_pool_baremetal')
def _trigger_return_to_pool(self): def _trigger_return_to_pool(self):
@ -777,7 +867,13 @@ class NestedVIFPool(BaseVIFPool):
""" """
while True: while True:
eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency) eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
self._trigger_return_to_pool() try:
self._trigger_return_to_pool()
except Exception:
LOG.exception(
'Error while returning ports to pool. '
'It will be retried in %s seconds',
oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
@lockutils.synchronized('return_to_pool_nested') @lockutils.synchronized('return_to_pool_nested')
def _trigger_return_to_pool(self): def _trigger_return_to_pool(self):