Cleanup ports belonging to deleted nodes

Due to the use of pools, there may be ports that belong to pools
associated with deleted nodes. Those ports are wasted, as they
can no longer be used.

This patch regularly checks for ports associated to deleted
nodes/trunks which would have lost their device_owner (for the
nested case), and for ports associated to deleted nodes for the
neutron vif case, where device owner is set but binding information
should not be there anymore.

Change-Id: I26be958aa3c0b51eb1a296eb2b4ac7996bc3263c
This commit is contained in:
Luis Tomas Bolivar 2020-09-22 10:55:43 +02:00
parent cb3720cad5
commit bee7718a48
1 changed files with 98 additions and 2 deletions

View File

@ -102,6 +102,8 @@ VIF_TYPE_TO_DRIVER_MAPPING = {
'VIFVHostUser': 'neutron-vif',
}
# Time the removed-nodes cleanup loop sleeps between two cleanup passes.
NODE_PORTS_CLEAN_FREQUENCY = 600 # seconds
class NoopVIFPool(base.VIFPoolDriver):
"""No pool VIFs for Kubernetes Pods"""
@ -161,6 +163,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
# background thread
self._recovered_pools = False
eventlet.spawn(self._return_ports_to_pool)
eventlet.spawn(self._cleanup_removed_nodes)
def set_vif_driver(self, driver):
self._drv_vif = driver
@ -460,6 +463,87 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
if not port.binding_host_id:
os_net.delete_port(port.id)
def _cleanup_removed_nodes(self):
    """Periodically purge the ports left behind by removed nodes.

    Runs forever: sleeps NODE_PORTS_CLEAN_FREQUENCY seconds, then triggers
    one cleanup pass. Any exception raised by a pass is logged and the loop
    carries on with the next period.
    """
    # Port IDs carried over between passes; the cleanup pass receives this
    # same list object on every call.
    candidate_port_ids = []
    while True:
        # NOTE(ltomasbo): Node removal is a rare event, so a long period
        # between passes is enough.
        eventlet.sleep(NODE_PORTS_CLEAN_FREQUENCY)
        try:
            self._trigger_removed_nodes_ports_cleanup(candidate_port_ids)
        except Exception:
            LOG.exception(
                'Error while removing the ports associated to deleted '
                'nodes. It will be retried in %s seconds',
                NODE_PORTS_CLEAN_FREQUENCY)
def _trigger_removed_nodes_ports_cleanup(self, previous_ports_to_remove):
    """Remove ports associated to removed nodes.

    There are two types of ports pool, one for neutron and one for nested.
    For the nested, the ports lose their device_owner after being detached,
    i.e., after the node they belong to got removed. This means we cannot
    find them unless they have been tagged.

    For the neutron ones, we rely on them having the kuryr device owner
    and not having binding information, thus ensuring they are not
    attached to any node. However, to avoid the case where those ports
    are being created at the same time of the cleanup process, we don't
    delete them unless we have seen them for 2 consecutive iterations.

    :param previous_ports_to_remove: list of port IDs that the previous
        iteration saw as removal candidates. It is updated in place: on
        return it holds the IDs listed by this iteration that were not
        deleted (first seen, still bound, or whose deletion failed).
    """
    if not self._recovered_pools:
        LOG.debug("Kuryr-controller not yet ready to perform nodes"
                  " cleanup.")
        return

    os_net = clients.get_network_client()
    tags = config.CONF.neutron_defaults.resource_tags

    def _delete_leftover_port(port_id):
        """Forget the port locally and delete it; True if deleted."""
        try:
            del self._existing_vifs[port_id]
        except KeyError:
            LOG.debug('Port %s is not in the ports list.', port_id)
        try:
            os_net.delete_port(port_id)
        except os_exc.SDKException:
            LOG.debug("Problem deleting leftover port %s. "
                      "Skipping.", port_id)
            return False
        return True

    port_filters = {
        'device_owner': kl_const.DEVICE_OWNER,
        'status': 'DOWN',
    }
    if tags:
        # NOTE(ltomasbo): Detached subports get their device_owner unset
        detached_subports = os_net.ports(
            device_owner='', status='DOWN', tags=tags)
        for subport in detached_subports:
            _delete_leftover_port(subport.id)
        port_filters['tags'] = tags

    # Set copy for O(1) membership tests against the previous iteration.
    seen_last_time = set(previous_ports_to_remove)
    still_candidates = []

    # normal ports, or subports not yet attached
    for port in os_net.ports(**port_filters):
        # NOTE(ltomasbo): It may be that the port got just created and it
        # is still being attached and/or being tagged.
        if port.id not in seen_last_time:
            still_candidates.append(port.id)
            continue

        # Bound ports are kept as candidates; a failed deletion is kept
        # too, so it is retried on the next iteration.
        if port.binding_host_id or not _delete_leftover_port(port.id):
            still_candidates.append(port.id)

    # Rebuild the caller's list in place. IDs that were not listed in this
    # iteration are dropped, so the list cannot grow without bound and a
    # port that shows up again later has to be seen twice in a row before
    # it gets deleted.
    previous_ports_to_remove[:] = still_candidates
class NeutronVIFPool(BaseVIFPool):
"""Manages VIFs for Bare Metal Kubernetes Pods."""
@ -524,7 +608,13 @@ class NeutronVIFPool(BaseVIFPool):
"""
while True:
eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
self._trigger_return_to_pool()
try:
self._trigger_return_to_pool()
except Exception:
LOG.exception(
'Error while returning ports to pool. '
'It will be retried in %s seconds',
oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
@lockutils.synchronized('return_to_pool_baremetal')
def _trigger_return_to_pool(self):
@ -777,7 +867,13 @@ class NestedVIFPool(BaseVIFPool):
"""
while True:
eventlet.sleep(oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
self._trigger_return_to_pool()
try:
self._trigger_return_to_pool()
except Exception:
LOG.exception(
'Error while returning ports to pool. '
'It will be retried in %s seconds',
oslo_cfg.CONF.vif_pool.ports_pool_update_frequency)
@lockutils.synchronized('return_to_pool_nested')
def _trigger_return_to_pool(self):