From a660eae054f731f9b4016a85f92a3c840dc16758 Mon Sep 17 00:00:00 2001
From: Luis Tomas Bolivar
Date: Fri, 28 Aug 2020 15:50:06 +0200
Subject: [PATCH] Ensure proper cleanup of subports

Namespace deletion can get stuck if not all the ports belonging to the
associated namespace networks are deleted. This patch enforces proper
cleanup of subports by:
- Ensuring no request/release VIF actions are processed until the pools
  are recovered by the controller
- Ensuring that if there are subports left over when removing a
  namespace, they are detached from the trunk and deleted regardless of
  their status

Change-Id: I2cb1586fa1f88bab191af0ead22a2b8afca91a3b
---
 .../controller/drivers/namespace_subnet.py         | 16 ++++++++++------
 kuryr_kubernetes/controller/drivers/vif_pool.py    | 13 +++++++++++--
 .../controller/drivers/test_namespace_subnet.py    |  2 +-
 .../unit/controller/drivers/test_vif_pool.py       | 16 ++++++++++++++++
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/kuryr_kubernetes/controller/drivers/namespace_subnet.py b/kuryr_kubernetes/controller/drivers/namespace_subnet.py
index e97408eb8..075e0f8f7 100644
--- a/kuryr_kubernetes/controller/drivers/namespace_subnet.py
+++ b/kuryr_kubernetes/controller/drivers/namespace_subnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from kuryr.lib._i18n import _
+from kuryr.lib import constants as kl_const
 from oslo_config import cfg as oslo_cfg
 from oslo_log import log as logging
 
@@ -101,11 +102,13 @@ class NamespacePodSubnetDriver(default_subnet.DefaultPodSubnetDriver):
         try:
             os_net.delete_network(net_id)
         except os_exc.ConflictException:
-            LOG.exception("One or more ports in use on the network %s. "
-                          "Deleting leftovers ports before retrying", net_id)
-            leftover_ports = os_net.ports(status='DOWN', network_id=net_id)
+            leftover_ports = os_net.ports(network_id=net_id)
             for leftover_port in leftover_ports:
+                if leftover_port.device_owner not in ['trunk:subport',
+                                                      kl_const.DEVICE_OWNER]:
+                    continue
                 try:
                     # NOTE(gryf): there is unlikely, that we get an exception
                     # like PortNotFound or something, since openstacksdk
@@ -114,10 +117,11 @@ class NamespacePodSubnetDriver(default_subnet.DefaultPodSubnetDriver):
                     os_net.delete_port(leftover_port.id)
                 except os_exc.SDKException as e:
                     if "currently a subport for trunk" in str(e):
-                        LOG.warning("Port %s is in DOWN status but still "
-                                    "associated to a trunk. This should not "
-                                    "happen. Trying to delete it from the "
-                                    "trunk.", leftover_port.id)
+                        if leftover_port.status == "DOWN":
+                            LOG.warning("Port %s is in DOWN status but still "
+                                        "associated to a trunk. This should "
+                                        "not happen. Trying to delete it from "
+                                        "the trunk.", leftover_port.id)
                         # Get the trunk_id from the error message
                         trunk_id = (
                             str(e).split('trunk')[1].split('.')[0].strip())
diff --git a/kuryr_kubernetes/controller/drivers/vif_pool.py b/kuryr_kubernetes/controller/drivers/vif_pool.py
index 8367e1891..249d4d3d3 100644
--- a/kuryr_kubernetes/controller/drivers/vif_pool.py
+++ b/kuryr_kubernetes/controller/drivers/vif_pool.py
@@ -192,6 +192,9 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
         return pool_key[2]
 
     def request_vif(self, pod, project_id, subnets, security_groups):
+        if not self._recovered_pools:
+            LOG.info("Kuryr-controller not yet ready to handle new pods.")
+            raise exceptions.ResourceNotReady(pod)
         try:
             host_addr = self._get_host_addr(pod)
         except KeyError:
@@ -254,6 +257,9 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
 
     def release_vif(self, pod, vif, project_id, security_groups,
                     host_addr=None):
+        if not self._recovered_pools:
+            LOG.info("Kuryr-controller not yet ready to remove pods.")
+            raise exceptions.ResourceNotReady(pod)
         if not host_addr:
             host_addr = self._get_host_addr(pod)
 
@@ -515,7 +521,7 @@ class NeutronVIFPool(BaseVIFPool):
 
     @lockutils.synchronized('return_to_pool_baremetal')
     def _trigger_return_to_pool(self):
-        if not hasattr(self, '_recyclable_ports'):
+        if not self._recovered_pools:
             LOG.info("Kuryr-controller not yet ready to return ports to "
                      "pools.")
             return
@@ -683,6 +689,9 @@ class NestedVIFPool(BaseVIFPool):
         return None
 
     def release_vif(self, pod, vif, project_id, security_groups):
+        if not self._recovered_pools:
+            LOG.info("Kuryr-controller not yet ready to remove pods.")
+            raise exceptions.ResourceNotReady(pod)
         try:
             host_addr = self._get_host_addr(pod)
         except KeyError:
@@ -765,7 +774,7 @@ class NestedVIFPool(BaseVIFPool):
 
     @lockutils.synchronized('return_to_pool_nested')
     def _trigger_return_to_pool(self):
-        if not hasattr(self, '_recyclable_ports'):
+        if not self._recovered_pools:
             LOG.info("Kuryr-controller not yet ready to return ports to "
                      "pools.")
             return
diff --git a/kuryr_kubernetes/tests/unit/controller/drivers/test_namespace_subnet.py b/kuryr_kubernetes/tests/unit/controller/drivers/test_namespace_subnet.py
index 1a497af7d..fc6eddae7 100644
--- a/kuryr_kubernetes/tests/unit/controller/drivers/test_namespace_subnet.py
+++ b/kuryr_kubernetes/tests/unit/controller/drivers/test_namespace_subnet.py
@@ -157,7 +157,7 @@ class TestNamespacePodSubnetDriver(test_base.TestCase):
 
         os_net.remove_interface_from_router.assert_called_once()
         os_net.delete_network.assert_called_once_with(net_id)
-        os_net.ports.assert_called_with(status='DOWN', network_id=net_id)
+        os_net.ports.assert_called_with(network_id=net_id)
 
     def test_create_network(self):
         cls = subnet_drv.NamespacePodSubnetDriver
diff --git a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
index f51f01558..52809852f 100644
--- a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
+++ b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py
@@ -87,6 +87,7 @@ class BaseVIFPool(test_base.TestCase):
         vif = mock.sentinel.vif
         m_driver._get_port_from_pool.return_value = vif
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_min',
                                    5,
                                    group='vif_pool')
@@ -116,6 +117,7 @@ class BaseVIFPool(test_base.TestCase):
         network = ovu.neutron_to_osvif_network(_net)
         subnets = {subnet_id: network}
         security_groups = [mock.sentinel.security_groups]
+        m_driver._recovered_pools = True
         m_driver._get_port_from_pool.side_effect = (
             exceptions.ResourceNotReady(pod))
@@ -132,6 +134,7 @@ class BaseVIFPool(test_base.TestCase):
         subnets = mock.sentinel.subnets
         security_groups = [mock.sentinel.security_groups]
         m_driver._get_host_addr.side_effect = KeyError
+        m_driver._recovered_pools = True
 
         resp = cls.request_vif(m_driver, pod, project_id, subnets,
                                security_groups)
@@ -269,6 +272,7 @@ class BaseVIFPool(test_base.TestCase):
             network=network)
 
         m_driver._return_ports_to_pool.return_value = None
+        m_driver._recovered_pools = True
 
         cls.release_vif(m_driver, pod, vif, project_id, security_groups)
@@ -644,6 +648,7 @@ class NeutronVIFPool(test_base.TestCase):
         m_driver._recyclable_ports = {port_id: pool_key}
         m_driver._available_ports_pools = {}
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_max',
                                    max_pool,
                                    group='vif_pool')
@@ -685,6 +690,7 @@ class NeutronVIFPool(test_base.TestCase):
         port.security_group_ids = ['security_group']
         os_net.ports.return_value = (p for p in [port])
         m_driver._get_pool_size.return_value = pool_length
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -705,6 +711,7 @@ class NeutronVIFPool(test_base.TestCase):
         m_driver._recyclable_ports = {port_id: pool_key}
         m_driver._available_ports_pools = {}
         m_driver._existing_vifs = {port_id: vif}
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_max',
                                    10,
                                    group='vif_pool')
@@ -730,6 +737,7 @@ class NeutronVIFPool(test_base.TestCase):
         m_driver._recyclable_ports = {port_id: pool_key}
         m_driver._available_ports_pools = {}
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_max',
                                    0,
                                    group='vif_pool')
@@ -765,6 +773,7 @@ class NeutronVIFPool(test_base.TestCase):
         m_driver._recyclable_ports = {port_id: pool_key}
         m_driver._available_ports_pools = {}
         m_driver._existing_vifs = {port_id: vif}
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_max',
                                    5,
                                    group='vif_pool')
@@ -791,6 +800,7 @@ class NeutronVIFPool(test_base.TestCase):
         m_driver._recyclable_ports = {port_id: pool_key}
         m_driver._available_ports_pools = {}
         m_driver._existing_vifs = {}
+        m_driver._recovered_pools = True
         oslo_cfg.CONF.set_override('ports_pool_max',
                                    5,
                                    group='vif_pool')
@@ -1195,6 +1205,7 @@ class NestedVIFPool(test_base.TestCase):
             munch.Munch({'id': port_id,
                          'security_group_ids': ['security_group_modified']})]
         m_driver._get_pool_size.return_value = pool_length
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -1226,6 +1237,7 @@ class NestedVIFPool(test_base.TestCase):
         port.security_group_ids = ['security_group']
         os_net.ports.return_value = [port]
         m_driver._get_pool_size.return_value = pool_length
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -1261,6 +1273,7 @@ class NestedVIFPool(test_base.TestCase):
         m_driver._get_pool_size.return_value = pool_length
         m_driver._get_trunk_id.return_value = trunk_id
         m_driver._known_trunk_ids = {}
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -1293,6 +1306,7 @@ class NestedVIFPool(test_base.TestCase):
         os_net.ports.return_value = [port]
         m_driver._get_pool_size.return_value = pool_length
         os_net.update_port.side_effect = os_exc.SDKException
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -1327,6 +1341,7 @@ class NestedVIFPool(test_base.TestCase):
         m_driver._get_pool_size.return_value = pool_length
         m_driver._get_trunk_id.return_value = trunk_id
         m_driver._known_trunk_ids = {}
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
@@ -1361,6 +1376,7 @@ class NestedVIFPool(test_base.TestCase):
         m_driver._get_pool_size.return_value = pool_length
         m_driver._known_trunk_ids = {}
         m_driver._get_trunk_id.return_value = trunk_id
+        m_driver._recovered_pools = True
 
         cls._trigger_return_to_pool(m_driver)
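
For reference, the readiness gate added to the pool drivers above boils down
to the pattern sketched below. This is a simplified, self-contained sketch,
not the driver code itself: PoolDriverSketch, recover_pools() and the local
ResourceNotReady class are illustrative stand-ins for BaseVIFPool, its port
recovery logic and kuryr_kubernetes.exceptions.ResourceNotReady; only the
_recovered_pools check mirrors the patched code.

    class ResourceNotReady(Exception):
        """Stand-in for kuryr_kubernetes.exceptions.ResourceNotReady."""


    class PoolDriverSketch(object):
        """Illustrative only; simplified from BaseVIFPool/NestedVIFPool."""

        def __init__(self):
            # Only flipped to True once the pre-created ports have been
            # read back from Neutron and the pools repopulated on start-up.
            self._recovered_pools = False

        def recover_pools(self):
            # ... rebuild the available-ports pools from existing ports ...
            self._recovered_pools = True

        def request_vif(self, pod):
            if not self._recovered_pools:
                # Requeue the event instead of serving a port from a pool
                # that has not been rebuilt yet.
                raise ResourceNotReady(pod)
            # ... regular take-a-port-from-the-pool logic ...

        def release_vif(self, pod, vif):
            if not self._recovered_pools:
                # Same gate on the release path, so ports are not returned
                # to (or leaked from) pools that do not exist yet.
                raise ResourceNotReady(pod)
            # ... regular return-the-port-to-the-pool logic ...

Raising ResourceNotReady lets the controller handlers retry the pod event
later, which is also why the unit tests above set
m_driver._recovered_pools = True before exercising request_vif, release_vif
and _trigger_return_to_pool.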