From 8f61307fa684afe1991d3f03b5339481e074a9e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Dulko?= Date: Wed, 11 May 2022 17:25:26 +0200 Subject: [PATCH] Error handling improvements This combines a few error handling improvements fixing problems found by e2e K8s tests. 1. Logs on K8sNamespaceTerminating are no longer on WARNING but DEBUG level. This is because they're harmless, yet they can spam the logs when multiple namespaces are deleted, such as in e2e K8s tests. 2. 400 Bad Request is ignored on LB member creation. This happens when subnet got deleted in the meanwhile and the LB will be gone soon too. 3. 404 Not Found is ignored when subports are detached from a trunk. This can happen when some other thread already detached that port. If the request was just for a single port, then error can be safely ignored. In case of bulk request we don't really know which and how many subports are detached already, so on detach error we'll just proceed to delete ports and on errors attempt to detach them one-by-one. Change-Id: Ic11f15e44086f8b25380e20457f28c351403b4d9 --- kuryr_kubernetes/controller/drivers/lbaasv2.py | 13 ++++++++++++- .../controller/drivers/nested_vlan_vif.py | 6 ++++++ .../controller/drivers/network_policy.py | 8 ++++---- kuryr_kubernetes/controller/drivers/vif_pool.py | 13 +++++++------ kuryr_kubernetes/controller/handlers/lbaas.py | 14 +++++++------- kuryr_kubernetes/controller/handlers/vif.py | 7 +++---- .../tests/unit/controller/drivers/test_vif_pool.py | 6 ++---- 7 files changed, 41 insertions(+), 26 deletions(-) diff --git a/kuryr_kubernetes/controller/drivers/lbaasv2.py b/kuryr_kubernetes/controller/drivers/lbaasv2.py index 928894f78..521b7b2c6 100644 --- a/kuryr_kubernetes/controller/drivers/lbaasv2.py +++ b/kuryr_kubernetes/controller/drivers/lbaasv2.py @@ -700,7 +700,18 @@ class LBaaSv2Driver(base.LBaaSDriver): } self.add_tags('member', request) lbaas = clients.get_loadbalancer_client() - response = lbaas.create_member(member['pool_id'], **request) + try: + response = lbaas.create_member(member['pool_id'], **request) + except os_exc.BadRequestException as e: + details = e.response.json() + if (details['faultstring'] == f'Subnet {member["subnet_id"]} not ' + f'found.'): + # Most likely the subnet is deleted already as the namespace is + # being deleted. Ignore, we'll delete that LB soon anyway. + LOG.warning('Member %s not created as subnet %s is being ' + 'deleted.', member['name'], member['subnet_id']) + return None + raise member['id'] = response.id return member diff --git a/kuryr_kubernetes/controller/drivers/nested_vlan_vif.py b/kuryr_kubernetes/controller/drivers/nested_vlan_vif.py index aaa687076..737a10787 100644 --- a/kuryr_kubernetes/controller/drivers/nested_vlan_vif.py +++ b/kuryr_kubernetes/controller/drivers/nested_vlan_vif.py @@ -280,6 +280,12 @@ class NestedVlanPodVIFDriver(nested_vif.NestedPodVIFDriver): subports_body.append({'port_id': subport_id}) try: os_net.delete_trunk_subports(trunk_id, subports_body) + except os_exc.NotFoundException: + if len(subports_id) > 1: + LOG.debug('Not Found on subport deletion, most likely some ' + 'subports in the list got detached already.') + raise # We don't know if all ports are detached, so raise. + # If single requested port is detached already, we're cool, ignore. except os_exc.SDKException: LOG.exception("Error happened during subport removal from " "trunk %s", trunk_id) diff --git a/kuryr_kubernetes/controller/drivers/network_policy.py b/kuryr_kubernetes/controller/drivers/network_policy.py index 3195d65b9..89bb52dbb 100644 --- a/kuryr_kubernetes/controller/drivers/network_policy.py +++ b/kuryr_kubernetes/controller/drivers/network_policy.py @@ -118,10 +118,10 @@ class NetworkPolicyDriver(base.NetworkPolicyDriver): try: self._create_knp_crd(policy, i_rules, e_rules) except exceptions.K8sNamespaceTerminating: - LOG.warning('Namespace %s is being terminated, ignoring ' - 'NetworkPolicy %s in that namespace.', - policy['metadata']['namespace'], - policy['metadata']['name']) + LOG.debug('Namespace %s is being terminated, ignoring ' + 'NetworkPolicy %s in that namespace.', + policy['metadata']['namespace'], + policy['metadata']['name']) return else: self._patch_knp_crd(policy, i_rules, e_rules, knp) diff --git a/kuryr_kubernetes/controller/drivers/vif_pool.py b/kuryr_kubernetes/controller/drivers/vif_pool.py index ac407cd33..75371d9a1 100644 --- a/kuryr_kubernetes/controller/drivers/vif_pool.py +++ b/kuryr_kubernetes/controller/drivers/vif_pool.py @@ -1127,7 +1127,6 @@ class NestedVIFPool(BaseVIFPool): "pools.") raise exceptions.ResourceNotReady(net_id) - epool = eventlet.GreenPool(constants.LEFTOVER_RM_POOL_SIZE) ports_to_remove = [] # NOTE(ltomasbo): Note the pods should already be deleted, but their @@ -1143,6 +1142,10 @@ class NestedVIFPool(BaseVIFPool): for p_id in sg_ports] try: self._drv_vif._remove_subports(trunk_id, ports_id) + except os_exc.NotFoundException: + # We don't know which subport was already removed, but we'll + # attempt a manual detach on DELETE error, so just continue. + pass except (os_exc.SDKException, os_exc.HttpException): LOG.exception('Error removing subports from trunk: %s', trunk_id) @@ -1164,11 +1167,9 @@ class NestedVIFPool(BaseVIFPool): except KeyError: pass - for result in epool.imap(c_utils.delete_neutron_port, ports_to_remove): - if result: - LOG.error('During Neutron port deletion an error occured: %s', - result) - raise result + if not c_utils.delete_ports(ports_to_remove): + LOG.error('Some ports failed to be deleted.') + raise exceptions.ResourceNotReady(net_id) class MultiVIFPool(base.VIFPoolDriver): diff --git a/kuryr_kubernetes/controller/handlers/lbaas.py b/kuryr_kubernetes/controller/handlers/lbaas.py index ce379873c..71c7251ff 100644 --- a/kuryr_kubernetes/controller/handlers/lbaas.py +++ b/kuryr_kubernetes/controller/handlers/lbaas.py @@ -87,10 +87,10 @@ class ServiceHandler(k8s_base.ResourceEventHandler): self._bump_network_policies(service) self.create_crd_spec(service) except k_exc.K8sNamespaceTerminating: - LOG.warning('Namespace %s is being terminated, ignoring ' - 'Service %s in that namespace.', - service['metadata']['namespace'], - service['metadata']['name']) + LOG.debug('Namespace %s is being terminated, ignoring ' + 'Service %s in that namespace.', + service['metadata']['namespace'], + service['metadata']['name']) return elif self._has_lbaas_spec_changes(service, loadbalancer_crd): self._update_crd_spec(loadbalancer_crd, service) @@ -347,9 +347,9 @@ class EndpointsHandler(k8s_base.ResourceEventHandler): try: self._update_crd_spec(loadbalancer_crd, endpoints) except k_exc.K8sNamespaceTerminating: - LOG.warning('Namespace %s is being terminated, ignoring ' - 'Endpoints %s in that namespace.', - ep_namespace, ep_name) + LOG.debug('Namespace %s is being terminated, ignoring ' + 'Endpoints %s in that namespace.', + ep_namespace, ep_name) def on_deleted(self, endpoints, *args, **kwargs): self._remove_endpoints(endpoints) diff --git a/kuryr_kubernetes/controller/handlers/vif.py b/kuryr_kubernetes/controller/handlers/vif.py index f936ee357..3f5705cb4 100644 --- a/kuryr_kubernetes/controller/handlers/vif.py +++ b/kuryr_kubernetes/controller/handlers/vif.py @@ -108,10 +108,9 @@ class VIFHandler(k8s_base.ResourceEventHandler): except k_exc.K8sNamespaceTerminating: # The underlying namespace is being terminated, we can # ignore this and let `on_finalize` handle this now. - LOG.warning('Namespace %s is being terminated, ignoring Pod ' - '%s in that namespace.', - pod['metadata']['namespace'], - pod_name) + LOG.debug('Namespace %s is being terminated, ignoring Pod ' + '%s in that namespace.', + pod['metadata']['namespace'], pod_name) return except k_exc.K8sClientException as ex: self.k8s.add_event(pod, 'FailedToCreateKuryrPortCRD', diff --git a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py index 9e862f29e..613b8d8ad 100644 --- a/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py +++ b/kuryr_kubernetes/tests/unit/controller/drivers/test_vif_pool.py @@ -1757,8 +1757,7 @@ class NestedVIFPool(test_base.TestCase): m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id, [port_id]) m_driver._drv_vif._release_vlan_id.assert_called_once_with(vlan_id) - m_pool.imap.assert_called_once_with(utils.delete_neutron_port, - [port_id]) + m_pool.imap.assert_called_once_with(utils.delete_port, [port_id]) def test_delete_network_pools_not_ready(self): cls = vif_pool.NestedVIFPool @@ -1851,5 +1850,4 @@ class NestedVIFPool(test_base.TestCase): m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id, [port_id]) m_driver._drv_vif._release_vlan_id.assert_not_called() - m_pool.imap.assert_called_once_with(utils.delete_neutron_port, - [port_id]) + m_pool.imap.assert_called_once_with(utils.delete_port, [port_id])