Error handling improvements

This combines a few error handling improvements fixing problems found by
e2e K8s tests.

1. K8sNamespaceTerminating is now logged at DEBUG level instead of
   WARNING. These messages are harmless, yet they can spam the logs
   when multiple namespaces are deleted, such as in e2e K8s tests.
2. 400 Bad Request is ignored on LB member creation when it reports
   that the member's subnet was not found. This happens when the
   subnet got deleted in the meantime, and the LB will be gone soon too.
3. 404 Not Found is ignored when subports are detached from a trunk.
   This can happen when some other thread already detached that port. If
   the request was just for a single port, then the error can be safely
   ignored. In case of a bulk request we don't really know which and how
   many subports are detached already, so on a detach error we'll just
   proceed to delete the ports and, on deletion errors, attempt to
   detach them one-by-one.

Change-Id: Ic11f15e44086f8b25380e20457f28c351403b4d9
This commit is contained in:
Michał Dulko 2022-05-11 17:25:26 +02:00
parent 38c0ac62ba
commit 8f61307fa6
7 changed files with 41 additions and 26 deletions

View File

@ -700,7 +700,18 @@ class LBaaSv2Driver(base.LBaaSDriver):
} }
self.add_tags('member', request) self.add_tags('member', request)
lbaas = clients.get_loadbalancer_client() lbaas = clients.get_loadbalancer_client()
response = lbaas.create_member(member['pool_id'], **request) try:
response = lbaas.create_member(member['pool_id'], **request)
except os_exc.BadRequestException as e:
details = e.response.json()
if (details['faultstring'] == f'Subnet {member["subnet_id"]} not '
f'found.'):
# Most likely the subnet is deleted already as the namespace is
# being deleted. Ignore, we'll delete that LB soon anyway.
LOG.warning('Member %s not created as subnet %s is being '
'deleted.', member['name'], member['subnet_id'])
return None
raise
member['id'] = response.id member['id'] = response.id
return member return member

View File

@ -280,6 +280,12 @@ class NestedVlanPodVIFDriver(nested_vif.NestedPodVIFDriver):
subports_body.append({'port_id': subport_id}) subports_body.append({'port_id': subport_id})
try: try:
os_net.delete_trunk_subports(trunk_id, subports_body) os_net.delete_trunk_subports(trunk_id, subports_body)
except os_exc.NotFoundException:
if len(subports_id) > 1:
LOG.debug('Not Found on subport deletion, most likely some '
'subports in the list got detached already.')
raise # We don't know if all ports are detached, so raise.
# If single requested port is detached already, we're cool, ignore.
except os_exc.SDKException: except os_exc.SDKException:
LOG.exception("Error happened during subport removal from " LOG.exception("Error happened during subport removal from "
"trunk %s", trunk_id) "trunk %s", trunk_id)

View File

@ -118,10 +118,10 @@ class NetworkPolicyDriver(base.NetworkPolicyDriver):
try: try:
self._create_knp_crd(policy, i_rules, e_rules) self._create_knp_crd(policy, i_rules, e_rules)
except exceptions.K8sNamespaceTerminating: except exceptions.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring ' LOG.debug('Namespace %s is being terminated, ignoring '
'NetworkPolicy %s in that namespace.', 'NetworkPolicy %s in that namespace.',
policy['metadata']['namespace'], policy['metadata']['namespace'],
policy['metadata']['name']) policy['metadata']['name'])
return return
else: else:
self._patch_knp_crd(policy, i_rules, e_rules, knp) self._patch_knp_crd(policy, i_rules, e_rules, knp)

View File

@ -1127,7 +1127,6 @@ class NestedVIFPool(BaseVIFPool):
"pools.") "pools.")
raise exceptions.ResourceNotReady(net_id) raise exceptions.ResourceNotReady(net_id)
epool = eventlet.GreenPool(constants.LEFTOVER_RM_POOL_SIZE)
ports_to_remove = [] ports_to_remove = []
# NOTE(ltomasbo): Note the pods should already be deleted, but their # NOTE(ltomasbo): Note the pods should already be deleted, but their
@ -1143,6 +1142,10 @@ class NestedVIFPool(BaseVIFPool):
for p_id in sg_ports] for p_id in sg_ports]
try: try:
self._drv_vif._remove_subports(trunk_id, ports_id) self._drv_vif._remove_subports(trunk_id, ports_id)
except os_exc.NotFoundException:
# We don't know which subport was already removed, but we'll
# attempt a manual detach on DELETE error, so just continue.
pass
except (os_exc.SDKException, os_exc.HttpException): except (os_exc.SDKException, os_exc.HttpException):
LOG.exception('Error removing subports from trunk: %s', LOG.exception('Error removing subports from trunk: %s',
trunk_id) trunk_id)
@ -1164,11 +1167,9 @@ class NestedVIFPool(BaseVIFPool):
except KeyError: except KeyError:
pass pass
for result in epool.imap(c_utils.delete_neutron_port, ports_to_remove): if not c_utils.delete_ports(ports_to_remove):
if result: LOG.error('Some ports failed to be deleted.')
LOG.error('During Neutron port deletion an error occured: %s', raise exceptions.ResourceNotReady(net_id)
result)
raise result
class MultiVIFPool(base.VIFPoolDriver): class MultiVIFPool(base.VIFPoolDriver):

View File

@ -87,10 +87,10 @@ class ServiceHandler(k8s_base.ResourceEventHandler):
self._bump_network_policies(service) self._bump_network_policies(service)
self.create_crd_spec(service) self.create_crd_spec(service)
except k_exc.K8sNamespaceTerminating: except k_exc.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring ' LOG.debug('Namespace %s is being terminated, ignoring '
'Service %s in that namespace.', 'Service %s in that namespace.',
service['metadata']['namespace'], service['metadata']['namespace'],
service['metadata']['name']) service['metadata']['name'])
return return
elif self._has_lbaas_spec_changes(service, loadbalancer_crd): elif self._has_lbaas_spec_changes(service, loadbalancer_crd):
self._update_crd_spec(loadbalancer_crd, service) self._update_crd_spec(loadbalancer_crd, service)
@ -347,9 +347,9 @@ class EndpointsHandler(k8s_base.ResourceEventHandler):
try: try:
self._update_crd_spec(loadbalancer_crd, endpoints) self._update_crd_spec(loadbalancer_crd, endpoints)
except k_exc.K8sNamespaceTerminating: except k_exc.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring ' LOG.debug('Namespace %s is being terminated, ignoring '
'Endpoints %s in that namespace.', 'Endpoints %s in that namespace.',
ep_namespace, ep_name) ep_namespace, ep_name)
def on_deleted(self, endpoints, *args, **kwargs): def on_deleted(self, endpoints, *args, **kwargs):
self._remove_endpoints(endpoints) self._remove_endpoints(endpoints)

View File

@ -108,10 +108,9 @@ class VIFHandler(k8s_base.ResourceEventHandler):
except k_exc.K8sNamespaceTerminating: except k_exc.K8sNamespaceTerminating:
# The underlying namespace is being terminated, we can # The underlying namespace is being terminated, we can
# ignore this and let `on_finalize` handle this now. # ignore this and let `on_finalize` handle this now.
LOG.warning('Namespace %s is being terminated, ignoring Pod ' LOG.debug('Namespace %s is being terminated, ignoring Pod '
'%s in that namespace.', '%s in that namespace.',
pod['metadata']['namespace'], pod['metadata']['namespace'], pod_name)
pod_name)
return return
except k_exc.K8sClientException as ex: except k_exc.K8sClientException as ex:
self.k8s.add_event(pod, 'FailedToCreateKuryrPortCRD', self.k8s.add_event(pod, 'FailedToCreateKuryrPortCRD',

View File

@ -1757,8 +1757,7 @@ class NestedVIFPool(test_base.TestCase):
m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id, m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id,
[port_id]) [port_id])
m_driver._drv_vif._release_vlan_id.assert_called_once_with(vlan_id) m_driver._drv_vif._release_vlan_id.assert_called_once_with(vlan_id)
m_pool.imap.assert_called_once_with(utils.delete_neutron_port, m_pool.imap.assert_called_once_with(utils.delete_port, [port_id])
[port_id])
def test_delete_network_pools_not_ready(self): def test_delete_network_pools_not_ready(self):
cls = vif_pool.NestedVIFPool cls = vif_pool.NestedVIFPool
@ -1851,5 +1850,4 @@ class NestedVIFPool(test_base.TestCase):
m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id, m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id,
[port_id]) [port_id])
m_driver._drv_vif._release_vlan_id.assert_not_called() m_driver._drv_vif._release_vlan_id.assert_not_called()
m_pool.imap.assert_called_once_with(utils.delete_neutron_port, m_pool.imap.assert_called_once_with(utils.delete_port, [port_id])
[port_id])