Error handling improvements

This combines a few error handling improvements fixing problems found by
e2e K8s tests.

1. Logs on K8sNamespaceTerminating are no longer on WARNING but DEBUG
   level. This is because they're harmless, yet they can spam the logs
   when multiple namespaces are deleted, such as in e2e K8s tests.
2. 400 Bad Request is ignored on LB member creation. This happens when
   the subnet got deleted in the meantime and the LB will be gone soon too.
3. 404 Not Found is ignored when subports are detached from a trunk.
   This can happen when some other thread has already detached that port.
   If the request was just for a single port, then the error can be
   safely ignored. In the case of a bulk request we don't really know
   which and how many subports are detached already, so on a detach
   error we'll just proceed to delete the ports and, on errors, attempt
   to detach them one-by-one.

Change-Id: Ic11f15e44086f8b25380e20457f28c351403b4d9
This commit is contained in:
Michał Dulko 2022-05-11 17:25:26 +02:00
parent 38c0ac62ba
commit 8f61307fa6
7 changed files with 41 additions and 26 deletions

View File

@ -700,7 +700,18 @@ class LBaaSv2Driver(base.LBaaSDriver):
}
self.add_tags('member', request)
lbaas = clients.get_loadbalancer_client()
response = lbaas.create_member(member['pool_id'], **request)
try:
response = lbaas.create_member(member['pool_id'], **request)
except os_exc.BadRequestException as e:
details = e.response.json()
if (details['faultstring'] == f'Subnet {member["subnet_id"]} not '
f'found.'):
# Most likely the subnet is deleted already as the namespace is
# being deleted. Ignore, we'll delete that LB soon anyway.
LOG.warning('Member %s not created as subnet %s is being '
'deleted.', member['name'], member['subnet_id'])
return None
raise
member['id'] = response.id
return member

View File

@ -280,6 +280,12 @@ class NestedVlanPodVIFDriver(nested_vif.NestedPodVIFDriver):
subports_body.append({'port_id': subport_id})
try:
os_net.delete_trunk_subports(trunk_id, subports_body)
except os_exc.NotFoundException:
if len(subports_id) > 1:
LOG.debug('Not Found on subport deletion, most likely some '
'subports in the list got detached already.')
raise # We don't know if all ports are detached, so raise.
# If single requested port is detached already, we're cool, ignore.
except os_exc.SDKException:
LOG.exception("Error happened during subport removal from "
"trunk %s", trunk_id)

View File

@ -118,10 +118,10 @@ class NetworkPolicyDriver(base.NetworkPolicyDriver):
try:
self._create_knp_crd(policy, i_rules, e_rules)
except exceptions.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring '
'NetworkPolicy %s in that namespace.',
policy['metadata']['namespace'],
policy['metadata']['name'])
LOG.debug('Namespace %s is being terminated, ignoring '
'NetworkPolicy %s in that namespace.',
policy['metadata']['namespace'],
policy['metadata']['name'])
return
else:
self._patch_knp_crd(policy, i_rules, e_rules, knp)

View File

@ -1127,7 +1127,6 @@ class NestedVIFPool(BaseVIFPool):
"pools.")
raise exceptions.ResourceNotReady(net_id)
epool = eventlet.GreenPool(constants.LEFTOVER_RM_POOL_SIZE)
ports_to_remove = []
# NOTE(ltomasbo): Note the pods should already be deleted, but their
@ -1143,6 +1142,10 @@ class NestedVIFPool(BaseVIFPool):
for p_id in sg_ports]
try:
self._drv_vif._remove_subports(trunk_id, ports_id)
except os_exc.NotFoundException:
# We don't know which subport was already removed, but we'll
# attempt a manual detach on DELETE error, so just continue.
pass
except (os_exc.SDKException, os_exc.HttpException):
LOG.exception('Error removing subports from trunk: %s',
trunk_id)
@ -1164,11 +1167,9 @@ class NestedVIFPool(BaseVIFPool):
except KeyError:
pass
for result in epool.imap(c_utils.delete_neutron_port, ports_to_remove):
if result:
LOG.error('During Neutron port deletion an error occured: %s',
result)
raise result
if not c_utils.delete_ports(ports_to_remove):
LOG.error('Some ports failed to be deleted.')
raise exceptions.ResourceNotReady(net_id)
class MultiVIFPool(base.VIFPoolDriver):

View File

@ -87,10 +87,10 @@ class ServiceHandler(k8s_base.ResourceEventHandler):
self._bump_network_policies(service)
self.create_crd_spec(service)
except k_exc.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring '
'Service %s in that namespace.',
service['metadata']['namespace'],
service['metadata']['name'])
LOG.debug('Namespace %s is being terminated, ignoring '
'Service %s in that namespace.',
service['metadata']['namespace'],
service['metadata']['name'])
return
elif self._has_lbaas_spec_changes(service, loadbalancer_crd):
self._update_crd_spec(loadbalancer_crd, service)
@ -347,9 +347,9 @@ class EndpointsHandler(k8s_base.ResourceEventHandler):
try:
self._update_crd_spec(loadbalancer_crd, endpoints)
except k_exc.K8sNamespaceTerminating:
LOG.warning('Namespace %s is being terminated, ignoring '
'Endpoints %s in that namespace.',
ep_namespace, ep_name)
LOG.debug('Namespace %s is being terminated, ignoring '
'Endpoints %s in that namespace.',
ep_namespace, ep_name)
def on_deleted(self, endpoints, *args, **kwargs):
self._remove_endpoints(endpoints)

View File

@ -108,10 +108,9 @@ class VIFHandler(k8s_base.ResourceEventHandler):
except k_exc.K8sNamespaceTerminating:
# The underlying namespace is being terminated, we can
# ignore this and let `on_finalize` handle this now.
LOG.warning('Namespace %s is being terminated, ignoring Pod '
'%s in that namespace.',
pod['metadata']['namespace'],
pod_name)
LOG.debug('Namespace %s is being terminated, ignoring Pod '
'%s in that namespace.',
pod['metadata']['namespace'], pod_name)
return
except k_exc.K8sClientException as ex:
self.k8s.add_event(pod, 'FailedToCreateKuryrPortCRD',

View File

@ -1757,8 +1757,7 @@ class NestedVIFPool(test_base.TestCase):
m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id,
[port_id])
m_driver._drv_vif._release_vlan_id.assert_called_once_with(vlan_id)
m_pool.imap.assert_called_once_with(utils.delete_neutron_port,
[port_id])
m_pool.imap.assert_called_once_with(utils.delete_port, [port_id])
def test_delete_network_pools_not_ready(self):
cls = vif_pool.NestedVIFPool
@ -1851,5 +1850,4 @@ class NestedVIFPool(test_base.TestCase):
m_driver._drv_vif._remove_subports.assert_called_once_with(trunk_id,
[port_id])
m_driver._drv_vif._release_vlan_id.assert_not_called()
m_pool.imap.assert_called_once_with(utils.delete_neutron_port,
[port_id])
m_pool.imap.assert_called_once_with(utils.delete_port, [port_id])