Avoid loadbalancer stuck in PENDING_X if delete_vip_port fails

If either on a failed loadbalancer creation action, or in a loadbalancer
deletion, the delete_vip_port fails with a different exception than
PortNotFound (for instance, if neutron API is unreachable at that time),
the loadbalancer will remain stuck in PENDING_X, as the ERROR status
is not returned

Closes-Bug: #1965732
(manually cherry picked from commit 28c50d9147)

Change-Id: I7701ac3ab7358d4ab061645a7221a541cd183aa7
This commit is contained in:
Fernando Royo 2022-03-29 18:04:02 +02:00
parent f208e7e9de
commit 43423d6daf
2 changed files with 76 additions and 9 deletions

View File

@ -497,8 +497,8 @@ class OvnProviderHelper(object):
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
driver_exceptions.UpdateStatusError),
wait=tenacity.wait_exponential(),
stop=tenacity.stop_after_delay(10),
wait=tenacity.wait_exponential(max=75),
stop=tenacity.stop_after_attempt(15),
reraise=True)
def _update_status_to_octavia(self, status):
status = OvnProviderHelper._delete_disabled_from_status(status)
@ -1134,9 +1134,15 @@ class OvnProviderHelper(object):
LOG.exception(EXCEPTION_MSG, "creation of loadbalancer")
# Any Exception set the status to ERROR
if isinstance(port, dict):
self.delete_vip_port(port.get('id'))
LOG.warning("Deleting the VIP port %s since LB went into "
"ERROR state", str(port.get('id')))
try:
self.delete_vip_port(port.get('id'))
LOG.warning("Deleting the VIP port %s since LB went into "
"ERROR state", str(port.get('id')))
except Exception:
LOG.exception("Error deleting the VIP port %s upon "
"loadbalancer %s creation failure",
str(port.get('id')),
str(loadbalancer[constants.ID]))
status = {
'loadbalancers': [{"id": loadbalancer['id'],
"provisioning_status": constants.ERROR,
@ -1159,6 +1165,24 @@ class OvnProviderHelper(object):
LOG.warning("Loadbalancer %s not found in OVN Northbound DB. "
"Setting the Loadbalancer status to DELETED "
"in Octavia", str(loadbalancer['id']))
# NOTE(ltomasbo): In case the previous loadbalancer deletion
# action failed at VIP deletion step, this ensures the VIP
# is not leaked
try:
vip_port_id = self._get_vip_port_from_loadbalancer_id(
loadbalancer[constants.ID])
if vip_port_id:
LOG.warning("Deleting the VIP port %s associated to LB "
"missing in OVN DBs", str(vip_port_id))
self.delete_vip_port(vip_port_id)
except Exception:
LOG.exception("Error deleting the VIP port %s",
str(vip_port_id))
status = {
'loadbalancers': [{"id": loadbalancer['id'],
"provisioning_status": constants.ERROR,
"operating_status": constants.ERROR}]}
return status
try:
@ -1171,14 +1195,15 @@ class OvnProviderHelper(object):
# dict while iterating over it. So first get a list of keys.
# https://cito.github.io/blog/never-iterate-a-changing-dict/
status = {key: value for key, value in status.items() if value}
# Delete VIP port from neutron.
self.delete_vip_port(port_id)
except Exception:
LOG.exception(EXCEPTION_MSG, "deletion of loadbalancer")
status = {
'loadbalancers': [{"id": loadbalancer['id'],
"provisioning_status": constants.ERROR,
"operating_status": constants.ERROR}]}
# Delete VIP port from neutron.
self.delete_vip_port(port_id)
return status
def _lb_delete(self, loadbalancer, ovn_lb, status):
@ -1281,6 +1306,11 @@ class OvnProviderHelper(object):
lb_status['operating_status'] = constants.ERROR
return status
def _get_vip_port_from_loadbalancer_id(self, lb_id):
lb = self._octavia_driver_lib.get_loadbalancer(lb_id)
if lb and lb.vip_port_id:
return lb.vip_port_id
def listener_create(self, listener):
ovn_lb = self._get_or_create_ovn_lb(
listener['loadbalancer_id'],
@ -1959,6 +1989,12 @@ class OvnProviderHelper(object):
LOG.debug('VIP Port already exists, uuid: %s', port['id'])
return {'port': port}
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
n_exc.NeutronClientException),
wait=tenacity.wait_exponential(max=75),
stop=tenacity.stop_after_attempt(15),
reraise=True)
def delete_vip_port(self, port_id):
neutron_client = get_neutron_client()
try:

View File

@ -1250,9 +1250,12 @@ class TestOvnProviderHelper(TestOvnOctaviaBase):
self.ovn_lb.uuid)
del_port.assert_called_once_with('foo_port')
@mock.patch.object(ovn_driver.OvnProviderHelper,
'_get_vip_port_from_loadbalancer_id')
@mock.patch.object(ovn_driver.OvnProviderHelper, 'delete_vip_port')
def test_lb_delete_row_not_found(self, del_port):
def test_lb_delete_row_not_found(self, del_port, get_vip_port):
self.helper._find_ovn_lbs.side_effect = [idlutils.RowNotFound]
get_vip_port.return_value = None
status = self.helper.lb_delete(self.lb)
self.assertEqual(status['loadbalancers'][0]['provisioning_status'],
constants.DELETED)
@ -1260,6 +1263,22 @@ class TestOvnProviderHelper(TestOvnOctaviaBase):
constants.OFFLINE)
self.helper.ovn_nbdb_api.lb_del.assert_not_called()
del_port.assert_not_called()
get_vip_port.assert_called_once_with(self.lb['id'])
@mock.patch.object(ovn_driver.OvnProviderHelper,
'_get_vip_port_from_loadbalancer_id')
@mock.patch.object(ovn_driver.OvnProviderHelper, 'delete_vip_port')
def test_lb_delete_row_not_found_vip_leak(self, del_port, get_vip_port):
self.helper._find_ovn_lbs.side_effect = [idlutils.RowNotFound]
get_vip_port.return_value = 'foo_port'
status = self.helper.lb_delete(self.lb)
self.assertEqual(status['loadbalancers'][0]['provisioning_status'],
constants.DELETED)
self.assertEqual(status['loadbalancers'][0]['operating_status'],
constants.OFFLINE)
self.helper.ovn_nbdb_api.lb_del.assert_not_called()
del_port.assert_called_once_with('foo_port')
get_vip_port.assert_called_once_with(self.lb['id'])
@mock.patch.object(ovn_driver.OvnProviderHelper, 'delete_vip_port')
def test_lb_delete_exception(self, del_port):
@ -1271,7 +1290,7 @@ class TestOvnProviderHelper(TestOvnOctaviaBase):
constants.ERROR)
self.helper.ovn_nbdb_api.lb_del.assert_called_once_with(
self.ovn_lb.uuid)
del_port.assert_called_once_with('foo_port')
del_port.assert_not_called()
@mock.patch('ovn_octavia_provider.driver.get_neutron_client')
@mock.patch.object(ovn_driver.OvnProviderHelper, 'delete_vip_port')
@ -1287,6 +1306,18 @@ class TestOvnProviderHelper(TestOvnOctaviaBase):
self.ovn_lb.uuid)
del_port.assert_called_once_with('foo_port')
@mock.patch.object(ovn_driver.OvnProviderHelper, 'delete_vip_port')
def test_lb_delete_port_exception(self, del_port):
del_port.side_effect = [Exception]
status = self.helper.lb_delete(self.ovn_lb)
self.assertEqual(status['loadbalancers'][0]['provisioning_status'],
constants.ERROR)
self.assertEqual(status['loadbalancers'][0]['operating_status'],
constants.ERROR)
self.helper.ovn_nbdb_api.lb_del.assert_called_once_with(
self.ovn_lb.uuid)
del_port.assert_called_once_with('foo_port')
@mock.patch('ovn_octavia_provider.driver.get_neutron_client')
def test_lb_delete_cascade(self, net_cli):
net_cli.return_value.delete_port.return_value = None