ironic: Fix ConflictException when deleting server

When unprovision works via Ironic, all operations in _cleanup_deploy
have already been completed. Previous to this patch, we continue
attempting all the clean up steps, which eventually errors out with
BadRequest, or similar, and we complete the delete.

Howerver, if cleaning has started, we hit a conflict exception,
so we don't hit the expected error above.

Prior to moving to the SDK, that landed in Caracal,
we would retry on conflict errors. You can tweak the
config to keep retrying for the length of time cleaning
usually takes in your enviroment.

After this patch:
Ieda5636a5e80ea4af25db2e90be241869759e30c

We now hard fail with this error:

openstack.exceptions.ConflictException:
Failed to detach VIF xxx from bare metal node yyy
...
Node yyy is locked by host zzz,
please retry after the current operation is completed.

This change simply skips calling the operations that
will always error out, avoiding the need to wait for
cleainging to complete before getting the expected
error message.

Closes-Bug: #2019977
Related-Bug: #1815799
Change-Id: I60971b58cf1f24bdb5d40f668e380ebfee2ac495
This commit is contained in:
Mark Goddard 2023-05-17 14:20:54 +01:00 committed by John Garbutt
parent 8c953d4d25
commit 6ebd8a56d1
No known key found for this signature in database
3 changed files with 22 additions and 55 deletions
nova
tests/unit/virt/ironic
virt/ironic
releasenotes/notes

@ -1723,8 +1723,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.mock_conn.nodes.assert_called_with(
instance_id=instance.uuid,
fields=ironic_driver._NODE_FIELDS)
mock_cleanup_deploy.assert_called_with(node, instance, network_info,
remove_instance_info=False)
# For states that makes sense check if set_node_provision_state has
# been called
@ -1735,7 +1733,8 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.assertFalse(mock_remove_instance_info.called)
else:
self.mock_conn.set_node_provision_state.assert_not_called()
mock_remove_instance_info.assert_called_once_with(node)
mock_cleanup_deploy.assert_called_with(
node, instance, network_info)
# we call this innter function twice so we need to reset mocks
self.mock_conn.set_node_provision_state.reset_mock()
@ -1765,27 +1764,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.driver.destroy,
self.ctx, instance, None, None,
)
mock_clean.assert_called_once_with(node, instance, None,
remove_instance_info=False)
@mock.patch.object(ironic_driver.IronicDriver,
'_validate_instance_and_node')
@mock.patch.object(ironic_driver.IronicDriver,
'_cleanup_deploy')
def test_destroy_trigger_remove_info_fail(self, mock_clean, fake_validate):
node_uuid = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
node = ironic_utils.get_test_node(
driver='fake', id=node_uuid,
provision_state=ironic_states.AVAILABLE)
fake_validate.return_value = node
instance = fake_instance.fake_instance_obj(self.ctx,
node=node_uuid)
self.mock_conn.update_node.side_effect = SystemError('unexpected '
'error')
self.assertRaises(SystemError, self.driver.destroy,
self.ctx, instance, None, None)
mock_clean.assert_called_once_with(node, instance, None,
remove_instance_info=False)
mock_clean.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver,
'_validate_instance_and_node')
@ -2617,18 +2596,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_unvif.assert_called_once_with(node, instance, None)
mock_remove_info.assert_called_once_with(node)
@mock.patch.object(ironic_driver.IronicDriver, '_unplug_vifs')
@mock.patch.object(ironic_driver.IronicDriver,
'_cleanup_volume_target_info')
def test__cleanup_deploy_no_remove_ii(self, mock_vol, mock_unvif):
# TODO(TheJulia): This REALLY should be updated to cover all of the
# calls that take place.
node = ironic_utils.get_test_node(driver='fake')
instance = fake_instance.fake_instance_obj(self.ctx, node=node.id)
self.driver._cleanup_deploy(node, instance, remove_instance_info=False)
mock_vol.assert_called_once_with(instance)
mock_unvif.assert_called_once_with(node, instance, None)
class IronicDriverSyncTestCase(IronicDriverTestCase):

@ -504,12 +504,10 @@ class IronicDriver(virt_driver.ComputeDriver):
'reason': e},
instance=instance)
def _cleanup_deploy(self, node, instance, network_info=None,
remove_instance_info=True):
def _cleanup_deploy(self, node, instance, network_info=None):
self._cleanup_volume_target_info(instance)
self._unplug_vifs(node, instance, network_info)
if remove_instance_info:
self._remove_instance_info_from_node(node)
self._remove_instance_info_from_node(node)
def _wait_for_active(self, instance):
"""Wait for the node to be marked as ACTIVE in Ironic."""
@ -1366,21 +1364,16 @@ class IronicDriver(virt_driver.ComputeDriver):
# without raising any exceptions.
return
try:
if node.provision_state in _UNPROVISION_STATES:
self._unprovision(instance, node)
else:
# NOTE(hshiina): if spawn() fails before ironic starts
# provisioning, instance information should be
# removed from ironic node.
self._remove_instance_info_from_node(node)
finally:
# NOTE(mgoddard): We don't need to remove instance info at this
# point since we will have already done it. The destroy will only
# succeed if this method returns without error, so we will end up
# removing the instance info eventually.
self._cleanup_deploy(node, instance, network_info,
remove_instance_info=False)
if node.provision_state in _UNPROVISION_STATES:
# NOTE(mgoddard): Ironic's node tear-down procedure includes all of
# the things we do in _cleanup_deploy, so let's not repeat them
# here. Doing so would also race with the node cleaning process,
# which may acquire the node lock and prevent us from making
# changes to the node. See
# https://bugs.launchpad.net/nova/+bug/2019977
self._unprovision(instance, node)
else:
self._cleanup_deploy(node, instance, network_info)
LOG.info('Successfully unprovisioned Ironic node %s',
node.id, instance=instance)

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes an issue seen when using bare metal (Ironic) instances where an
instance could fail to delete. See `Bug 2019977`_ for more details.
.. _Bug 2019977: https://bugs.launchpad.net/nova/+bug/2019977