ironic: Fix ConflictException when deleting server
When unprovision works via Ironic, all operations in _cleanup_deploy have already been completed. Previous to this patch, we continue attempting all the clean up steps, which eventually errors out with BadRequest, or similar, and we complete the delete. Howerver, if cleaning has started, we hit a conflict exception, so we don't hit the expected error above. Prior to moving to the SDK, that landed in Caracal, we would retry on conflict errors. You can tweak the config to keep retrying for the length of time cleaning usually takes in your enviroment. After this patch: Ieda5636a5e80ea4af25db2e90be241869759e30c We now hard fail with this error: openstack.exceptions.ConflictException: Failed to detach VIF xxx from bare metal node yyy ... Node yyy is locked by host zzz, please retry after the current operation is completed. This change simply skips calling the operations that will always error out, avoiding the need to wait for cleainging to complete before getting the expected error message. Closes-Bug: #2019977 Related-Bug: #1815799 Change-Id: I60971b58cf1f24bdb5d40f668e380ebfee2ac495
This commit is contained in:
parent
8c953d4d25
commit
6ebd8a56d1
nova
releasenotes/notes
@ -1723,8 +1723,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
||||
self.mock_conn.nodes.assert_called_with(
|
||||
instance_id=instance.uuid,
|
||||
fields=ironic_driver._NODE_FIELDS)
|
||||
mock_cleanup_deploy.assert_called_with(node, instance, network_info,
|
||||
remove_instance_info=False)
|
||||
|
||||
# For states that makes sense check if set_node_provision_state has
|
||||
# been called
|
||||
@ -1735,7 +1733,8 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
||||
self.assertFalse(mock_remove_instance_info.called)
|
||||
else:
|
||||
self.mock_conn.set_node_provision_state.assert_not_called()
|
||||
mock_remove_instance_info.assert_called_once_with(node)
|
||||
mock_cleanup_deploy.assert_called_with(
|
||||
node, instance, network_info)
|
||||
|
||||
# we call this innter function twice so we need to reset mocks
|
||||
self.mock_conn.set_node_provision_state.reset_mock()
|
||||
@ -1765,27 +1764,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
||||
self.driver.destroy,
|
||||
self.ctx, instance, None, None,
|
||||
)
|
||||
mock_clean.assert_called_once_with(node, instance, None,
|
||||
remove_instance_info=False)
|
||||
|
||||
@mock.patch.object(ironic_driver.IronicDriver,
|
||||
'_validate_instance_and_node')
|
||||
@mock.patch.object(ironic_driver.IronicDriver,
|
||||
'_cleanup_deploy')
|
||||
def test_destroy_trigger_remove_info_fail(self, mock_clean, fake_validate):
|
||||
node_uuid = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
|
||||
node = ironic_utils.get_test_node(
|
||||
driver='fake', id=node_uuid,
|
||||
provision_state=ironic_states.AVAILABLE)
|
||||
fake_validate.return_value = node
|
||||
instance = fake_instance.fake_instance_obj(self.ctx,
|
||||
node=node_uuid)
|
||||
self.mock_conn.update_node.side_effect = SystemError('unexpected '
|
||||
'error')
|
||||
self.assertRaises(SystemError, self.driver.destroy,
|
||||
self.ctx, instance, None, None)
|
||||
mock_clean.assert_called_once_with(node, instance, None,
|
||||
remove_instance_info=False)
|
||||
mock_clean.assert_not_called()
|
||||
|
||||
@mock.patch.object(ironic_driver.IronicDriver,
|
||||
'_validate_instance_and_node')
|
||||
@ -2617,18 +2596,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
||||
mock_unvif.assert_called_once_with(node, instance, None)
|
||||
mock_remove_info.assert_called_once_with(node)
|
||||
|
||||
@mock.patch.object(ironic_driver.IronicDriver, '_unplug_vifs')
|
||||
@mock.patch.object(ironic_driver.IronicDriver,
|
||||
'_cleanup_volume_target_info')
|
||||
def test__cleanup_deploy_no_remove_ii(self, mock_vol, mock_unvif):
|
||||
# TODO(TheJulia): This REALLY should be updated to cover all of the
|
||||
# calls that take place.
|
||||
node = ironic_utils.get_test_node(driver='fake')
|
||||
instance = fake_instance.fake_instance_obj(self.ctx, node=node.id)
|
||||
self.driver._cleanup_deploy(node, instance, remove_instance_info=False)
|
||||
mock_vol.assert_called_once_with(instance)
|
||||
mock_unvif.assert_called_once_with(node, instance, None)
|
||||
|
||||
|
||||
class IronicDriverSyncTestCase(IronicDriverTestCase):
|
||||
|
||||
|
@ -504,12 +504,10 @@ class IronicDriver(virt_driver.ComputeDriver):
|
||||
'reason': e},
|
||||
instance=instance)
|
||||
|
||||
def _cleanup_deploy(self, node, instance, network_info=None,
|
||||
remove_instance_info=True):
|
||||
def _cleanup_deploy(self, node, instance, network_info=None):
|
||||
self._cleanup_volume_target_info(instance)
|
||||
self._unplug_vifs(node, instance, network_info)
|
||||
if remove_instance_info:
|
||||
self._remove_instance_info_from_node(node)
|
||||
self._remove_instance_info_from_node(node)
|
||||
|
||||
def _wait_for_active(self, instance):
|
||||
"""Wait for the node to be marked as ACTIVE in Ironic."""
|
||||
@ -1366,21 +1364,16 @@ class IronicDriver(virt_driver.ComputeDriver):
|
||||
# without raising any exceptions.
|
||||
return
|
||||
|
||||
try:
|
||||
if node.provision_state in _UNPROVISION_STATES:
|
||||
self._unprovision(instance, node)
|
||||
else:
|
||||
# NOTE(hshiina): if spawn() fails before ironic starts
|
||||
# provisioning, instance information should be
|
||||
# removed from ironic node.
|
||||
self._remove_instance_info_from_node(node)
|
||||
finally:
|
||||
# NOTE(mgoddard): We don't need to remove instance info at this
|
||||
# point since we will have already done it. The destroy will only
|
||||
# succeed if this method returns without error, so we will end up
|
||||
# removing the instance info eventually.
|
||||
self._cleanup_deploy(node, instance, network_info,
|
||||
remove_instance_info=False)
|
||||
if node.provision_state in _UNPROVISION_STATES:
|
||||
# NOTE(mgoddard): Ironic's node tear-down procedure includes all of
|
||||
# the things we do in _cleanup_deploy, so let's not repeat them
|
||||
# here. Doing so would also race with the node cleaning process,
|
||||
# which may acquire the node lock and prevent us from making
|
||||
# changes to the node. See
|
||||
# https://bugs.launchpad.net/nova/+bug/2019977
|
||||
self._unprovision(instance, node)
|
||||
else:
|
||||
self._cleanup_deploy(node, instance, network_info)
|
||||
|
||||
LOG.info('Successfully unprovisioned Ironic node %s',
|
||||
node.id, instance=instance)
|
||||
|
7
releasenotes/notes/bug-2019977-4afe7658394130b8.yaml
Normal file
7
releasenotes/notes/bug-2019977-4afe7658394130b8.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue seen when using bare metal (Ironic) instances where an
|
||||
instance could fail to delete. See `Bug 2019977`_ for more details.
|
||||
|
||||
.. _Bug 2019977: https://bugs.launchpad.net/nova/+bug/2019977
|
Loading…
x
Reference in New Issue
Block a user