Reset vm_state to original value if rebuild claim fails

If the rebuild resource claim or the group affinity policy check
fails while evacuating an active or stopped server, the guest
itself has not actually changed, yet the vm_state is set to ERROR
by the _error_out_instance_on_exception context manager.

This builds on Ie4f9177f4d54cbc7dbcf58bd107fd5f24c60d8bb by
wrapping the BuildAbortException in InstanceFaultRollback for the
claim/group policy failures so that the vm_state remains unchanged.
Note that the overall instance action record is still marked as a
failure, since the BuildAbortException is re-raised and the
wrap_instance_event decorator fails the action; this is how the
user can tell the operation failed.

Change-Id: I07fa46690d8f7b846665bc59c5e361873154382b
Closes-Bug: #1784983
Matt Riedemann 2019-10-30 12:11:43 -04:00
parent 8f341eb4a4
commit 26e1d9c723
3 changed files with 44 additions and 6 deletions
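
To make the mechanism concrete, here is a minimal sketch of the
pattern, assuming simplified stand-ins (a plain dict for the
instance and a trimmed-down context manager) rather than Nova's
actual objects: the caller captures vm_state before starting the
operation, and the context manager restores it when the error
arrives wrapped in InstanceFaultRollback.

    import contextlib

    class InstanceFaultRollback(Exception):
        """Carries a real error while signaling 'keep the instance state'."""
        def __init__(self, inner_exception):
            super().__init__(str(inner_exception))
            self.inner_exception = inner_exception

    @contextlib.contextmanager
    def error_out_instance_on_exception(instance, instance_state='active'):
        try:
            yield
        except InstanceFaultRollback as error:
            # The operation failed before the guest was touched, so put
            # back the vm_state the caller captured up front.
            instance['vm_state'] = instance_state
            instance['task_state'] = None
            # Re-raise the wrapped error so callers (and the instance
            # action record) still see the failure.
            raise error.inner_exception
        except Exception:
            # Anything else may have disturbed the guest: error it out.
            instance['vm_state'] = 'error'
            raise

Under this shape a failed claim raises InstanceFaultRollback
wrapping a BuildAbortException; the caller still sees the
BuildAbortException, but vm_state stays at its captured value
instead of flipping to ERROR.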

nova/compute/manager.py

@@ -3335,7 +3335,12 @@ class ComputeManager(manager.Manager):
         allocs = self.reportclient.get_allocations_for_consumer(
             context, instance.uuid)
 
-        with self._error_out_instance_on_exception(context, instance):
+        # If the resource claim or group policy validation fails before we
+        # do anything to the guest or its networking/volumes we want to keep
+        # the current status rather than put the instance into ERROR status.
+        instance_state = instance.vm_state
+        with self._error_out_instance_on_exception(
+                context, instance, instance_state=instance_state):
             try:
                 self._do_rebuild_instance_with_claim(
                     context, instance, orig_image_ref,
@@ -3369,8 +3374,13 @@ class ComputeManager(manager.Manager):
                 self.rt.delete_allocation_for_evacuated_instance(
                     context, instance, scheduled_node, node_type='destination')
                 self._notify_instance_rebuild_error(context, instance, e, bdms)
-                raise exception.BuildAbortException(
-                    instance_uuid=instance.uuid, reason=e.format_message())
+                # Wrap this in InstanceFaultRollback so that the
+                # _error_out_instance_on_exception context manager keeps the
+                # vm_state unchanged.
+                raise exception.InstanceFaultRollback(
+                    inner_exception=exception.BuildAbortException(
+                        instance_uuid=instance.uuid,
+                        reason=e.format_message()))
             except (exception.InstanceNotFound,
                     exception.UnexpectedDeletingTaskStateError) as e:
                 LOG.debug('Instance was deleted while rebuilding',

nova/tests/functional/test_servers.py

@@ -2503,8 +2503,10 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
         # evacuate the server
         self.api.post_server_action(server['id'], {'evacuate': {}})
         # the migration will fail on the dest node and the instance will
-        # go into error state
-        server = self._wait_for_state_change(self.api, server, 'ERROR')
+        # stay ACTIVE and task_state will be set to None.
+        server = self._wait_for_server_parameter(
+            self.api, server, {'status': 'ACTIVE',
+                               'OS-EXT-STS:task_state': None})
 
         # Run the periodics to show those don't modify allocations.
         self._run_periodics()
@@ -7204,7 +7206,8 @@ class ServerMoveWithPortResourceRequestTest(
         server = self._wait_for_server_parameter(
             self.api, server,
             {'OS-EXT-SRV-ATTR:host': 'host1',
-             'status': 'ERROR'})
+             'status': 'ACTIVE',
+             'OS-EXT-STS:task_state': None})
 
         self._wait_for_migration_status(server, ['failed'])
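
Both functional tests now expect the server to settle back to
ACTIVE with its task_state cleared instead of going to ERROR. They
do so via a polling helper; as a rough sketch of the idea (not
Nova's actual test helper), assuming a hypothetical get_server
callable that returns the API's JSON view of the server:

    import time

    def wait_for_server_parameter(get_server, server_id, expected,
                                  timeout=30.0, interval=1.0):
        """Poll a server until every expected field matches, or time out."""
        deadline = time.time() + timeout
        while True:
            server = get_server(server_id)
            if all(server.get(key) == value
                   for key, value in expected.items()):
                return server
            if time.time() > deadline:
                raise AssertionError(
                    'server %s never reached %s; last seen: %s'
                    % (server_id, expected, server))
            time.sleep(interval)

The tests above would then pass {'status': 'ACTIVE',
'OS-EXT-STS:task_state': None}, which only matches once the compute
manager has restored the original vm_state and cleared the task
state.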

nova/tests/unit/compute/test_compute_mgr.py

@@ -5007,6 +5007,28 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         delete_alloc.assert_called_once_with(self.context, instance, 'foo',
                                              node_type='destination')
 
+    @mock.patch('nova.objects.Instance.save')
+    @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
+    @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
+                'delete_allocation_for_evacuated_instance')
+    def test_rebuild_compute_resources_unavailable(self, mock_delete_alloc,
+                                                   mock_add_fault,
+                                                   mock_save):
+        """Tests that when the rebuild_claim fails with
+        ComputeResourcesUnavailable the vm_state on the instance remains
+        unchanged.
+        """
+        instance = fake_instance.fake_instance_obj(self.context)
+        instance.vm_state = vm_states.ACTIVE
+        ex = exception.ComputeResourcesUnavailable(reason='out of foo')
+        self.assertRaises(exception.BuildAbortException,
+                          self._test_rebuild_ex, instance, ex)
+        # Make sure the instance vm_state did not change.
+        self.assertEqual(vm_states.ACTIVE, instance.vm_state)
+        mock_delete_alloc.assert_called_once()
+        mock_save.assert_called()
+        mock_add_fault.assert_called_once()
+
     @mock.patch('nova.context.RequestContext.elevated')
     @mock.patch('nova.objects.instance.Instance.drop_migration_context')
     @mock.patch('nova.objects.instance.Instance.apply_migration_context')
@@ -5071,6 +5093,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         instance = fake_instance.fake_instance_obj(self.context)
         instance.info_cache = None
         instance.system_metadata = {}
+        instance.vm_state = vm_states.ACTIVE
         elevated_context = mock.Mock()
         mock_context_elevated.return_value = elevated_context
         request_spec = objects.RequestSpec()
@@ -5094,6 +5117,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         mock_notify.assert_called_once_with(
             elevated_context, instance, 'fake-mini', bdms=None, exception=exc,
             phase='error', tb=mock.ANY)
+        # Make sure the instance vm_state did not change.
+        self.assertEqual(vm_states.ACTIVE, instance.vm_state)
 
     def test_rebuild_node_not_updated_if_not_recreate(self):
         node = uuidutils.generate_uuid()  # ironic node uuid