Refresh instance in MigrationTask.execute Exception handler

Cross-cell resize is a synchronous operation from the MigrationTask
which means if something fails on the compute and sets the instance
to ERROR, the Exception block in _cold_migrate in conductor can
blindly reset the vm_state to ACTIVE because it is using a stale
copy of the instance.

This change refreshes the instance before calling _set_vm_state_and_notify
so we don't overwrite the current vm_state. This is not really a
problem for same-cell resize because prep_resize is an RPC cast to
the destination compute so any failures there will be persisted
separately from the Exception block in the _cold_migrate method.

Change-Id: I2254e36407c9f9ff674eaec44c115d7516421de3
This commit is contained in:
Matt Riedemann 2019-07-03 17:52:21 -04:00
parent 425518d198
commit 462d0d813e
3 changed files with 55 additions and 11 deletions

View File

@ -376,11 +376,23 @@ class ComputeTaskManager(base.Base):
updates, ex, request_spec)
except Exception as ex:
with excutils.save_and_reraise_exception():
updates = {'vm_state': instance.vm_state,
'task_state': None}
self._set_vm_state_and_notify(context, instance.uuid,
'migrate_server',
updates, ex, request_spec)
# Refresh the instance so we don't overwrite vm_state changes
# set after we executed the task.
try:
instance.refresh()
# Passing vm_state is kind of silly but it's expected in
# set_vm_state_and_notify.
updates = {'vm_state': instance.vm_state,
'task_state': None}
self._set_vm_state_and_notify(context, instance.uuid,
'migrate_server',
updates, ex, request_spec)
except exception.InstanceNotFound:
# We can't send the notification because the instance is
# gone so just log it.
LOG.info('During %s the instance was deleted.',
'resize' if instance.instance_type_id != flavor.id
else 'cold migrate', instance=instance)
# NOTE(sbauza): Make sure we persist the new flavor in case we had
# a successful scheduler call if and only if nothing bad happened
if request_spec.obj_what_changed():

View File

@ -775,7 +775,7 @@ def set_vm_state_and_notify(context, instance_uuid, service, method, updates,
This becomes part of the publisher_id for the notification payload.
:param method: The method that failed, e.g. 'migrate_server'.
:param updates: dict of updates for the instance object, typically a
vm_state and/or task_state value.
vm_state and task_state value.
:param ex: An exception which occurred during the given method.
:param request_spec: Optional request spec.
"""
@ -790,6 +790,9 @@ def set_vm_state_and_notify(context, instance_uuid, service, method, updates,
else:
request_spec = {}
# TODO(mriedem): We should make vm_state optional since not all callers
# of this method want to change the vm_state, e.g. the Exception block
# in ComputeTaskManager._cold_migrate.
vm_state = updates['vm_state']
properties = request_spec.get('instance_properties', {})
notifier = rpc.get_notifier(service)

View File

@ -2834,13 +2834,15 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
exc_info = test.TestingException('something happened')
select_dest_mock.return_value = [[fake_selection1]]
updates = {'vm_state': vm_states.STOPPED,
updates = {'vm_state': inst_obj.vm_state,
'task_state': None}
prep_resize_mock.side_effect = exc_info
self.assertRaises(test.TestingException,
self.conductor._cold_migrate,
self.context, inst_obj, self.flavor,
{}, True, None, None)
with mock.patch.object(inst_obj, 'refresh') as mock_refresh:
self.assertRaises(test.TestingException,
self.conductor._cold_migrate,
self.context, inst_obj, self.flavor,
{}, True, None, None)
mock_refresh.assert_called_once_with()
# Filter properties are populated during code execution
legacy_filter_props = {'retry': {'num_attempts': 1,
@ -2863,6 +2865,33 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
exc_info, fake_spec)
rollback_mock.assert_called_once_with(exc_info)
@mock.patch('nova.conductor.tasks.migrate.MigrationTask.execute',
side_effect=test.TestingException('execute fails'))
@mock.patch('nova.objects.Instance.refresh',
side_effect=exc.InstanceNotFound(instance_id=uuids.instance))
def test_cold_migrate_exception_instance_refresh_not_found(
self, mock_refresh, mock_execute):
"""Tests the scenario where MigrationTask.execute raises some error
and then the instance.refresh() in the exception block raises
InstanceNotFound because the instance was deleted during the operation.
"""
params = {'uuid': uuids.instance}
instance = self._create_fake_instance_obj(params=params)
filter_properties = {}
clean_shutdown = True
request_spec = fake_request_spec.fake_spec_obj()
request_spec.flavor = instance.flavor
host_list = None
self.assertRaises(test.TestingException,
self.conductor._cold_migrate,
self.context, instance, instance.flavor,
filter_properties, clean_shutdown,
request_spec, host_list)
self.assertIn('During cold migrate the instance was deleted.',
self.stdlog.logger.output)
mock_execute.assert_called_once_with()
mock_refresh.assert_called_once_with()
@mock.patch.object(objects.RequestSpec, 'save')
@mock.patch.object(migrate.MigrationTask, 'execute')
@mock.patch.object(utils, 'get_image_from_system_metadata')