Abort live-migration during instance_init

When the compute service restarts during a live-migration,
we lose the live-migration monitoring thread. In that case
it is better to abort the live-migration job early, before resetting
the state of the instance; this prevents the API from accepting further
actions while the unmanaged migration process is still running in the
background. It also avoids the unexpected/dangerous behavior described
in the related bug.

Change-Id: Idec2d31cbba497dc4b20912f3388ad2341951d23
Closes-Bug: #1753676
This commit is contained in:
Alexandre Arents 2019-08-20 13:37:33 +00:00
parent 7aa88029bb
commit ebcf6e4ce5
2 changed files with 34 additions and 5 deletions

View File

@ -874,6 +874,25 @@ class ComputeManager(manager.Manager):
{'cpus': list(pinned_cpus)},
instance=instance)
def _reset_live_migration(self, context, instance):
    """Abort a leftover live-migration job and reset the instance state.

    Called during service startup for an instance stuck in the MIGRATING
    task state: the monitoring thread for its live-migration was lost when
    the compute service restarted, so any still-running migration is
    aborted and its record forced to 'error' before the task_state is
    cleared.

    :param context: request context
    :param instance: the instance found in a migrating state at startup
    """
    running_migration = None
    try:
        running_migration = objects.Migration.get_by_instance_and_status(
            context, instance.uuid, 'running')
        if running_migration:
            self.live_migration_abort(
                context, instance, running_migration.id)
    except Exception:
        # Best effort: log and carry on so the instance state is still
        # reset even if the abort (or the migration lookup) fails.
        LOG.exception('Failed to abort live-migration',
                      instance=instance)
    finally:
        # Whatever happened above, a migration we lost track of must not
        # stay 'running'; mark it 'error' so it is not acted on again.
        if running_migration:
            self._set_migration_status(running_migration, 'error')
    LOG.info('Instance found in migrating state during '
             'startup. Resetting task_state',
             instance=instance)
    instance.task_state = None
    instance.save(expected_task_state=[task_states.MIGRATING])
def _init_instance(self, context, instance):
"""Initialize this instance during service init."""
@ -1106,9 +1125,8 @@ class ComputeManager(manager.Manager):
instance.save()
if instance.task_state == task_states.MIGRATING:
# Live migration did not complete, but instance is on this
# host, so reset the state.
instance.task_state = None
instance.save(expected_task_state=[task_states.MIGRATING])
# host. Abort ongoing migration if still running and reset state.
self._reset_live_migration(context, instance)
db_state = instance.power_state
drv_state = self._get_power_state(context, instance)

View File

@ -1508,14 +1508,25 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
vm_state=vm_states.ACTIVE,
host=self.compute.host,
task_state=task_states.MIGRATING)
migration = objects.Migration(source_compute='fake-host1', id=39,
dest_compute='fake-host2')
with test.nested(
mock.patch.object(instance, 'save'),
mock.patch('nova.objects.Instance.get_network_info',
return_value=network_model.NetworkInfo())
) as (save, get_nw_info):
return_value=network_model.NetworkInfo()),
mock.patch.object(objects.Migration, 'get_by_instance_and_status',
return_value=migration),
mock.patch.object(self.compute, 'live_migration_abort'),
mock.patch.object(self.compute, '_set_migration_status')
) as (save, get_nw_info, mock_get_status, mock_abort, mock_set_migr):
self.compute._init_instance(self.context, instance)
save.assert_called_once_with(expected_task_state=['migrating'])
get_nw_info.assert_called_once_with()
mock_get_status.assert_called_with(self.context, instance.uuid,
'running')
mock_abort.assert_called_with(self.context, instance,
migration.id)
mock_set_migr.assert_called_with(migration, 'error')
self.assertIsNone(instance.task_state)
self.assertEqual(vm_states.ACTIVE, instance.vm_state)