Compute: add support to abort queued live migration
This patch modifies the compute manager and bumps the nova-compute
service version so that live migrations in ``queued`` and ``preparing``
status can be aborted. A follow-up patch will modify the API part.

Part of blueprint abort-live-migration-in-queued-status

Change-Id: I3689dd6544c387676983be47cf925c3fd7ce72f1
Committed by: Matt Riedemann
Parent: 79dac41fee
Commit: d23794d25f
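
For orientation, the mechanics this change builds on: nova-compute tracks
queued live migrations in a mapping of instance uuid to (migration, future),
where the future belongs to the pool that runs _do_live_migration(). Aborting
a queued migration means popping the mapping entry and cancelling the future.
Below is a minimal, self-contained sketch of that pattern using the stdlib
concurrent.futures; this is not Nova code and all names are illustrative:

    import concurrent.futures
    import threading

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    waiting = {}  # uuid -> (record, future), like _waiting_live_migrations
    release = threading.Event()

    def fake_do_live_migration(uuid):
        release.wait()  # stand-in for the long-running migration work

    for uuid in ('uuid-1', 'uuid-2'):
        record = {'status': 'queued'}
        waiting[uuid] = (record, executor.submit(fake_do_live_migration, uuid))

    # With a single worker, 'uuid-2' cannot start before 'uuid-1' finishes,
    # so it is still queued and its future is cancellable.
    record, future = waiting.pop('uuid-2')
    if future.cancel():
        record['status'] = 'cancelled'
    print(record['status'])  # -> cancelled
    release.set()
    executor.shutdown(wait=True)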
nova/compute/manager.py

@@ -6175,9 +6175,6 @@ class ComputeManager(manager.Manager):
         # done on source/destination. For now, this is just here for status
         # reporting
         self._set_migration_status(migration, 'preparing')
-        # NOTE(Kevin_Zheng): The migration is no longer in the `queued` status
-        # so let's remove it from the mapping.
-        self._waiting_live_migrations.pop(instance.uuid)
 
         class _BreakWaitForInstanceEvent(Exception):
             """Used as a signal to stop waiting for the network-vif-plugged
@@ -6251,8 +6248,22 @@ class ComputeManager(manager.Manager):
                 self._cleanup_pre_live_migration(
                     context, dest, instance, migration, migrate_data)
 
-        self._set_migration_status(migration, 'running')
-
+        # NOTE(Kevin_Zheng): Pop the migration from the waiting queue;
+        # if it exists in the queue, then we are good to move on; if
+        # not, some other process must have aborted it, so we should
+        # roll back.
+        try:
+            self._waiting_live_migrations.pop(instance.uuid)
+        except KeyError:
+            LOG.debug('Migration %s aborted by another process, rollback.',
+                      migration.uuid, instance=instance)
+            migrate_data.migration = migration
+            self._rollback_live_migration(context, instance, dest,
+                                          migrate_data, 'cancelled')
+            self._notify_live_migrate_abort_end(context, instance)
+            return
+
+        self._set_migration_status(migration, 'running')
         if migrate_data:
             migrate_data.migration = migration
         LOG.debug('live_migration data is %s', migrate_data)
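
The try/except KeyError above makes the single dict.pop() the synchronization
point between the migration task and an abort request: whichever side pops the
instance's entry first owns the migration, and the loser takes the rollback
(or driver-abort) path. A tiny sketch of the idiom, with illustrative names
rather than Nova code:

    # Whichever caller pops first "claims" the migration; dict.pop() either
    # returns the value or raises KeyError, never both to two callers.
    waiting = {'instance-uuid': 'migration-record'}

    def try_claim(who):
        try:
            waiting.pop('instance-uuid')
            return '%s claimed the migration' % who
        except KeyError:
            return '%s lost the race, taking the fallback path' % who

    print(try_claim('_do_live_migration'))    # -> claimed the migration
    print(try_claim('live_migration_abort'))  # -> lost the race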
@@ -6331,6 +6342,14 @@ class ComputeManager(manager.Manager):
             action=fields.NotificationAction.LIVE_MIGRATION_FORCE_COMPLETE,
             phase=fields.NotificationPhase.END)
 
+    def _notify_live_migrate_abort_end(self, context, instance):
+        self._notify_about_instance_usage(
+            context, instance, 'live.migration.abort.end')
+        compute_utils.notify_about_instance_action(
+            context, instance, self.host,
+            action=fields.NotificationAction.LIVE_MIGRATION_ABORT,
+            phase=fields.NotificationPhase.END)
+
     @wrap_exception()
     @wrap_instance_event(prefix='compute')
     @wrap_instance_fault
@@ -6342,26 +6361,51 @@ class ComputeManager(manager.Manager):
         :param migration_id: ID of in-progress live migration
 
         """
-        migration = objects.Migration.get_by_id(context, migration_id)
-        if migration.status != 'running':
-            raise exception.InvalidMigrationState(migration_id=migration_id,
-                    instance_uuid=instance.uuid,
-                    state=migration.status,
-                    method='abort live migration')
-
         self._notify_about_instance_usage(
             context, instance, 'live.migration.abort.start')
         compute_utils.notify_about_instance_action(
             context, instance, self.host,
             action=fields.NotificationAction.LIVE_MIGRATION_ABORT,
             phase=fields.NotificationPhase.START)
-        self.driver.live_migration_abort(instance)
-        self._notify_about_instance_usage(
-            context, instance, 'live.migration.abort.end')
-        compute_utils.notify_about_instance_action(
-            context, instance, self.host,
-            action=fields.NotificationAction.LIVE_MIGRATION_ABORT,
-            phase=fields.NotificationPhase.END)
+        # NOTE(Kevin_Zheng): Pop the migration out from the queue; this might
+        # lead to 3 scenarios:
+        # 1. The selected migration is still in the queue, and future.cancel()
+        #    succeeds, then the abort action succeeds: mark the migration
+        #    status as 'cancelled'.
+        # 2. The selected migration is still in the queue, but future.cancel()
+        #    fails, then _do_live_migration() has started executing, and
+        #    the migration status is 'preparing'; we just pop it from the
+        #    queue, and the migration process will handle it later. The
+        #    migration status couldn't be 'running' in this scenario, because
+        #    if _do_live_migration had started executing and we had already
+        #    popped it from the queue and set the migration status to
+        #    'running' at this point, popping it here would raise KeyError, at
+        #    which point we check if it's running and, if so, we abort the old
+        #    way.
+        # 3. The selected migration is not in the queue, then the migration
+        #    status is 'running': let the driver handle it.
+        try:
+            migration, future = (
+                self._waiting_live_migrations.pop(instance.uuid))
+            if future and future.cancel():
+                # If we got here, we've successfully aborted the queued
+                # migration and _do_live_migration won't run, so we need
+                # to set the migration status to cancelled and send the
+                # notification. If Future.cancel() fails, it means
+                # _do_live_migration is running and the migration status
+                # is 'preparing'; _do_live_migration() itself will attempt
+                # to pop the queued migration, hit a KeyError, roll back,
+                # set the migration to cancelled and send the
+                # live.migration.abort.end notification.
+                self._set_migration_status(migration, 'cancelled')
+        except KeyError:
+            migration = objects.Migration.get_by_id(context, migration_id)
+            if migration.status != 'running':
+                raise exception.InvalidMigrationState(
+                    migration_id=migration_id, instance_uuid=instance.uuid,
+                    state=migration.status, method='abort live migration')
+            self.driver.live_migration_abort(instance)
+        self._notify_live_migrate_abort_end(context, instance)
 
     def _live_migration_cleanup_flags(self, migrate_data):
         """Determine whether disks or instance path need to be cleaned up after
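
The three scenarios hinge on standard future semantics (Nova's executor
futures follow the stdlib concurrent.futures behavior): Future.cancel()
returns True only while the callable is still queued; once it has started
running, cancel() returns False and the running task must notice the abort
itself, here via the KeyError on the popped mapping. A short demonstration,
not Nova code:

    import concurrent.futures
    import threading

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    started = threading.Event()
    release = threading.Event()

    def work():
        started.set()
        release.wait()  # keep the single worker busy

    running = executor.submit(work)  # starts immediately
    queued = executor.submit(work)   # waits behind 'running'
    started.wait()
    print(queued.cancel())   # -> True: still queued (scenario 1)
    print(running.cancel())  # -> False: already running (scenarios 2/3)
    release.set()
    executor.shutdown(wait=True)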
nova/objects/service.py

@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
 
 
 # NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 33
+SERVICE_VERSION = 34
 
 
 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -141,6 +141,8 @@ SERVICE_VERSION_HISTORY = (
     # Version 33: Add support for check on the server group with
     #             'max_server_per_host' rules
     {'compute_rpc': '5.0'},
+    # Version 34: Adds support to abort queued/preparing live migrations.
+    {'compute_rpc': '5.0'},
 )
 
 
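
Bumping SERVICE_VERSION is what lets the follow-up API patch gate the new
behavior on the oldest nova-compute service in the deployment. A hedged
sketch of what such a gate typically looks like; the helper and constant
below are illustrative assumptions, not the actual follow-up code:

    # Only allow aborting a queued/preparing live migration if every
    # nova-compute is new enough to understand the new code paths.
    MIN_ABORT_QUEUED_VERSION = 34  # assumed gate for this feature

    def can_abort_queued(minimum_compute_service_version):
        """Return True if all computes support aborting queued migrations."""
        return minimum_compute_service_version >= MIN_ABORT_QUEUED_VERSION

    # e.g. with one compute still at version 33, the API must refuse:
    print(can_abort_queued(33))  # -> False
    print(can_abort_queued(34))  # -> True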
nova/tests/unit/compute/test_compute_mgr.py

@@ -7279,6 +7279,47 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
             'fake', self.instance, True, migration, {})
         self.assertEqual('error', migration.status)
 
+    @mock.patch(
+        'nova.compute.manager.ComputeManager._notify_about_instance_usage')
+    @mock.patch.object(compute_utils, 'notify_about_instance_action')
+    @mock.patch('nova.compute.manager.ComputeManager._rollback_live_migration')
+    @mock.patch('nova.compute.rpcapi.ComputeAPI.pre_live_migration')
+    def test_live_migration_aborted_before_running(self, mock_rpc,
+                                                   mock_rollback,
+                                                   mock_action_notify,
+                                                   mock_usage_notify):
+        migrate_data = objects.LibvirtLiveMigrateData(
+            wait_for_vif_plugged=True)
+        mock_rpc.return_value = migrate_data
+        self.instance.info_cache = objects.InstanceInfoCache(
+            network_info=network_model.NetworkInfo([
+                network_model.VIF(uuids.port1), network_model.VIF(uuids.port2)
+            ]))
+        self.compute._waiting_live_migrations = {}
+        fake_migration = objects.Migration(
+            uuid=uuids.migration, instance_uuid=self.instance.uuid)
+        fake_migration.save = mock.MagicMock()
+
+        with mock.patch.object(self.compute.virtapi,
+                               'wait_for_instance_event') as wait_for_event:
+            self.compute._do_live_migration(
+                self.context, 'dest-host', self.instance, 'block_migration',
+                fake_migration, migrate_data)
+
+        self.assertEqual(2, len(wait_for_event.call_args[0][1]))
+        mock_rpc.assert_called_once_with(
+            self.context, self.instance, 'block_migration', None,
+            'dest-host', migrate_data)
+        mock_rollback.assert_called_once_with(
+            self.context, self.instance, 'dest-host', migrate_data,
+            'cancelled')
+        mock_usage_notify.assert_called_once_with(
+            self.context, self.instance, 'live.migration.abort.end')
+        mock_action_notify.assert_called_once_with(
+            self.context, self.instance, self.compute.host,
+            action=fields.NotificationAction.LIVE_MIGRATION_ABORT,
+            phase=fields.NotificationPhase.END)
+
     def test_live_migration_force_complete_succeeded(self):
         migration = objects.Migration()
         migration.status = 'running'
@@ -7708,6 +7749,34 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
                        action='live_migration_abort', phase='end')]
         )
 
+    @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
+    @mock.patch.object(objects.Migration, 'get_by_id')
+    @mock.patch('nova.compute.utils.notify_about_instance_action')
+    def test_live_migration_abort_queued(self, mock_notify_action,
+                                         mock_get_migration, mock_notify):
+        instance = objects.Instance(id=123, uuid=uuids.instance)
+        migration = self._get_migration(10, 'queued', 'live-migration')
+        migration.save = mock.MagicMock()
+        mock_get_migration.return_value = migration
+        fake_future = mock.MagicMock()
+        self.compute._waiting_live_migrations[instance.uuid] = (
+            migration, fake_future)
+        self.compute.live_migration_abort(self.context, instance, migration.id)
+        mock_notify.assert_has_calls(
+            [mock.call(self.context, instance,
+                       'live.migration.abort.start'),
+             mock.call(self.context, instance,
+                       'live.migration.abort.end')]
+        )
+        mock_notify_action.assert_has_calls(
+            [mock.call(self.context, instance, 'fake-mini',
+                       action='live_migration_abort', phase='start'),
+             mock.call(self.context, instance, 'fake-mini',
+                       action='live_migration_abort', phase='end')]
+        )
+        self.assertEqual('cancelled', migration.status)
+        fake_future.cancel.assert_called_once_with()
+
     @mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
     @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
     @mock.patch.object(objects.Migration, 'get_by_id')
@@ -7731,11 +7800,13 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
             'fake-mini', action='live_migration_abort', phase='start')
 
+    @mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
     @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
     @mock.patch.object(objects.Migration, 'get_by_id')
     @mock.patch('nova.compute.utils.notify_about_instance_action')
     def test_live_migration_abort_wrong_migration_state(self,
                                                         mock_notify_action,
                                                         mock_get_migration,
-                                                        mock_notify):
+                                                        mock_notify,
+                                                        mock_instance_fault):
         instance = objects.Instance(id=123, uuid=uuids.instance)
         migration = self._get_migration(10, 'completed', 'live-migration')
@@ -7745,7 +7816,6 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
                           self.context,
                           instance,
                           migration.id)
-        mock_notify_action.assert_not_called()
 
     def test_live_migration_cleanup_flags_block_migrate_libvirt(self):
         migrate_data = objects.LibvirtLiveMigrateData(