Make live migration create a migration object record

The migration object is moved to its terminal status based on whether
the driver invokes the success or failure callback, which should happen
only once the operation has completely finished, so the final status
properly reflects the driver's notion of success. (A sketch of the
resulting status lifecycle follows the commit metadata below.)

Related to blueprint robustify-live-migration

Change-Id: I515ab9d67eb205fe171078826e69d1016ab3ebda
Dan Smith 2015-05-14 14:22:18 -07:00
parent 1b26babb2f
commit 2f4e64a7cf
10 changed files with 177 additions and 29 deletions
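
Before the per-file hunks, here is a minimal standalone sketch of the status
lifecycle described above. It is not part of the commit and not Nova code:
FakeMigration, FakeDriver, and the live_migrate helper are illustrative
stand-ins, and only the status strings ('pre-migrating', 'preparing',
'running', 'completed', 'failed', 'error') mirror what the hunks below set.

class FakeMigration(object):
    """Stand-in for nova.objects.Migration; it only records status changes."""
    def __init__(self):
        # The conductor creates the record in the 'pre-migrating' state.
        self.status = 'pre-migrating'

    def save(self):
        # A real Migration object would persist the change to the database.
        pass


class FakeDriver(object):
    """Stand-in driver that reports its outcome via callbacks."""
    def live_migration(self, instance, dest, post_method, recover_method):
        # A real driver performs the migration and then calls exactly one
        # of the callbacks once everything has completely finished.
        succeeded = True  # pretend the guest moved successfully
        if succeeded:
            post_method(instance, dest)      # -> 'completed'
        else:
            recover_method(instance, dest)   # -> 'error'


def live_migrate(driver, migration, instance, dest):
    migration.status = 'preparing'   # set before pre-migration checks on dest
    migration.save()

    def _post(instance, dest):
        migration.status = 'completed'
        migration.save()

    def _rollback(instance, dest):
        migration.status = 'error'
        migration.save()

    migration.status = 'running'     # set just before handing off to the driver
    migration.save()
    try:
        driver.live_migration(instance, dest, _post, _rollback)
    except Exception:
        # The driver raised before reporting success or failure itself.
        migration.status = 'failed'
        migration.save()
        raise


if __name__ == '__main__':
    migration = FakeMigration()
    live_migrate(FakeDriver(), migration, 'some-instance', 'dest-host')
    print(migration.status)  # prints 'completed' on the success path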


@@ -644,7 +644,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='4.1')
target = messaging.Target(version='4.2')
# How long to wait in seconds before re-issuing a shutdown
# signal to an instance during power off. The overall
@@ -4830,17 +4830,27 @@ class ComputeManager(manager.Manager):
@wrap_exception()
@wrap_instance_fault
def live_migration(self, context, dest, instance, block_migration,
migrate_data):
migration, migrate_data):
"""Executing live migration.
:param context: security context
:param instance: a nova.objects.instance.Instance object
:param dest: destination host
:param block_migration: if true, prepare for block migration
:param migration: a nova.objects.Migration object
:param migrate_data: implementation specific params
"""
# NOTE(danms): Remove these guards in v5.0 of the RPC API
if migration:
# NOTE(danms): We should enhance the RT to account for migrations
# and use the status field to denote when the accounting has been
# done on source/destination. For now, this is just here for status
# reporting
migration.status = 'preparing'
migration.save()
# Create a local copy since we'll be modifying the dictionary
migrate_data = dict(migrate_data or {})
try:
@@ -4861,16 +4871,30 @@ class ComputeManager(manager.Manager):
with excutils.save_and_reraise_exception():
LOG.exception(_LE('Pre live migration failed at %s'),
dest, instance=instance)
if migration:
migration.status = 'failed'
migration.save()
self._rollback_live_migration(context, instance, dest,
block_migration, migrate_data)
# Executing live migration
# live_migration might raise exceptions, but
# nothing must be recovered in this version.
self.driver.live_migration(context, instance, dest,
self._post_live_migration,
self._rollback_live_migration,
block_migration, migrate_data)
if migration:
migration.status = 'running'
migration.save()
migrate_data['migration'] = migration
try:
self.driver.live_migration(context, instance, dest,
self._post_live_migration,
self._rollback_live_migration,
block_migration, migrate_data)
except Exception:
# Executing live migration
# live_migration might raise exceptions, but
# nothing must be recovered in this version.
with excutils.save_and_reraise_exception():
if migration:
migration.status = 'failed'
migration.save()
def _live_migration_cleanup_flags(self, block_migration, migrate_data):
"""Determine whether disks or instance path need to be cleaned up after
@@ -5012,6 +5036,9 @@ class ComputeManager(manager.Manager):
instance=instance)
self._clean_instance_console_tokens(ctxt, instance)
if migrate_data and migrate_data.get('migration'):
migrate_data['migration'].status = 'completed'
migrate_data['migration'].save()
def _consoles_enabled(self):
"""Returns whether a console is enable."""
@@ -5108,6 +5135,13 @@ class ComputeManager(manager.Manager):
instance.task_state = None
instance.save(expected_task_state=[task_states.MIGRATING])
# NOTE(danms): Pop out the migration object so we don't pass
# it over RPC unintentionally below
if migrate_data:
migration = migrate_data.pop('migration', None)
else:
migration = None
# NOTE(tr3buchet): setup networks on source host (really it's re-setup)
self.network_api.setup_networks_on_host(context, instance, self.host)
@@ -5131,6 +5165,9 @@ class ComputeManager(manager.Manager):
self._notify_about_instance_usage(context, instance,
"live_migration._rollback.end")
if migration:
migration.status = 'error'
migration.save()
@wrap_exception()
@wrap_instance_fault


@@ -467,6 +467,13 @@ class ResourceTracker(object):
migrations = objects.MigrationList.get_in_progress_by_host_and_node(
context, self.host, self.nodename)
# Only look at resize/migrate migration records
# NOTE(danms): RT should probably examine live migration
# records as well and do something smart. However, ignore
# those for now to avoid them being included in the calculations below.
migrations = [migration for migration in migrations
if migration.migration_type in ('resize', 'migrate')]
self._update_usage_from_migrations(context, migrations)
# Detect and account for orphaned instances that may exist on the


@@ -298,6 +298,7 @@ class ComputeAPI(object):
* 4.0 - Remove 3.x compatibility
* 4.1 - Make prep_resize() and resize_instance() send Flavor object
* 4.2 - Add migration argument to live_migration()
'''
VERSION_ALIASES = {
@@ -531,12 +532,15 @@ class ComputeAPI(object):
cctxt.cast(ctxt, 'inject_network_info', instance=instance)
def live_migration(self, ctxt, instance, dest, block_migration, host,
migrate_data=None):
version = '4.0'
migration, migrate_data=None):
args = {'migration': migration}
version = '4.2'
if not self.client.can_send_version(version):
version = '4.0'
cctxt = self.client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'live_migration', instance=instance,
dest=dest, block_migration=block_migration,
migrate_data=migrate_data)
migrate_data=migrate_data, **args)
def pause_instance(self, ctxt, instance):
version = '4.0'


@@ -621,9 +621,24 @@ class ComputeTaskManager(base.Base):
expected_task_state=task_states.MIGRATING,),
ex, request_spec, self.db)
migration = objects.Migration(context=context.elevated())
migration.dest_compute = destination
migration.status = 'pre-migrating'
migration.instance_uuid = instance.uuid
migration.source_compute = instance.host
migration.migration_type = 'live-migration'
if instance.obj_attr_is_set('flavor'):
migration.old_instance_type_id = instance.flavor.id
migration.new_instance_type_id = instance.flavor.id
else:
migration.old_instance_type_id = instance.instance_type_id
migration.new_instance_type_id = instance.instance_type_id
migration.create()
try:
live_migrate.execute(context, instance, destination,
block_migration, disk_over_commit)
block_migration, disk_over_commit,
migration)
except (exception.NoValidHost,
exception.ComputeServiceUnavailable,
exception.InvalidHypervisorType,
@@ -639,6 +654,8 @@ class ComputeTaskManager(base.Base):
with excutils.save_and_reraise_exception():
# TODO(johngarbutt) - eventually need instance actions here
_set_vm_state(context, instance, ex, instance.vm_state)
migration.status = 'error'
migration.save()
except Exception as ex:
LOG.error(_LE('Migration of instance %(instance_id)s to host'
' %(dest)s unexpectedly failed.'),
@@ -646,6 +663,8 @@ class ComputeTaskManager(base.Base):
exc_info=True)
_set_vm_state(context, instance, ex, vm_states.ERROR,
instance.task_state)
migration.status = 'failed'
migration.save()
raise exception.MigrationError(reason=six.text_type(ex))
def build_instances(self, context, instances, image, filter_properties,


@@ -38,12 +38,13 @@ CONF.register_opt(migrate_opt)
class LiveMigrationTask(object):
def __init__(self, context, instance, destination,
block_migration, disk_over_commit):
block_migration, disk_over_commit, migration):
self.context = context
self.instance = instance
self.destination = destination
self.block_migration = block_migration
self.disk_over_commit = disk_over_commit
self.migration = migration
self.source = instance.host
self.migrate_data = None
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
@@ -57,6 +58,8 @@ class LiveMigrationTask(object):
if not self.destination:
self.destination = self._find_destination()
self.migration.dest_compute = self.destination
self.migration.save()
else:
self._check_requested_destination()
@@ -67,6 +70,7 @@ class LiveMigrationTask(object):
instance=self.instance,
dest=self.destination,
block_migration=self.block_migration,
migration=self.migration,
migrate_data=self.migrate_data)
def rollback(self):
@@ -184,10 +188,11 @@ class LiveMigrationTask(object):
def execute(context, instance, destination,
block_migration, disk_over_commit):
block_migration, disk_over_commit, migration):
task = LiveMigrationTask(context, instance,
destination,
block_migration,
disk_over_commit)
disk_over_commit,
migration)
# TODO(johngarbutt) create a superclass that contains a safe_execute call
return task.execute()


@@ -5431,7 +5431,8 @@ class ComputeTestCase(BaseTestCase):
# cleanup
db.instance_destroy(c, instance['uuid'])
def test_live_migration_exception_rolls_back(self):
@mock.patch('nova.objects.Migration.save')
def test_live_migration_exception_rolls_back(self, mock_save):
# Confirm exception when pre_live_migration fails.
c = context.get_admin_context()
@@ -5489,16 +5490,20 @@ class ComputeTestCase(BaseTestCase):
# start test
self.mox.ReplayAll()
migration = objects.Migration()
self.assertRaises(test.TestingException,
self.compute.live_migration,
c, dest=dest_host, block_migration=True,
instance=instance, migrate_data={})
instance=instance, migration=migration,
migrate_data={})
instance.refresh()
self.assertEqual('src_host', instance.host)
self.assertEqual(vm_states.ACTIVE, instance.vm_state)
self.assertIsNone(instance.task_state)
self.assertEqual('failed', migration.status)
def test_live_migration_works_correctly(self):
@mock.patch('nova.objects.Migration.save')
def test_live_migration_works_correctly(self, mock_save):
# Confirm live_migration() works as expected.
# creating instance testdata
c = context.get_admin_context()
@@ -5533,15 +5538,20 @@ class ComputeTestCase(BaseTestCase):
# start test
self.mox.ReplayAll()
migration = objects.Migration()
ret = self.compute.live_migration(c, dest=dest,
instance=instance,
block_migration=False,
migration=migration,
migrate_data=migrate_data)
self.assertIsNone(ret)
# cleanup
instance.destroy()
self.assertEqual('completed', migration.status)
def test_post_live_migration_no_shared_storage_working_correctly(self):
"""Confirm post_live_migration() works correctly as expected
for non shared storage migration.
@@ -5614,6 +5624,8 @@ class ComputeTestCase(BaseTestCase):
'power_state': power_state.PAUSED})
instance.save()
migrate_data = {'migration': mock.MagicMock()}
# creating mocks
with contextlib.nested(
mock.patch.object(self.compute.driver, 'post_live_migration'),
@@ -5635,12 +5647,14 @@ class ComputeTestCase(BaseTestCase):
post_live_migration_at_source, setup_networks_on_host,
clear_events, update_available_resource
):
self.compute._post_live_migration(c, instance, dest)
self.compute._post_live_migration(c, instance, dest,
migrate_data=migrate_data)
post_live_migration.assert_has_calls([
mock.call(c, instance, {'swap': None, 'ephemerals': [],
'root_device_name': None,
'block_device_mapping': []}, None)])
'block_device_mapping': []},
migrate_data)])
unfilter_instance.assert_has_calls([mock.call(instance, [])])
migration = {'source_compute': srchost,
'dest_compute': dest, }
@@ -5652,6 +5666,8 @@ class ComputeTestCase(BaseTestCase):
[mock.call(c, instance, [])])
clear_events.assert_called_once_with(instance)
update_available_resource.assert_has_calls([mock.call(c)])
self.assertEqual('completed', migrate_data['migration'].status)
migrate_data['migration'].save.assert_called_once_with()
def test_post_live_migration_terminate_volume_connections(self):
c = context.get_admin_context()
@@ -5780,6 +5796,29 @@ class ComputeTestCase(BaseTestCase):
updated = self._finish_post_live_migration_at_destination()
self.assertIsNone(updated['node'])
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
def test_rollback_live_migration(self, mock_bdms):
c = context.get_admin_context()
instance = mock.MagicMock()
migration = mock.MagicMock()
migrate_data = {'migration': migration}
mock_bdms.return_value = []
@mock.patch.object(self.compute, '_live_migration_cleanup_flags')
@mock.patch.object(self.compute, 'network_api')
def _test(mock_nw_api, mock_lmcf):
mock_lmcf.return_value = False, False
self.compute._rollback_live_migration(c, instance, 'foo',
False,
migrate_data=migrate_data)
mock_nw_api.setup_networks_on_host.assert_called_once_with(
c, instance, self.compute.host)
_test()
self.assertEqual('error', migration.status)
migration.save.assert_called_once_with()
def test_rollback_live_migration_at_destination_correctly(self):
# creating instance testdata
c = context.get_admin_context()


@@ -1175,6 +1175,7 @@ class InstanceClaimTestCase(BaseTrackerTestCase):
@mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
def test_deleted_instances_with_migrations(self, mock_migration_list):
migration = objects.Migration(context=self.context,
migration_type='resize',
instance_uuid='invalid')
mock_migration_list.return_value = [migration]
self.tracker.update_available_resource(self.context)
@@ -1184,6 +1185,20 @@ class InstanceClaimTestCase(BaseTrackerTestCase):
"fakehost",
"fakenode")
@mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
def test_instances_with_live_migrations(self, mock_migration_list):
instance = self._fake_instance_obj()
migration = objects.Migration(context=self.context,
migration_type='live-migration',
instance_uuid=instance.uuid)
mock_migration_list.return_value = [migration]
self.tracker.update_available_resource(self.context)
self.assertEqual(0, self.tracker.compute_node['memory_mb_used'])
self.assertEqual(0, self.tracker.compute_node['local_gb_used'])
mock_migration_list.assert_called_once_with(self.context,
"fakehost",
"fakenode")
@mock.patch('nova.compute.claims.Claim')
@mock.patch('nova.objects.Instance.save')
def test_claim_saves_numa_topology(self, mock_save, mock_claim):


@@ -236,7 +236,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
self._test_compute_api('live_migration', 'cast',
instance=self.fake_instance_obj, dest='dest',
block_migration='blockity_block', host='tsoh',
migrate_data={}, version='4.0')
migration='migration',
migrate_data={}, version='4.2')
def test_post_live_migration_at_destination(self):
self._test_compute_api('post_live_migration_at_destination', 'cast',


@@ -10,6 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import mock
from mox3 import mox
from nova.compute import power_state
@@ -41,12 +42,13 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
self.destination = "destination"
self.block_migration = "bm"
self.disk_over_commit = "doc"
self.migration = objects.Migration()
self._generate_task()
def _generate_task(self):
self.task = live_migrate.LiveMigrationTask(self.context,
self.instance, self.destination, self.block_migration,
self.disk_over_commit)
self.disk_over_commit, self.migration)
def test_execute_with_destination(self):
self.mox.StubOutWithMock(self.task, '_check_host_is_up')
@@ -60,6 +62,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
instance=self.instance,
dest=self.destination,
block_migration=self.block_migration,
migration=self.migration,
migrate_data=None).AndReturn("bob")
self.mox.ReplayAll()
@@ -81,10 +84,14 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
instance=self.instance,
dest="found_host",
block_migration=self.block_migration,
migration=self.migration,
migrate_data=None).AndReturn("bob")
self.mox.ReplayAll()
self.assertEqual("bob", self.task.execute())
with mock.patch.object(self.migration, 'save') as mock_save:
self.assertEqual("bob", self.task.execute())
self.assertTrue(mock_save.called)
self.assertEqual('found_host', self.migration.dest_compute)
def test_check_instance_is_active_passes_when_paused(self):
self.task.instance['power_state'] = power_state.PAUSED


@@ -936,17 +936,21 @@ class _BaseTaskTestCase(object):
rebuild_args.update(update_args)
return rebuild_args
def test_live_migrate(self):
@mock.patch('nova.objects.Migration')
def test_live_migrate(self, migobj):
inst = fake_instance.fake_db_instance()
inst_obj = objects.Instance._from_db_object(
self.context, objects.Instance(), inst, [])
migration = migobj()
self.mox.StubOutWithMock(live_migrate, 'execute')
live_migrate.execute(self.context,
mox.IsA(objects.Instance),
'destination',
'block_migration',
'disk_over_commit')
'disk_over_commit',
migration)
self.mox.ReplayAll()
if isinstance(self.conductor, (conductor_api.ComputeTaskAPI,
@@ -960,6 +964,10 @@ class _BaseTaskTestCase(object):
{'host': 'destination'}, True, False, None,
'block_migration', 'disk_over_commit')
self.assertEqual('pre-migrating', migration.status)
self.assertEqual('destination', migration.dest_compute)
self.assertEqual(inst_obj.host, migration.source_compute)
def _test_cold_migrate(self, clean_shutdown=True):
self.mox.StubOutWithMock(utils, 'get_image_from_system_metadata')
self.mox.StubOutWithMock(scheduler_utils, 'build_request_spec')
@@ -1521,7 +1529,8 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
'uuid': instance['uuid'], },
}
def _test_migrate_server_deals_with_expected_exceptions(self, ex):
@mock.patch('nova.objects.Migration')
def _test_migrate_server_deals_with_expected_exceptions(self, ex, migobj):
instance = fake_instance.fake_db_instance(uuid='uuid',
vm_state=vm_states.ACTIVE)
inst_obj = objects.Instance._from_db_object(
@@ -1530,9 +1539,11 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
self.mox.StubOutWithMock(scheduler_utils,
'set_vm_state_and_notify')
migration = migobj()
live_migrate.execute(self.context, mox.IsA(objects.Instance),
'destination', 'block_migration',
'disk_over_commit').AndRaise(ex)
'disk_over_commit', migration).AndRaise(ex)
scheduler_utils.set_vm_state_and_notify(self.context,
inst_obj.uuid,
@@ -1551,6 +1562,8 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
{'host': 'destination'}, True, False, None, 'block_migration',
'disk_over_commit')
self.assertEqual('error', migration.status)
def test_migrate_server_deals_with_invalidcpuinfo_exception(self):
instance = fake_instance.fake_db_instance(uuid='uuid',
vm_state=vm_states.ACTIVE)
@@ -1563,7 +1576,8 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
ex = exc.InvalidCPUInfo(reason="invalid cpu info.")
live_migrate.execute(self.context, mox.IsA(objects.Instance),
'destination', 'block_migration',
'disk_over_commit').AndRaise(ex)
'disk_over_commit',
mox.IsA(objects.Migration)).AndRaise(ex)
scheduler_utils.set_vm_state_and_notify(self.context,
inst_obj.uuid,