Merge "Fix use of stale instance data in compute manager"

Authored by Jenkins on 2013-01-13 12:54:07 +00:00, committed by Gerrit Code Review
commit 59333ce9f3
2 changed files with 152 additions and 180 deletions
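The substance of the change: ComputeManager._instance_update() returns the refreshed instance record, but many call sites discarded that return value and kept working with the instance dict they were handed on entry, so follow-up notifications and driver calls could act on stale vm_state, task_state, or power_state data. The diff below rebinds instance to the update's return value at each of those sites (and has _update_access_ip and the snapshot task-state callback return the refreshed record as well). A minimal sketch of the before/after pattern, using simplified stand-ins rather than the real Nova plumbing:

    # Sketch only: simplified stand-ins, not the real nova.compute.manager API.

    def _instance_update(context, instance_uuid, **kwargs):
        # In Nova this writes to the database and returns the refreshed record.
        refreshed = {'uuid': instance_uuid}
        refreshed.update(kwargs)
        return refreshed

    def stop_instance_stale(context, instance):
        # Old pattern: the return value is dropped, so `instance` keeps whatever
        # vm_state/task_state it carried when the method was entered.
        _instance_update(context, instance['uuid'],
                         vm_state='stopped', task_state=None)
        return instance

    def stop_instance_fresh(context, instance):
        # Pattern introduced by this commit: rebind `instance` to the updated
        # record so later notifications and driver calls see the current state.
        instance = _instance_update(context, instance['uuid'],
                                    vm_state='stopped', task_state=None)
        return instance

With the stale variant, the dict handed back to the caller still reports the original vm_state; with the fresh variant it reports 'stopped', matching what was just persisted.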

View File

@@ -709,7 +709,8 @@ class ComputeManager(manager.SchedulerDependentManager):
# Spawn success:
if (is_first_time and not instance['access_ip_v4']
and not instance['access_ip_v6']):
self._update_access_ip(context, instance, network_info)
instance = self._update_access_ip(context, instance,
network_info)
self._notify_about_instance_usage(context, instance,
"create.end", network_info=network_info,
@@ -826,7 +827,7 @@ class ComputeManager(manager.SchedulerDependentManager):
network_name = CONF.default_access_ip_network_name
if not network_name:
return
return instance
update_info = {}
for vif in nw_info:
@@ -837,7 +838,9 @@ class ComputeManager(manager.SchedulerDependentManager):
if ip['version'] == 6:
update_info['access_ip_v6'] = ip['address']
if update_info:
self._instance_update(context, instance['uuid'], **update_info)
instance = self._instance_update(context, instance['uuid'],
**update_info)
return instance
def _check_instance_not_already_created(self, context, instance):
"""Ensure an instance with the same name is not already present."""
@@ -914,10 +917,10 @@ class ComputeManager(manager.SchedulerDependentManager):
def _allocate_network(self, context, instance, requested_networks):
"""Allocate networks for an instance and return the network info."""
self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.NETWORKING,
expected_task_state=None)
instance = self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.NETWORKING,
expected_task_state=None)
is_vpn = pipelib.is_vpn_image(instance['image_ref'])
try:
# allocate and get network info
@@ -936,9 +939,9 @@ class ComputeManager(manager.SchedulerDependentManager):
def _prep_block_device(self, context, instance, bdms):
"""Set up the block device for an instance with error logging."""
self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.BLOCK_DEVICE_MAPPING)
instance = self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.BLOCK_DEVICE_MAPPING)
try:
return self._setup_block_device_mapping(context, instance, bdms)
except Exception:
@@ -949,11 +952,10 @@ class ComputeManager(manager.SchedulerDependentManager):
def _spawn(self, context, instance, image_meta, network_info,
block_device_info, injected_files, admin_password):
"""Spawn an instance with error logging and update its power state."""
self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.SPAWNING,
expected_task_state=task_states.
BLOCK_DEVICE_MAPPING)
instance = self._instance_update(context, instance['uuid'],
vm_state=vm_states.BUILDING,
task_state=task_states.SPAWNING,
expected_task_state=task_states.BLOCK_DEVICE_MAPPING)
try:
self.driver.spawn(context, instance, image_meta,
injected_files, admin_password,
@@ -1180,13 +1182,12 @@ class ComputeManager(manager.SchedulerDependentManager):
self._notify_about_instance_usage(context, instance, "power_off.start")
self.driver.power_off(instance)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.STOPPED,
expected_task_state=(task_states.POWERING_OFF,
task_states.STOPPING),
task_state=None)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.STOPPED,
expected_task_state=(task_states.POWERING_OFF,
task_states.STOPPING),
task_state=None)
self._notify_about_instance_usage(context, instance, "power_off.end")
# NOTE(johannes): This is probably better named power_on_instance
@@ -1200,13 +1201,12 @@ class ComputeManager(manager.SchedulerDependentManager):
self._notify_about_instance_usage(context, instance, "power_on.start")
self.driver.power_on(instance)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=(task_states.POWERING_ON,
task_states.STARTING))
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=(task_states.POWERING_ON,
task_states.STARTING))
self._notify_about_instance_usage(context, instance, "power_on.end")
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@@ -1223,12 +1223,11 @@ class ComputeManager(manager.SchedulerDependentManager):
# doesn't implement the soft_delete method
self.driver.power_off(instance)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.SOFT_DELETED,
expected_task_state=task_states.SOFT_DELETING,
task_state=None)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.SOFT_DELETED,
expected_task_state=task_states.SOFT_DELETING,
task_state=None)
self._notify_about_instance_usage(context, instance, "soft_delete.end")
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@@ -1244,12 +1243,11 @@ class ComputeManager(manager.SchedulerDependentManager):
# doesn't implement the restore method
self.driver.power_on(instance)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
expected_task_state=task_states.RESTORING,
task_state=None)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
expected_task_state=task_states.RESTORING,
task_state=None)
self._notify_about_instance_usage(context, instance, "restore.end")
# NOTE(johannes): In the folsom release, power_off_instance was poorly
@@ -1357,11 +1355,10 @@ class ComputeManager(manager.SchedulerDependentManager):
"rebuild.start", extra_usage_info=extra_usage_info)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
task_state=task_states.REBUILDING,
expected_task_state=task_states.REBUILDING)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
task_state=task_states.REBUILDING,
expected_task_state=task_states.REBUILDING)
if recreate:
# Detaching volumes.
@@ -1381,11 +1378,9 @@ class ComputeManager(manager.SchedulerDependentManager):
self.driver.destroy(instance,
self._legacy_nw_info(network_info))
instance = self._instance_update(context,
instance['uuid'],
task_state=task_states.
REBUILD_BLOCK_DEVICE_MAPPING,
expected_task_state=task_states.REBUILDING)
instance = self._instance_update(context, instance['uuid'],
task_state=task_states.REBUILD_BLOCK_DEVICE_MAPPING,
expected_task_state=task_states.REBUILDING)
instance['injected_files'] = injected_files
network_info = self._get_instance_nw_info(context, instance)
@@ -1396,14 +1391,11 @@ class ComputeManager(manager.SchedulerDependentManager):
device_info = self._setup_block_device_mapping(context, instance,
bdms)
instance = self._instance_update(context,
instance['uuid'],
task_state=task_states.
REBUILD_SPAWNING,
expected_task_state=task_states.
REBUILD_BLOCK_DEVICE_MAPPING)
# pull in new password here since the original password isn't in
# the db
expected_task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING
instance = self._instance_update(context, instance['uuid'],
task_state=task_states.REBUILD_SPAWNING,
expected_task_state=expected_task_state)
admin_password = new_pass
self.driver.spawn(context, instance, image_meta,
@@ -1510,9 +1502,8 @@ class ComputeManager(manager.SchedulerDependentManager):
context = context.elevated()
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state)
LOG.audit(_('instance snapshotting'), context=context,
instance=instance)
@@ -1535,14 +1526,17 @@ class ComputeManager(manager.SchedulerDependentManager):
expected_task_state = task_states.IMAGE_BACKUP
def update_task_state(task_state, expected_state=expected_task_state):
self._instance_update(context, instance['uuid'],
task_state=task_state,
expected_task_state=expected_state)
return self._instance_update(context, instance['uuid'],
task_state=task_state,
expected_task_state=expected_state)
self.driver.snapshot(context, instance, image_id, update_task_state)
# The instance could have changed from the driver. But since
# we're doing a fresh update here, we'll grab the changes.
self._instance_update(context, instance['uuid'], task_state=None,
expected_task_state=task_states.IMAGE_UPLOADING)
instance = self._instance_update(context, instance['uuid'],
task_state=None,
expected_task_state=task_states.IMAGE_UPLOADING)
if image_type == 'snapshot' and rotation:
raise exception.ImageRotationNotAllowed()
@@ -1900,18 +1894,15 @@ class ComputeManager(manager.SchedulerDependentManager):
# Just roll back the record. There's no need to resize down since
# the 'old' VM already has the preferred attributes
self._instance_update(context,
instance['uuid'],
launched_at=timeutils.utcnow(),
expected_task_state=task_states.
RESIZE_REVERTING)
instance = self._instance_update(context,
instance['uuid'], launched_at=timeutils.utcnow(),
expected_task_state=task_states.RESIZE_REVERTING)
self.network_api.migrate_instance_finish(context, instance,
migration)
self._instance_update(context, instance['uuid'],
vm_state=vm_states.ACTIVE,
task_state=None)
instance = self._instance_update(context, instance['uuid'],
vm_state=vm_states.ACTIVE, task_state=None)
rt = self._get_resource_tracker(instance.get('node'))
rt.revert_resize(context, migration)
@@ -2317,12 +2308,11 @@ class ComputeManager(manager.SchedulerDependentManager):
self.driver.suspend(instance)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.SUSPENDED,
task_state=None,
expected_task_state=task_states.SUSPENDING)
instance = self._instance_update(context, instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.SUSPENDED,
task_state=None,
expected_task_state=task_states.SUSPENDING)
self._notify_about_instance_usage(context, instance, 'suspend')
@@ -2342,11 +2332,9 @@ class ComputeManager(manager.SchedulerDependentManager):
block_device_info)
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
task_state=None)
instance = self._instance_update(context,
instance['uuid'], power_state=current_power_state,
vm_state=vm_states.ACTIVE, task_state=None)
self._notify_about_instance_usage(context, instance, 'resume')
@@ -2859,23 +2847,20 @@ class ComputeManager(manager.SchedulerDependentManager):
block_migration)
# Restore instance state
current_power_state = self._get_power_state(context, instance)
self._instance_update(context,
instance['uuid'],
host=self.host,
power_state=current_power_state,
vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=task_states.MIGRATING)
instance = self._instance_update(context, instance['uuid'],
host=self.host, power_state=current_power_state,
vm_state=vm_states.ACTIVE, task_state=None,
expected_task_state=task_states.MIGRATING)
# NOTE(vish): this is necessary to update dhcp
self.network_api.setup_networks_on_host(context, instance, self.host)
def _rollback_live_migration(self, context, instance_ref,
def _rollback_live_migration(self, context, instance,
dest, block_migration, migrate_data=None):
"""Recovers Instance/volume state from migrating -> running.
:param context: security context
:param instance_ref: nova.db.sqlalchemy.models.Instance
:param instance: nova.db.sqlalchemy.models.Instance
:param dest:
This method is called from live migration src host.
This param specifies destination host.
@@ -2884,23 +2869,18 @@ class ComputeManager(manager.SchedulerDependentManager):
if not none, contains implementation specific data.
"""
host = instance_ref['host']
self._instance_update(context,
instance_ref['uuid'],
host=host,
vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=task_states.MIGRATING)
host = instance['host']
instance = self._instance_update(context, instance['uuid'],
host=host, vm_state=vm_states.ACTIVE,
task_state=None, expected_task_state=task_states.MIGRATING)
# NOTE(tr3buchet): setup networks on source host (really it's re-setup)
self.network_api.setup_networks_on_host(context, instance_ref,
self.host)
self.network_api.setup_networks_on_host(context, instance, self.host)
for bdm in self._get_instance_volume_bdms(context, instance_ref):
for bdm in self._get_instance_volume_bdms(context, instance):
volume_id = bdm['volume_id']
volume = self.volume_api.get(context, volume_id)
self.compute_rpcapi.remove_volume_connection(context, instance_ref,
volume['id'], dest)
self.compute_rpcapi.remove_volume_connection(context, instance,
volume_id, dest)
# Block migration needs empty image at destination host
# before migration starts, so if any failure occurs,
@@ -2915,7 +2895,7 @@ class ComputeManager(manager.SchedulerDependentManager):
is_shared_storage = migrate_data.get('is_shared_storage', True)
if block_migration or (is_volume_backed and not is_shared_storage):
self.compute_rpcapi.rollback_live_migration_at_destination(context,
instance_ref, dest)
instance, dest)
def rollback_live_migration_at_destination(self, context, instance):
"""Cleaning up image directory that is created pre_live_migration.

View File

@@ -2368,80 +2368,60 @@ class ComputeTestCase(BaseTestCase):
# cleanup
db.instance_destroy(c, instance['uuid'])
def test_live_migration_dest_raises_exception(self):
def test_live_migration_exception_rolls_back(self):
# Confirm exception when pre_live_migration fails.
# creating instance testdata
instance_ref = self._create_fake_instance({'host': 'dummy'})
instance = jsonutils.to_primitive(instance_ref)
inst_uuid = instance['uuid']
inst_id = instance['id']
c = context.get_admin_context()
topic = rpc.queue_get_for(c, CONF.compute_topic, instance['host'])
# creating volume testdata
volume_id = 'fake'
values = {'instance_uuid': inst_uuid, 'device_name': '/dev/vdc',
'delete_on_termination': False, 'volume_id': volume_id}
db.block_device_mapping_create(c, values)
def fake_volume_get(self, context, volume_id):
return {'id': volume_id}
self.stubs.Set(cinder.API, 'get', fake_volume_get)
def fake_instance_update(context, instance_uuid, **updates):
return db.instance_update_and_get_original(context, instance_uuid,
updates)
self.stubs.Set(self.compute, '_instance_update',
fake_instance_update)
src_host = 'fake-src-host'
dest_host = 'fake-dest-host'
instance = dict(uuid='fake_instance', host=src_host,
name='fake-name')
updated_instance = 'fake_updated_instance'
fake_bdms = [dict(volume_id='vol1-id'), dict(volume_id='vol2-id')]
# creating mocks
self.mox.StubOutWithMock(rpc, 'call')
self.mox.StubOutWithMock(self.compute.driver,
'get_instance_disk_info')
self.compute.driver.get_instance_disk_info(instance['name'])
self.mox.StubOutWithMock(self.compute.compute_rpcapi,
'pre_live_migration')
self.compute.compute_rpcapi.pre_live_migration(c,
mox.IsA(instance), True, None, instance['host'],
None).AndRaise(rpc.common.RemoteError('', '', ''))
self.mox.StubOutWithMock(self.compute, '_instance_update')
self.mox.StubOutWithMock(self.compute, '_get_instance_volume_bdms')
self.mox.StubOutWithMock(self.compute.network_api,
'setup_networks_on_host')
self.mox.StubOutWithMock(self.compute.compute_rpcapi,
'remove_volume_connection')
self.mox.StubOutWithMock(self.compute.compute_rpcapi,
'rollback_live_migration_at_destination')
db.instance_update(self.context, instance['uuid'],
{'task_state': task_states.MIGRATING})
# mocks for rollback
rpc.call(c, 'network', {'method': 'setup_networks_on_host',
'args': {'instance_id': inst_id,
'host': self.compute.host,
'teardown': False},
'version': '1.0'}, None)
rpcinst = jsonutils.to_primitive(
db.instance_get_by_uuid(self.context, instance['uuid']))
rpc.call(c, topic,
{"method": "remove_volume_connection",
"args": {'instance': rpcinst,
'volume_id': volume_id},
"version": compute_rpcapi.ComputeAPI.BASE_RPC_API_VERSION},
None)
rpc.cast(c, topic,
{"method": "rollback_live_migration_at_destination",
"args": {'instance': rpcinst},
"version": compute_rpcapi.ComputeAPI.BASE_RPC_API_VERSION})
self.compute.driver.get_instance_disk_info(
instance['name']).AndReturn('fake_disk')
self.compute.compute_rpcapi.pre_live_migration(c,
instance, True, 'fake_disk', dest_host,
None).AndRaise(test.TestingException())
self.compute._instance_update(c, instance['uuid'],
host=src_host, vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=task_states.MIGRATING).AndReturn(
updated_instance)
self.compute.network_api.setup_networks_on_host(c,
updated_instance, self.compute.host)
self.compute._get_instance_volume_bdms(c,
updated_instance).AndReturn(fake_bdms)
self.compute.compute_rpcapi.remove_volume_connection(
c, updated_instance, 'vol1-id', dest_host)
self.compute.compute_rpcapi.remove_volume_connection(
c, updated_instance, 'vol2-id', dest_host)
self.compute.compute_rpcapi.rollback_live_migration_at_destination(
c, updated_instance, dest_host)
# start test
self.mox.ReplayAll()
self.assertRaises(rpc_common.RemoteError,
self.assertRaises(test.TestingException,
self.compute.live_migration,
c, dest=instance['host'], block_migration=True,
instance=rpcinst)
# cleanup
for bdms in db.block_device_mapping_get_all_by_instance(
c, inst_uuid):
db.block_device_mapping_destroy(c, bdms['id'])
db.instance_destroy(c, inst_uuid)
c, dest=dest_host, block_migration=True,
instance=instance)
def test_live_migration_works_correctly(self):
# Confirm live_migration() works as expected correctly.
@@ -2559,38 +2539,50 @@ class ComputeTestCase(BaseTestCase):
self.compute._post_live_migration(c, inst_ref, dest)
def test_post_live_migration_at_destination(self):
self.mox.StubOutWithMock(self.compute.network_api,
'setup_networks_on_host')
self.mox.StubOutWithMock(self.compute.network_api,
'migrate_instance_finish')
self.mox.StubOutWithMock(self.compute.driver,
'post_live_migration_at_destination')
self.mox.StubOutWithMock(self.compute, '_get_power_state')
self.mox.StubOutWithMock(self.compute, '_instance_update')
params = {'task_state': task_states.MIGRATING,
'power_state': power_state.PAUSED, }
instance = jsonutils.to_primitive(self._create_fake_instance(params))
admin_ctxt = context.get_admin_context()
instance = db.instance_get_by_uuid(admin_ctxt, instance['uuid'])
self.mox.StubOutWithMock(self.compute.network_api,
'setup_networks_on_host')
self.compute.network_api.setup_networks_on_host(admin_ctxt, instance,
self.compute.host)
self.mox.StubOutWithMock(self.compute.network_api,
'migrate_instance_finish')
migration = {'source_compute': instance['host'],
'dest_compute': self.compute.host, }
self.compute.network_api.migrate_instance_finish(admin_ctxt,
instance, migration)
self.mox.StubOutWithMock(self.compute.driver,
'post_live_migration_at_destination')
fake_net_info = []
self.compute.driver.post_live_migration_at_destination(admin_ctxt,
instance,
fake_net_info,
False)
self.compute.network_api.setup_networks_on_host(admin_ctxt, instance,
self.compute.host)
self.compute._get_power_state(admin_ctxt, instance).AndReturn(
'fake_power_state')
updated_instance = 'fake_updated_instance'
self.compute._instance_update(admin_ctxt, instance['uuid'],
host=self.compute.host,
power_state='fake_power_state',
vm_state=vm_states.ACTIVE,
task_state=None,
expected_task_state=task_states.MIGRATING).AndReturn(
updated_instance)
self.compute.network_api.setup_networks_on_host(admin_ctxt,
updated_instance, self.compute.host)
self.mox.ReplayAll()
self.compute.post_live_migration_at_destination(admin_ctxt, instance)
instance = db.instance_get_by_uuid(admin_ctxt, instance['uuid'])
self.assertEqual(instance['host'], self.compute.host)
self.assertEqual(instance['vm_state'], vm_states.ACTIVE)
self.assertEqual(instance['task_state'], None)
def test_run_kill_vm(self):
# Detect when a vm is terminated behind the scenes.
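The reworked tests above verify that contract directly: _instance_update is stubbed to hand back a sentinel ('fake_updated_instance'), and the mox expectations then require that every downstream call in the rollback and post-migration paths receives the sentinel rather than the instance the test passed in. A rough equivalent of that verification idea, sketched with unittest.mock and a hypothetical rollback helper instead of the real mox-based tests:

    # Illustration only: the real tests use mox; this uses unittest.mock and a
    # stand-in for ComputeManager._rollback_live_migration.
    from unittest import mock

    def _rollback(manager, context, instance, dest):
        # Rebind to the updated record, then use it for every downstream call.
        instance = manager._instance_update(context, instance['uuid'],
                                            task_state=None)
        manager.network_api.setup_networks_on_host(context, instance,
                                                   'fake-src-host')
        manager.compute_rpcapi.rollback_live_migration_at_destination(
            context, instance, dest)

    def test_rollback_passes_fresh_instance():
        manager = mock.Mock()
        updated = 'fake_updated_instance'      # sentinel, as in the tests above
        manager._instance_update.return_value = updated

        _rollback(manager, 'ctx', {'uuid': 'fake-uuid'}, 'fake-dest-host')

        # Downstream consumers must have received the sentinel returned by
        # _instance_update, not the dict the test started with.
        manager.network_api.setup_networks_on_host.assert_called_once_with(
            'ctx', updated, 'fake-src-host')
        manager.compute_rpcapi.rollback_live_migration_at_destination \
            .assert_called_once_with('ctx', updated, 'fake-dest-host')

If the rollback helper dropped the return value and kept using the original dict, both assertions would fail, which is exactly the kind of regression the mox expectations in test_live_migration_exception_rolls_back are written to catch.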