Merge "Add finish_revert_snapshot_based_resize_at_source compute method"

This commit is contained in:
Zuul
2019-12-14 01:59:15 +00:00
committed by Gerrit Code Review
6 changed files with 461 additions and 10 deletions

View File

@@ -560,7 +560,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.9')
target = messaging.Target(version='5.10')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@@ -4634,6 +4634,153 @@ class ComputeManager(manager.Manager):
self.rt.drop_move_claim(ctxt, instance, instance.node,
instance_type=instance.new_flavor)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def finish_revert_snapshot_based_resize_at_source(
        self, ctxt, instance, migration):
    """Reverts a snapshot-based resize at the source host.

    Spawn the guest and re-connect volumes/VIFs on the source host and
    revert the instance to use the old_flavor for resource usage
    reporting.

    Updates allocations in the placement service to move the source node
    allocations, held by the migration record, to the instance and drop
    the allocations held by the instance on the destination node.

    :param ctxt: nova auth request context targeted at the source cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    """
    # Serialize on the instance UUID so concurrent operations on the same
    # instance do not interleave with the revert.
    @utils.synchronized(instance.uuid)
    def do_revert():
        LOG.info('Reverting resize on source host.', instance=instance)
        with self._error_out_instance_on_exception(ctxt, instance):
            self._finish_revert_snapshot_based_resize_at_source(
                ctxt, instance, migration)

    do_revert()

    # Broadcast to all schedulers that the instance is on this host.
    # This is best effort so if anything fails just log it.
    try:
        self._update_scheduler_instance_info(ctxt, instance)
    except Exception as e:
        LOG.warning('finish_revert_snapshot_based_resize_at_source failed '
                    'during post-processing. Error: %s', e,
                    instance=instance)
def _finish_revert_snapshot_based_resize_at_source(
        self, ctxt, instance, migration):
    """Private version of finish_revert_snapshot_based_resize_at_source.

    This allows the main method to be decorated with error handlers.

    :param ctxt: nova auth request context targeted at the source cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    """
    # The stashed old_vm_state tells us whether the guest was running
    # before the resize; pop it so it does not linger in system_metadata.
    old_vm_state = instance.system_metadata.pop(
        'old_vm_state', vm_states.ACTIVE)

    # Point the instance back at the source host/node and the old flavor.
    # From here on, a failure leaves the instance on this host so it can
    # be rebuilt/rebooted here.
    self._finish_revert_resize_update_instance_flavor_host_node(
        instance, migration)

    # Swap the placement allocations: the migration consumer's
    # allocations against the source node move back to the instance,
    # which drops the instance's allocations against the destination
    # node. Net effect: source node usage is tracked by the instance
    # with the old_flavor again.
    try:
        self._revert_allocation(ctxt, instance, migration)
    except exception.AllocationMoveFailed:
        # Not fatal: keep going so ports and volumes are still processed.
        LOG.error('Reverting allocation in placement for migration '
                  '%(migration_uuid)s failed. You may need to manually '
                  'remove the allocations for the migration consumer '
                  'against the source node resource provider '
                  '%(source_provider)s and the allocations for the '
                  'instance consumer against the destination node '
                  'resource provider %(dest_provider)s and then run the '
                  '"nova-manage placement heal_allocations" command.',
                  {'instance_uuid': instance.uuid,
                   'migration_uuid': migration.uuid,
                   'source_provider': migration.source_node,
                   'dest_provider': migration.dest_node},
                  instance=instance)

    bdms = instance.get_bdms()
    # prep_snapshot_based_resize_at_source created empty volume
    # attachments; fill in their connection_info before the driver
    # re-connects the volumes to this host.
    LOG.debug('Updating volume attachments for target host %s.',
              self.host, instance=instance)
    # TODO(mriedem): We should probably make _update_volume_attachments
    # (optionally) graceful to errors so we (1) try to process all
    # attachments and (2) continue to process networking below.
    self._update_volume_attachments(ctxt, instance, bdms)

    LOG.debug('Updating port bindings for source host %s.',
              self.host, instance=instance)
    # TODO(mriedem): Calculate provider mappings when we support
    # cross-cell resize/migrate with ports having resource requests.
    self._finish_revert_resize_network_migrate_finish(
        ctxt, instance, migration, provider_mappings=None)
    network_info = self.network_api.get_instance_nw_info(ctxt, instance)

    # prep_snapshot_based_resize_at_source destroyed the guest but left
    # the disks intact, so finish_revert_migration (rather than spawn)
    # recreates the guest here.
    bdi = self._get_instance_block_device_info(
        ctxt, instance, bdms=bdms)
    power_on = old_vm_state == vm_states.ACTIVE
    fatal_error = None
    try:
        self.driver.finish_revert_migration(
            ctxt, instance, network_info, migration,
            block_device_info=bdi, power_on=power_on)
    except Exception as exc:
        fatal_error = exc
        # Leave a hint about hard rebooting the guest and reraise so the
        # instance is put into ERROR state.
        with excutils.save_and_reraise_exception(logger=LOG):
            LOG.error('An error occurred during finish_revert_migration. '
                      'The instance may need to be hard rebooted. Error: '
                      '%s', fatal_error, instance=instance)
    else:
        # The guest is back; finish the database bookkeeping.
        instance.drop_migration_context()
        # Restore the pre-resize vm_state (ACTIVE or STOPPED).
        vm_state = vm_states.ACTIVE if power_on else vm_states.STOPPED
        self._update_instance_after_spawn(
            ctxt, instance, vm_state=vm_state)
        instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
    finally:
        # Complete the volume attachments regardless of whether the
        # driver call failed: the instance is back on this host and the
        # volumes must not be left in a pending state in case the
        # instance is hard rebooted later.
        LOG.debug('Completing volume attachments for instance on source '
                  'host.', instance=instance)
        with excutils.save_and_reraise_exception(
                reraise=fatal_error is not None, logger=LOG):
            self._complete_volume_attachments(ctxt, bdms)

    migration.status = 'reverted'
    migration.save()
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@@ -4737,6 +4884,27 @@ class ComputeManager(manager.Manager):
LOG.error('Timeout waiting for Neutron events: %s', events,
instance=instance)
def _finish_revert_resize_update_instance_flavor_host_node(self, instance,
                                                           migration):
    """Updates host/node and flavor-related fields on the instance.

    Used when finishing the revert of a resize on the source host: the
    instance's flavor-related fields are restored from old_flavor, the
    stashed old/new_flavor fields are cleared, and the host/node fields
    are pointed back at the source compute host/node.

    :param instance: Instance object
    :param migration: Migration object
    """
    # Point the instance back at the source compute host/node.
    instance.host = migration.source_compute
    instance.node = migration.source_node
    # Restore the flavor-related fields from the old flavor, then clear
    # the stashed flavors since the resize is being reverted.
    self._set_instance_info(instance, instance.old_flavor)
    instance.old_flavor = None
    instance.new_flavor = None
    instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@@ -4765,12 +4933,8 @@ class ComputeManager(manager.Manager):
old_vm_state = instance.system_metadata.pop('old_vm_state',
vm_states.ACTIVE)
self._set_instance_info(instance, instance.old_flavor)
instance.old_flavor = None
instance.new_flavor = None
instance.host = migration.source_compute
instance.node = migration.source_node
instance.save()
self._finish_revert_resize_update_instance_flavor_host_node(
instance, migration)
try:
source_allocations = self._revert_allocation(

View File

@@ -375,6 +375,7 @@ class ComputeAPI(object):
* 5.7 - Add finish_snapshot_based_resize_at_dest()
* 5.8 - Add confirm_snapshot_based_resize_at_source()
* 5.9 - Add revert_snapshot_based_resize_at_dest()
* 5.10 - Add finish_revert_snapshot_based_resize_at_source()
'''
VERSION_ALIASES = {
@@ -732,6 +733,41 @@ class ComputeAPI(object):
instance=instance, migration=migration, snapshot_id=snapshot_id,
request_spec=request_spec)
def finish_revert_snapshot_based_resize_at_source(
        self, ctxt, instance, migration):
    """Reverts a snapshot-based resize at the source host.

    Spawn the guest and re-connect volumes/VIFs on the source host and
    revert the instance to use the old_flavor for resource usage
    reporting.

    Updates allocations in the placement service to move the source node
    allocations, held by the migration record, to the instance and drop
    the allocations held by the instance on the destination node.

    This is a synchronous RPC call using the ``long_rpc_timeout``
    configuration option.

    :param ctxt: nova auth request context targeted at the source cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    :raises: nova.exception.MigrationError if the source compute is too
        old to perform the operation
    :raises: oslo_messaging.exceptions.MessagingTimeout if the RPC call
        times out
    """
    required_version = '5.10'
    client = self.router.client(ctxt)
    # Fail fast if the source compute has not yet been upgraded to an
    # RPC API version that knows about this method.
    if not client.can_send_version(required_version):
        raise exception.MigrationError(reason=_('Compute too old'))
    cctxt = client.prepare(
        server=migration.source_compute, version=required_version,
        call_monitor_timeout=CONF.rpc_response_timeout,
        timeout=CONF.long_rpc_timeout)
    return cctxt.call(
        ctxt, 'finish_revert_snapshot_based_resize_at_source',
        instance=instance, migration=migration)
def get_console_output(self, ctxt, instance, tail_length):
version = '5.0'
cctxt = self.router.client(ctxt).prepare(

View File

@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 46
SERVICE_VERSION = 47
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -173,6 +173,9 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '5.8'},
# Version 46: Compute RPC v5.9: revert_snapshot_based_resize_at_dest
{'compute_rpc': '5.9'},
# Version 47: Compute RPC v5.10:
# finish_revert_snapshot_based_resize_at_source
{'compute_rpc': '5.10'},
)

View File

@@ -7773,7 +7773,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
instance_uuid=self.instance.uuid,
new_instance_type_id=7,
dest_compute='dest_compute',
dest_node=None,
dest_node='dest_node',
dest_host=None,
source_compute='source_compute',
source_node='source_node',
@@ -11262,6 +11262,223 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.context, self.instance, self.instance.node,
instance_type=self.instance.new_flavor)
@mock.patch('nova.compute.manager.ComputeManager.'
'_finish_revert_snapshot_based_resize_at_source')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.manager.ComputeManager.'
'_update_scheduler_instance_info')
@mock.patch('nova.compute.utils.add_instance_fault_from_exc')
def test_finish_revert_snapshot_based_resize_at_source_error_handling(
self, mock_add_fault, mock_update_sched, mock_inst_save,
mock_finish_revert):
"""Tests error handling (context managers, decorators, post-processing)
in finish_revert_snapshot_based_resize_at_source.
"""
# The private helper is mocked out, so this test only exercises the
# public wrapper: its decorators, error handling and post-processing.
self.instance.task_state = task_states.RESIZE_REVERTING
# First make _finish_revert_snapshot_based_resize_at_source fail.
error = test.TestingException('oops')
mock_finish_revert.side_effect = error
self.assertRaises(
test.TestingException,
self.compute.finish_revert_snapshot_based_resize_at_source,
self.context, self.instance, self.migration)
mock_finish_revert.assert_called_once_with(
self.context, self.instance, self.migration)
# _error_out_instance_on_exception should have set the instance status
# to ERROR.
mock_inst_save.assert_called()
self.assertEqual(vm_states.ERROR, self.instance.vm_state)
# We should not have updated the scheduler since we failed.
mock_update_sched.assert_not_called()
# reverts_task_state should have set the task_state to None.
self.assertIsNone(self.instance.task_state)
# errors_out_migration should have set the migration status to error.
self.migration.save.assert_called_once_with()
self.assertEqual('error', self.migration.status)
# wrap_instance_fault should have recorded a fault.
mock_add_fault.assert_called_once_with(
self.context, self.instance, error, test.MatchType(tuple))
# wrap_exception should have sent an error notification.
self.assertEqual(1, len(fake_notifier.VERSIONED_NOTIFICATIONS))
self.assertEqual(
'compute.%s' % fields.NotificationAction.EXCEPTION,
fake_notifier.VERSIONED_NOTIFICATIONS[0]['event_type'])
# Now run it again but _finish_revert_snapshot_based_resize_at_source
# will pass and _update_scheduler_instance_info will fail but not be
# fatal (just logged).
mock_finish_revert.reset_mock(side_effect=True) # reset side_effect
mock_update_sched.side_effect = test.TestingException('scheduler oops')
self.compute.finish_revert_snapshot_based_resize_at_source(
self.context, self.instance, self.migration)
mock_finish_revert.assert_called_once_with(
self.context, self.instance, self.migration)
mock_update_sched.assert_called_once_with(self.context, self.instance)
# Scheduler update failures are best-effort: only a warning is logged.
self.assertIn('finish_revert_snapshot_based_resize_at_source failed '
'during post-processing. Error: scheduler oops',
self.stdlog.logger.output)
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.manager.ComputeManager._revert_allocation',
side_effect=exception.AllocationMoveFailed('placement down'))
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
@mock.patch('nova.compute.manager.ComputeManager.'
'_update_volume_attachments')
@mock.patch('nova.compute.manager.ComputeManager.'
'_finish_revert_resize_network_migrate_finish')
@mock.patch('nova.compute.manager.ComputeManager.'
'_get_instance_block_device_info')
@mock.patch('nova.objects.Instance.drop_migration_context')
@mock.patch('nova.compute.manager.ComputeManager.'
'_update_instance_after_spawn')
@mock.patch('nova.compute.manager.ComputeManager.'
'_complete_volume_attachments')
def test_finish_revert_snapshot_based_resize_at_source(
self, mock_complete_attachments, mock_update_after_spawn,
mock_drop_mig_context, mock_get_bdi, mock_net_migrate_finish,
mock_update_attachments, mock_get_bdms, mock_revert_allocs,
mock_inst_save):
"""Happy path test for finish_revert_snapshot_based_resize_at_source.
Also makes sure some cleanups that are handled gracefully do not raise.
"""
# Note that _revert_allocation is mocked to raise AllocationMoveFailed
# above, which the code under test must only log, not re-raise.
# Make _update_scheduler_instance_info a no-op.
self.flags(track_instance_changes=False, group='filter_scheduler')
# Configure the instance with old_vm_state = STOPPED so the guest is
# created but not powered on.
self.instance.system_metadata['old_vm_state'] = vm_states.STOPPED
# Configure the instance with an old_flavor for the revert.
old_flavor = fake_flavor.fake_flavor_obj(self.context)
self.instance.old_flavor = old_flavor
with test.nested(
mock.patch.object(self.compute.network_api,
'get_instance_nw_info'),
mock.patch.object(self.compute.driver, 'finish_revert_migration')
) as (
mock_get_nw_info, mock_driver_finish
):
# Run the code.
self.compute.finish_revert_snapshot_based_resize_at_source(
self.context, self.instance, self.migration)
# Assert the migration status was updated.
self.migration.save.assert_called_once_with()
self.assertEqual('reverted', self.migration.status)
# Assert the instance host/node and flavor info was updated.
self.assertEqual(self.migration.source_compute, self.instance.host)
self.assertEqual(self.migration.source_node, self.instance.node)
self.assertIs(self.instance.flavor, old_flavor)
self.assertEqual(old_flavor.id, self.instance.instance_type_id)
# Assert _revert_allocation was called, raised and logged the error.
mock_revert_allocs.assert_called_once_with(
self.context, self.instance, self.migration)
self.assertIn('Reverting allocation in placement for migration %s '
'failed.' % self.migration.uuid,
self.stdlog.logger.output)
# Assert that volume attachments were updated.
mock_update_attachments.assert_called_once_with(
self.context, self.instance, mock_get_bdms.return_value)
# Assert that port bindings were updated to point at the source host.
mock_net_migrate_finish.assert_called_once_with(
self.context, self.instance, self.migration,
provider_mappings=None)
# Assert the driver finished reverting the migration.
mock_get_bdi.assert_called_once_with(
self.context, self.instance, bdms=mock_get_bdms.return_value)
# power_on=False because old_vm_state was STOPPED above.
mock_driver_finish.assert_called_once_with(
self.context, self.instance, mock_get_nw_info.return_value,
self.migration, block_device_info=mock_get_bdi.return_value,
power_on=False)
# Assert final DB cleanup for the instance.
mock_drop_mig_context.assert_called_once_with()
mock_update_after_spawn.assert_called_once_with(
self.context, self.instance, vm_state=vm_states.STOPPED)
mock_inst_save.assert_has_calls([
mock.call(expected_task_state=[task_states.RESIZE_REVERTING])] * 2)
# And finally that the volume attachments were completed.
mock_complete_attachments.assert_called_once_with(
self.context, mock_get_bdms.return_value)
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.manager.ComputeManager._revert_allocation')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
@mock.patch('nova.compute.manager.ComputeManager.'
'_update_volume_attachments')
@mock.patch('nova.compute.manager.ComputeManager.'
'_finish_revert_resize_network_migrate_finish')
@mock.patch('nova.compute.manager.ComputeManager.'
'_get_instance_block_device_info')
@mock.patch('nova.objects.Instance.drop_migration_context')
@mock.patch('nova.compute.manager.ComputeManager.'
'_update_instance_after_spawn')
@mock.patch('nova.compute.manager.ComputeManager.'
'_complete_volume_attachments',
side_effect=test.TestingException('vol complete failed'))
def test_finish_revert_snapshot_based_resize_at_source_driver_fails(
self, mock_complete_attachments, mock_update_after_spawn,
mock_drop_mig_context, mock_get_bdi, mock_net_migrate_finish,
mock_update_attachments, mock_get_bdms, mock_revert_allocs,
mock_inst_save):
"""Test for _finish_revert_snapshot_based_resize_at_source where the
driver call to finish_revert_migration fails.
"""
# Both the driver call and _complete_volume_attachments fail in this
# test, exercising the save_and_reraise_exception handling in the
# finally block of the method under test.
self.instance.system_metadata['old_vm_state'] = vm_states.ACTIVE
# Configure the instance with an old_flavor for the revert.
old_flavor = fake_flavor.fake_flavor_obj(self.context)
self.instance.old_flavor = old_flavor
with test.nested(
mock.patch.object(self.compute.network_api,
'get_instance_nw_info'),
mock.patch.object(self.compute.driver, 'finish_revert_migration',
side_effect=test.TestingException('driver fail'))
) as (
mock_get_nw_info, mock_driver_finish,
):
# Run the code.
ex = self.assertRaises(
test.TestingException,
self.compute._finish_revert_snapshot_based_resize_at_source,
self.context, self.instance, self.migration)
# Assert the driver call (note power_on=True b/c old_vm_state=active)
mock_get_bdi.assert_called_once_with(
self.context, self.instance, bdms=mock_get_bdms.return_value)
mock_driver_finish.assert_called_once_with(
self.context, self.instance, mock_get_nw_info.return_value,
self.migration, block_device_info=mock_get_bdi.return_value,
power_on=True)
# _complete_volume_attachments is called even though the driver call
# failed.
mock_complete_attachments.assert_called_once_with(
self.context, mock_get_bdms.return_value)
# finish_revert_migration failed but _complete_volume_attachments
# is still called and in this case also fails so the resulting
# exception should be the one from _complete_volume_attachments
# but the finish_revert_migration error should have been logged.
self.assertIn('vol complete failed', six.text_type(ex))
self.assertIn('driver fail', self.stdlog.logger.output)
# Assert the migration status was not updated.
self.migration.save.assert_not_called()
# Assert the instance host/node and flavor info was updated.
self.assertEqual(self.migration.source_compute, self.instance.host)
self.assertEqual(self.migration.source_node, self.instance.node)
self.assertIs(self.instance.flavor, old_flavor)
self.assertEqual(old_flavor.id, self.instance.instance_type_id)
# Assert allocations were reverted.
mock_revert_allocs.assert_called_once_with(
self.context, self.instance, self.migration)
# Assert that volume attachments were updated.
mock_update_attachments.assert_called_once_with(
self.context, self.instance, mock_get_bdms.return_value)
# Assert that port bindings were updated to point at the source host.
mock_net_migrate_finish.assert_called_once_with(
self.context, self.instance, self.migration,
provider_mappings=None)
# Assert final DB cleanup for the instance did not happen.
mock_drop_mig_context.assert_not_called()
mock_update_after_spawn.assert_not_called()
# _finish_revert_resize_update_instance_flavor_host_node updated the
# instance.
mock_inst_save.assert_called_once_with(
expected_task_state=[task_states.RESIZE_REVERTING])
class ComputeManagerInstanceUsageAuditTestCase(test.TestCase):
def setUp(self):

View File

@@ -655,6 +655,36 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
migration=migration_obj.Migration(dest_compute='dest'))
self.assertIn('Compute too old', six.text_type(ex))
def test_finish_revert_snapshot_based_resize_at_source(self):
"""Tests happy path for finish_revert_snapshot_based_resize_at_source.
"""
self.flags(long_rpc_timeout=1234)
# NOTE(review): call_monitor_timeout=60 presumably matches the default
# rpc_response_timeout used by the rpcapi method — confirm if changed.
self._test_compute_api(
'finish_revert_snapshot_based_resize_at_source', 'call',
# compute method kwargs
instance=self.fake_instance_obj,
migration=migration_obj.Migration(source_compute='source'),
# client.prepare kwargs
version='5.10', prepare_server='source',
call_monitor_timeout=60, timeout=1234)
@mock.patch('nova.rpc.ClientRouter.client')
def test_finish_revert_snapshot_based_resize_at_source_old_compute(
self, client):
"""Tests when the source compute service is too old to call
finish_revert_snapshot_based_resize_at_source so MigrationError is
raised.
"""
# Simulate a source compute older than RPC API 5.10.
client.return_value.can_send_version.return_value = False
rpcapi = compute_rpcapi.ComputeAPI()
ex = self.assertRaises(
exception.MigrationError,
rpcapi.finish_revert_snapshot_based_resize_at_source,
self.context,
instance=self.fake_instance_obj,
migration=migration_obj.Migration(source_compute='source'))
self.assertIn('Compute too old', six.text_type(ex))
def test_reboot_instance(self):
self.maxDiff = None
self._test_compute_api('reboot_instance', 'cast',

View File

@@ -254,8 +254,9 @@ class FakeDriver(driver.ComputeDriver):
def finish_revert_migration(self, context, instance, network_info,
                            migration, block_device_info=None,
                            power_on=True):
    """Fake out recreating the guest on the source host after a revert.

    The source span fused the diff's pre- and post-image lines (two
    alternative FakeInstance argument lists), which is not valid Python;
    this is the reconstructed post-image, which also honors ``power_on``
    instead of always recording the guest as RUNNING.
    """
    state = power_state.RUNNING if power_on else power_state.SHUTDOWN
    self.instances[instance.uuid] = FakeInstance(
        instance.name, state, instance.uuid)
def post_live_migration_at_destination(self, context, instance,
network_info,