Add revert_snapshot_based_resize_at_dest compute method

This adds the revert_snapshot_based_resize_at_dest() compute
service method, which will be called from conductor during
a cross-cell resize revert operation. It runs on the dest
host in the target cell and is similar to
confirm_snapshot_based_resize_at_source(), except that it destroys
the guest and drops the new_flavor resource usage
rather than the old_flavor usage. It also sends the legacy
"compute.instance.exists" notification like a traditional
revert_resize().

As part of this cleanup, the dest host volume attachments
are deleted. The source host port bindings, which are inactive
when this runs, are activated and then the (previously active)
dest host port bindings are deleted. This is to make sure the
ports are not left unbound after the revert.

Part of blueprint cross-cell-resize

Change-Id: I200c51acbbda767c2d8ff90103eef54dbf35fb01
This commit is contained in:
Matt Riedemann 2019-02-18 15:58:42 -05:00
parent 386aa315a4
commit 11b7bcd947
5 changed files with 317 additions and 2 deletions

View File

@ -560,7 +560,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.8')
target = messaging.Target(version='5.9')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -4579,6 +4579,113 @@ class ComputeManager(manager.Manager):
'Error: %s', bdm.attachment_id, six.text_type(e),
instance_uuid=bdm.instance_uuid)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def revert_snapshot_based_resize_at_dest(self, ctxt, instance, migration):
    """Revert a snapshot-based resize on the destination host.

    Removes the guest from this (destination) host's hypervisor, cleans up
    the related ports and volumes, and releases the resource usage tracked
    for the instance by this compute service.

    The real work is delegated to
    ``_revert_snapshot_based_resize_at_dest``; this wrapper sends the usage
    notification, runs the cleanup under ``utils.synchronized`` keyed on
    the instance UUID, and then performs best-effort post-processing.

    :param ctxt: nova auth request context targeted at the target cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    """
    # Reverting is effectively a resize back to the old size, so a usage
    # event has to be emitted before anything is torn down.
    compute_utils.notify_usage_exists(
        self.notifier, ctxt, instance, self.host, current_period=True)

    @utils.synchronized(instance.uuid)
    def do_revert():
        LOG.info('Reverting resize on destination host.',
                 instance=instance)
        # Any failure in the private cleanup passes through the
        # _error_out_instance_on_exception context manager.
        with self._error_out_instance_on_exception(ctxt, instance):
            self._revert_snapshot_based_resize_at_dest(
                ctxt, instance, migration)

    do_revert()

    # Post-processing: tell the schedulers the instance has left this host
    # and drop any pending callback events. This is best effort only, so a
    # failure here is logged and not re-raised.
    try:
        self._delete_scheduler_instance_info(ctxt, instance.uuid)
        self.instance_events.clear_events_for_instance(instance)
    except Exception as exc:
        LOG.warning('revert_snapshot_based_resize_at_dest failed during '
                    'post-processing. Error: %s', exc, instance=instance)
def _revert_snapshot_based_resize_at_dest(
        self, ctxt, instance, migration):
    """Perform the destination-host cleanup for a resize revert.

    This is the undecorated implementation behind
    ``revert_snapshot_based_resize_at_dest`` so that the public method can
    carry the error-handling decorators. In order, it:

    1. destroys the guest (and its disks) through the virt driver,
    2. activates the source host port bindings,
    3. deletes the port bindings for this (destination) host,
    4. deletes the volume attachments for the instance's BDMs,
    5. reverts the migration context, saves the instance and drops the
       move claim for ``instance.new_flavor`` from the resource tracker.

    :param ctxt: nova auth request context targeted at the target cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    """
    # Collect what the driver needs, then remove the guest from the
    # hypervisor, local disks included.
    nw_info = self.network_api.get_instance_nw_info(ctxt, instance)
    bdms = instance.get_bdms()
    bdi = self._get_instance_block_device_info(ctxt, instance, bdms=bdms)
    LOG.debug('Destroying guest from destination hypervisor including '
              'disks.', instance=instance)
    self.driver.destroy(ctxt, instance, nw_info, block_device_info=bdi)

    # The source host port bindings must be activated *before* the
    # (active) dest host bindings are deleted via setup_networks_on_host;
    # otherwise the ports would end up unbound and the finish step on the
    # source would fail.
    # migrate_instance_start takes the port binding host from
    # migration.dest_compute, so the migration is temporarily mutated to
    # point dest_compute at the source host.
    source_host = migration.source_compute
    with utils.temporary_mutation(migration, dest_compute=source_host):
        LOG.debug('Activating port bindings for source host %s.',
                  source_host, instance=instance)
        # TODO(mriedem): https://review.opendev.org/#/c/594139/ would allow
        # us to remove this and make setup_networks_on_host do it.
        # TODO(mriedem): Should we try/except/log any errors but continue?
        self.network_api.migrate_instance_start(ctxt, instance, migration)

    # Deleting the bindings for this host relies on instance.host being
    # different from the host passed in, so the instance is temporarily
    # mutated to look like it lives on the source host.
    with utils.temporary_mutation(instance, host=source_host):
        LOG.debug('Deleting port bindings for target host %s.',
                  self.host, instance=instance)
        try:
            # Deleting the destination bindings does not by itself
            # activate the source bindings (hence the step above).
            self.network_api.setup_networks_on_host(
                ctxt, instance, host=self.host, teardown=True)
        except exception.PortBindingDeletionFailed as exc:
            # The guest is already gone, so keep cleaning up and only
            # log the failure.
            LOG.error('Failed to delete port bindings from target host. '
                      'Error: %s', six.text_type(exc), instance=instance)

    # Remove whatever volume attachments remain for this target host.
    LOG.debug('Deleting volume attachments for target host.',
              instance=instance)
    self._delete_volume_attachments(ctxt, bdms)

    # Release the new_flavor usage held by this host's resource tracker.
    instance.revert_migration_context()
    instance.save(expected_task_state=task_states.RESIZE_REVERTING)
    self.rt.drop_move_claim(
        ctxt, instance, instance.node, instance_type=instance.new_flavor)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')

View File

@ -374,6 +374,7 @@ class ComputeAPI(object):
* 5.6 - Add prep_snapshot_based_resize_at_source()
* 5.7 - Add finish_snapshot_based_resize_at_dest()
* 5.8 - Add confirm_snapshot_based_resize_at_source()
* 5.9 - Add revert_snapshot_based_resize_at_dest()
'''
VERSION_ALIASES = {
@ -1133,6 +1134,37 @@ class ComputeAPI(object):
server=_compute_host(host, instance), version=version)
cctxt.cast(ctxt, 'revert_resize', **msg_args)
def revert_snapshot_based_resize_at_dest(self, ctxt, instance, migration):
    """Ask the destination compute to revert a snapshot-based resize.

    The remote method cleans the guest from the destination compute
    service host hypervisor along with related resources (ports, volumes)
    and frees resource usage from the compute service on that host.

    This is a synchronous RPC call; its overall timeout comes from the
    ``long_rpc_timeout`` configuration option and its call monitor timeout
    from ``rpc_response_timeout``.

    :param ctxt: nova auth request context targeted at the target cell
    :param instance: Instance object whose vm_state is "resized" and
        task_state is "resize_reverting".
    :param migration: Migration object whose status is "reverting".
    :raises: nova.exception.MigrationError if the destination compute
        service is too old to perform the operation
    :raises: oslo_messaging.exceptions.MessagingTimeout if the RPC call
        times out
    """
    required_version = '5.9'
    client = self.router.client(ctxt)
    # Refuse early when the RPC version cap does not allow 5.9.
    if not client.can_send_version(required_version):
        raise exception.MigrationError(reason=_('Compute too old'))

    # The call is addressed to the destination compute host.
    prepare_kwargs = {
        'server': migration.dest_compute,
        'version': required_version,
        'call_monitor_timeout': CONF.rpc_response_timeout,
        'timeout': CONF.long_rpc_timeout,
    }
    cctxt = client.prepare(**prepare_kwargs)
    return cctxt.call(
        ctxt, 'revert_snapshot_based_resize_at_dest',
        instance=instance, migration=migration)
def rollback_live_migration_at_destination(self, ctxt, instance, host,
destroy_disks,
migrate_data):

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 45
SERVICE_VERSION = 46
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -171,6 +171,8 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '5.7'},
# Version 45: Compute RPC v5.8: confirm_snapshot_based_resize_at_source
{'compute_rpc': '5.8'},
# Version 46: Compute RPC v5.9: revert_snapshot_based_resize_at_dest
{'compute_rpc': '5.9'},
)

View File

@ -11114,6 +11114,153 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.assertIn('Failed to delete volume attachment with ID %s' %
uuids.attachment1, self.stdlog.logger.output)
@mock.patch('nova.compute.utils.notify_usage_exists')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.utils.add_instance_fault_from_exc')
@mock.patch('nova.compute.manager.InstanceEvents.'
            'clear_events_for_instance')
def test_revert_snapshot_based_resize_at_dest_error_handling(
        self, mock_clear_events, mock_add_fault, mock_inst_save,
        mock_notify_usage):
    """Checks every error handler wrapped around
    revert_snapshot_based_resize_at_dest when the private cleanup method
    raises.
    """
    self.instance.task_state = task_states.RESIZE_REVERTING
    error = test.TestingException('oops')
    # Make the private implementation blow up and expect the same
    # exception to propagate out of the public method.
    with mock.patch.object(
            self.compute, '_revert_snapshot_based_resize_at_dest',
            side_effect=error) as mock_revert:
        self.assertRaises(
            test.TestingException,
            self.compute.revert_snapshot_based_resize_at_dest,
            self.context, self.instance, self.migration)

    # The usage notification is sent before the cleanup is attempted.
    mock_notify_usage.assert_called_once_with(
        self.compute.notifier, self.context, self.instance,
        self.compute.host, current_period=True)
    mock_revert.assert_called_once_with(
        self.context, self.instance, self.migration)
    mock_inst_save.assert_called()

    # _error_out_instance_on_exception: instance goes to ERROR.
    self.assertEqual(vm_states.ERROR, self.instance.vm_state)
    # reverts_task_state: task_state is reset to None.
    self.assertIsNone(self.instance.task_state)
    # wrap_instance_fault: a fault is recorded for the exception.
    mock_add_fault.assert_called_once_with(
        self.context, self.instance, error, test.MatchType(tuple))
    # errors_out_migration: migration is saved with 'error' status.
    self.assertEqual('error', self.migration.status)
    self.migration.save.assert_called_once_with()

    # wrap_exception: exactly one versioned exception notification.
    notifications = fake_notifier.VERSIONED_NOTIFICATIONS
    self.assertEqual(1, len(notifications))
    expected_event_type = (
        'compute.%s' % fields.NotificationAction.EXCEPTION)
    self.assertEqual(expected_event_type, notifications[0]['event_type'])

    # Post-processing must not run when the revert itself failed.
    mock_clear_events.assert_not_called()
# notify_usage_exists is replaced with a plain Mock (new=...), so it is not
# injected as a test argument; only the patched private method is ("revert").
@mock.patch('nova.compute.utils.notify_usage_exists', new=mock.Mock())
@mock.patch('nova.compute.manager.ComputeManager.'
'_revert_snapshot_based_resize_at_dest')
def test_revert_snapshot_based_resize_at_dest_post_error_log(self, revert):
"""Tests when _revert_snapshot_based_resize_at_dest is OK but
post-processing cleanup fails and is just logged.

Two failures are exercised in turn: first
_delete_scheduler_instance_info raising, then
clear_events_for_instance raising. In both cases the public method is
called without expecting an exception and the warning text is looked
up in the captured log output.
"""
# First test _delete_scheduler_instance_info failing.
# NOTE(review): the side_effect has two entries (an exception, then None),
# which suggests this patch is meant to remain active for the second call
# further down -- confirm the nesting against the original indentation.
with mock.patch.object(
self.compute, '_delete_scheduler_instance_info',
side_effect=(
test.TestingException('scheduler'), None)) as mock_del:
self.compute.revert_snapshot_based_resize_at_dest(
self.context, self.instance, self.migration)
revert.assert_called_once()
mock_del.assert_called_once_with(self.context, self.instance.uuid)
self.assertIn('revert_snapshot_based_resize_at_dest failed during '
'post-processing. Error: scheduler',
self.stdlog.logger.output)
# Clear recorded calls so the assert_called_once* checks below only see
# the second invocation.
revert.reset_mock()
mock_del.reset_mock()
# Now test clear_events_for_instance failing.
with mock.patch.object(
self.compute.instance_events, 'clear_events_for_instance',
side_effect=test.TestingException(
'events')) as mock_events:
self.compute.revert_snapshot_based_resize_at_dest(
self.context, self.instance, self.migration)
revert.assert_called_once()
mock_del.assert_called_once_with(self.context, self.instance.uuid)
mock_events.assert_called_once_with(self.instance)
self.assertIn('revert_snapshot_based_resize_at_dest failed during '
'post-processing. Error: events',
self.stdlog.logger.output)
# Assert _error_out_instance_on_exception wasn't tripped somehow.
self.assertNotEqual(vm_states.ERROR, self.instance.vm_state)
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.objects.Instance.revert_migration_context')
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
def test_revert_snapshot_based_resize_at_dest(
        self, mock_get_bdms, mock_revert_mig_ctxt, mock_inst_save):
    """Happy path test for _revert_snapshot_based_resize_at_dest"""

    def check_migrate_instance_start(ctxt, instance, migration):
        # While this runs, migration.dest_compute must be temporarily
        # pointing at the source compute.
        self.assertEqual(migration.source_compute, migration.dest_compute)

    def check_setup_networks_on_host(ctxt, instance, *args, **kwargs):
        # While this runs, instance.host must be temporarily pointing at
        # the source compute.
        self.assertEqual(self.migration.source_compute, instance.host)
        # Fail with PortBindingDeletionFailed to prove it is caught and
        # logged instead of aborting the cleanup.
        raise exception.PortBindingDeletionFailed(
            port_id=uuids.port_id, host=self.compute.host)

    with test.nested(
        mock.patch.object(self.compute, 'network_api'),
        mock.patch.object(self.compute, '_get_instance_block_device_info'),
        mock.patch.object(self.compute.driver, 'destroy'),
        mock.patch.object(self.compute, '_delete_volume_attachments'),
        mock.patch.object(self.compute.rt, 'drop_move_claim')
    ) as (
        mock_network_api, mock_get_bdi, mock_destroy,
        mock_delete_attachments, mock_drop_claim
    ):
        mock_network_api.migrate_instance_start.side_effect = (
            check_migrate_instance_start)
        mock_network_api.setup_networks_on_host.side_effect = (
            check_setup_networks_on_host)
        # Exercise the method under test.
        self.compute._revert_snapshot_based_resize_at_dest(
            self.context, self.instance, self.migration)

    # Guest teardown: network info and block device info are looked up
    # and handed to the driver.
    expected_nw_info = mock_network_api.get_instance_nw_info.return_value
    expected_bdms = mock_get_bdms.return_value
    mock_network_api.get_instance_nw_info.assert_called_once_with(
        self.context, self.instance)
    mock_get_bdi.assert_called_once_with(
        self.context, self.instance, bdms=expected_bdms)
    mock_destroy.assert_called_once_with(
        self.context, self.instance, expected_nw_info,
        block_device_info=mock_get_bdi.return_value)

    # Port bindings: source activated, then destination torn down.
    mock_network_api.migrate_instance_start.assert_called_once_with(
        self.context, self.instance, self.migration)
    mock_network_api.setup_networks_on_host.assert_called_once_with(
        self.context, self.instance, host=self.compute.host,
        teardown=True)
    # setup_networks_on_host raised PortBindingDeletionFailed, which must
    # have been handled and logged.
    self.assertIn('Failed to delete port bindings from target host.',
                  self.stdlog.logger.output)

    # Volumes, migration context and the resource claim.
    mock_delete_attachments.assert_called_once_with(
        self.context, expected_bdms)
    mock_revert_mig_ctxt.assert_called_once_with()
    mock_inst_save.assert_called_once_with(
        expected_task_state=task_states.RESIZE_REVERTING)
    mock_drop_claim.assert_called_once_with(
        self.context, self.instance, self.instance.node,
        instance_type=self.instance.new_flavor)
class ComputeManagerInstanceUsageAuditTestCase(test.TestCase):
def setUp(self):

View File

@ -628,6 +628,33 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
migration=migration_obj.Migration(source_compute='source'))
self.assertIn('Compute too old', six.text_type(ex))
def test_revert_snapshot_based_resize_at_dest(self):
    """Tests happy path for revert_snapshot_based_resize_at_dest."""
    # Use a distinctive long_rpc_timeout so it can be recognised in the
    # client.prepare kwargs below.
    self.flags(long_rpc_timeout=1234)
    migration = migration_obj.Migration(dest_compute='dest')
    self._test_compute_api(
        'revert_snapshot_based_resize_at_dest', 'call',
        # Arguments passed through to the compute method.
        instance=self.fake_instance_obj,
        migration=migration,
        # Arguments expected on client.prepare.
        version='5.9', prepare_server='dest',
        call_monitor_timeout=60, timeout=1234)
@mock.patch('nova.rpc.ClientRouter.client')
def test_revert_snapshot_based_resize_at_dest_old_compute(self, client):
    """Tests that MigrationError is raised when the RPC client cannot send
    the version needed for revert_snapshot_based_resize_at_dest, i.e. the
    dest compute service is too old.
    """
    # Simulate a client that cannot send the required RPC version.
    client.return_value.can_send_version.return_value = False
    rpcapi = compute_rpcapi.ComputeAPI()
    migration = migration_obj.Migration(dest_compute='dest')
    ex = self.assertRaises(
        exception.MigrationError,
        rpcapi.revert_snapshot_based_resize_at_dest,
        self.context,
        instance=self.fake_instance_obj,
        migration=migration)
    self.assertIn('Compute too old', six.text_type(ex))
def test_reboot_instance(self):
self.maxDiff = None
self._test_compute_api('reboot_instance', 'cast',