Add ConfirmResizeTask

This adds the ConfirmResizeTask task which will be called
from the API to orchestrate confirming a cross-cell resize.

This task is responsible for cleaning up the source host
and cell including destroying the guest from the source
hypervisor and hard deleting the instance-related records from
the source cell database. It also updates the information
about the instance and migration in the target cell database.

The resize.confirm.(start|end) notifications, which are
traditionally sent from the confirm_resize() method on the
source host, are sent from this task since confirming a cross
cell resize spans multiple cells.

Part of blueprint cross-cell-resize

Change-Id: I5b9d41ef34385689d8da9b3962a1eac759eddf6a
This commit is contained in:
Matt Riedemann
2019-02-14 17:27:41 -05:00
parent e937c5c6c4
commit 0d653f35c4
3 changed files with 397 additions and 1 deletions

View File

@@ -18,6 +18,8 @@ import oslo_messaging as messaging
from oslo_utils import excutils
from nova import availability_zones
from nova.compute import instance_actions
from nova.compute import power_state
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
@@ -29,6 +31,8 @@ from nova import image as nova_image
from nova import network
from nova.network.neutronv2 import constants as neutron_constants
from nova import objects
from nova.objects import fields
from nova.scheduler import utils as scheduler_utils
from nova.volume import cinder
LOG = logging.getLogger(__name__)
@@ -888,3 +892,212 @@ class CrossCellMigrationTask(base.TaskBase):
self._completed_tasks[task_name].rollback(ex)
except Exception:
LOG.exception('Rollback for task %s failed.', task_name)
def get_instance_from_source_cell(
        target_cell_context, source_compute, instance_uuid):
    """Load an instance from the database of the source cell.

    :param target_cell_context: nova auth request context targeted at the
        target cell database
    :param source_compute: name of the source compute service host
    :param instance_uuid: UUID of the instance
    :returns: Instance object from the source cell database.
    """
    # The host mapping of the source compute tells us which cell the
    # source host (and therefore the source copy of the instance) lives in.
    cell_mapping = objects.HostMapping.get_by_host(
        target_cell_context, source_compute).cell_mapping

    # Never re-target the caller's context: work on a copy and point the
    # copy at the source cell instead.
    ctxt = copy.copy(target_cell_context)
    nova_context.set_target_cell(ctxt, cell_mapping)

    # The instance is loaded with the source cell context, which keeps the
    # returned object permanently targeted at the source cell database.
    wanted_attrs = ['flavor', 'info_cache', 'system_metadata']
    instance = objects.Instance.get_by_uuid(
        ctxt, instance_uuid, expected_attrs=wanted_attrs)
    return instance
class ConfirmResizeTask(base.TaskBase):
    """Conductor task which confirms a resize that spans two cells.

    While a cross-cell resize is waiting to be confirmed the instance
    exists in both the source and target cell databases and on both the
    source and target compute hosts. The API operates on the target cell
    instance; this task cleans up the source cell host and database and
    then updates the status of the instance in the target cell.

    The task can be run asynchronously from the API service for a normal
    confirmResize server action, or synchronously when a server in
    VERIFY_RESIZE status is being deleted.
    """

    def __init__(self, context, instance, migration, legacy_notifier,
                 compute_rpcapi):
        """Create the task.

        :param context: nova auth request context targeted at the target cell
        :param instance: Instance object in "resized" status from the target
            cell
        :param migration: Migration object from the target cell for the
            resize operation, expected to have status "confirming"
        :param legacy_notifier: LegacyValidatingNotifier for sending legacy
            unversioned notifications
        :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
        """
        super(ConfirmResizeTask, self).__init__(context, instance)
        self.compute_rpcapi = compute_rpcapi
        self.legacy_notifier = legacy_notifier
        self.migration = migration

    def _send_resize_confirm_notification(self, instance, phase):
        """Emit the legacy and versioned resize.confirm.(phase) notifications.

        :param instance: The instance whose resize is being confirmed; the
            notifications are sent with the context attached to it.
        :param phase: The phase for the resize.confirm operation (either
            "start" or "end").
        """
        context = instance._context

        # Legacy unversioned notification first...
        legacy_event = 'resize.confirm.%s' % phase
        compute_utils.notify_about_instance_usage(
            self.legacy_notifier, context, instance, legacy_event)

        # ...followed by the versioned one.
        # TODO(mriedem): Use CONF.host instead of instance.host?
        compute_utils.notify_about_instance_action(
            context, instance, instance.host,
            action=fields.NotificationAction.RESIZE_CONFIRM, phase=phase)

    def _cleanup_source_host(self, source_instance):
        """Ask the source compute host to clean up the instance.

        Records a confirmResize instance action in the source cell DB and
        then makes the confirm_snapshot_based_resize_at_source RPC to the
        source compute, which is expected to destroy the guest, clean up
        networking and storage and free the resource usage on that host.

        :param source_instance: Instance object from the source cell DB
        """
        source_ctxt = source_instance._context

        # The action must exist in the source cell database before the
        # compute service is called, otherwise the action events created
        # by the compute cannot be tracked. The API already created the
        # same action record, but against the target cell instance.
        objects.InstanceAction.action_start(
            source_ctxt, source_instance.uuid,
            instance_actions.CONFIRM_RESIZE, want_result=False)

        # The compute needs the source cell copy of the migration record.
        migration = objects.Migration.get_by_uuid(
            source_ctxt, self.migration.uuid)
        source_host = migration.source_compute
        LOG.debug('Cleaning up source host %s for cross-cell resize confirm.',
                  source_host, instance=source_instance)

        # old_flavor has to be populated before the source host drops the
        # MoveClaim in the ResourceTracker.
        source_instance.old_flavor = source_instance.flavor

        # The EventReporter context manager is given the target cell
        # context (self.context), so the event the source compute creates
        # in its own cell is mirrored in the target cell DB without having
        # to copy it over explicitly afterwards.
        reporter = compute_utils.EventReporter(
            self.context, 'compute_confirm_snapshot_based_resize_at_source',
            source_host, source_instance.uuid)
        with reporter:
            self.compute_rpcapi.confirm_snapshot_based_resize_at_source(
                source_ctxt, source_instance, migration)

    def _finish_confirm_in_target_cell(self):
        """Put the target cell migration and instance into final states.

        This mirrors what ``confirm_resize`` does in the compute service
        for a same-cell resize.
        """
        instance = self.instance
        migration = self.migration
        LOG.debug('Updating migration and instance status in target cell DB.',
                  instance=instance)

        # The migration is now confirmed.
        migration.status = 'confirmed'
        migration.save()

        # Drop everything that was stashed on the instance for the resize.
        instance.old_flavor = None
        instance.new_flavor = None
        instance.system_metadata.pop('old_vm_state', None)
        self._set_vm_and_task_state()
        instance.drop_migration_context()

        # More than one task_state is acceptable here: coming from the
        # confirmResize instance action it should be None, but coming from
        # _confirm_resize_on_deleting the instance is being deleted.
        allowed_task_states = [
            None, task_states.DELETING, task_states.SOFT_DELETING]
        instance.save(expected_task_state=allowed_task_states)

    def _set_vm_and_task_state(self):
        """Derive the target cell instance vm_state from its power_state.

        The task_state is reset to None. Nothing is saved here; only the
        in-memory instance fields are changed.
        """
        # The old_vm_state could be STOPPED, yet the user might have
        # powered the instance on manually to verify the resize/migrate,
        # so the current power state decides. Anything other than SHUTDOWN
        # is treated as ACTIVE on the assumption that the
        # _sync_power_states periodic task in the compute service will
        # correct it if needed.
        powered_off = self.instance.power_state == power_state.SHUTDOWN
        new_vm_state = vm_states.STOPPED if powered_off else vm_states.ACTIVE
        if powered_off:
            LOG.debug("Resized/migrated instance is powered off. "
                      "Setting vm_state to '%s'.", new_vm_state,
                      instance=self.instance)
        self.instance.vm_state = new_vm_state
        self.instance.task_state = None

    def _execute(self):
        """Run the confirm: clean up the source, then finish in the target."""
        # Everything on the source side works on the source cell copy of
        # the instance.
        source_instance = get_instance_from_source_cell(
            self.context, self.migration.source_compute, self.instance.uuid)

        # The operation starts in the source cell, so the start
        # notification(s) are sent for the source cell instance.
        self._send_resize_confirm_notification(
            source_instance, fields.NotificationPhase.START)

        # RPC call to the source compute to clean up the guest.
        self._cleanup_source_host(source_instance)

        # With the host cleaned up the source cell record can go. It must
        # be a hard delete so that resizing back to this cell later does
        # not hit a duplicate entry unique constraint error.
        LOG.info('Deleting instance record from source cell %s',
                 source_instance._context.cell_uuid,
                 instance=source_instance)
        source_instance.destroy(hard_delete=True)

        # Finalize the records in the target cell database.
        self._finish_confirm_in_target_cell()

        # The operation ends in the target cell, so the end notification
        # is sent for the target cell instance.
        self._send_resize_confirm_notification(
            self.instance, fields.NotificationPhase.END)

    def rollback(self, ex):
        """Mark the migration and target cell instance as failed.

        Runs inside ``save_and_reraise_exception`` so the error handled
        here is raised again once the error handling has finished.

        :param ex: the exception which caused the task to fail; it is
            handed to set_vm_state_and_notify for the fault/notification.
        """
        with excutils.save_and_reraise_exception():
            LOG.exception(
                'An error occurred while confirming the resize for instance '
                'in target cell %s. Depending on the error, a copy of the '
                'instance may still exist in the source cell database which '
                'contains the source host %s. At this point the instance is '
                'on the target host %s and anything left in the source cell '
                'can be cleaned up.', self.context.cell_uuid,
                self.migration.source_compute, self.migration.dest_compute,
                instance=self.instance)

            # Whatever failed, the migration ends up in 'error' status.
            self.migration.status = 'error'
            self.migration.save()

            # The target DB instance goes to ERROR status; a fault is
            # recorded and an error notification is sent.
            request_spec = objects.RequestSpec.get_by_instance_uuid(
                self.context, self.instance.uuid)
            error_updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            scheduler_utils.set_vm_state_and_notify(
                self.context, self.instance.uuid, 'compute_task',
                'migrate_server', error_updates, ex, request_spec)

View File

@@ -285,7 +285,7 @@ class SingleCellSimple(fixtures.Fixture):
self._fake_target_cell))
self.useFixture(fixtures.MonkeyPatch(
'nova.context.set_target_cell',
lambda c, m: None))
self._fake_set_target_cell))
def _fake_hostmapping_get(self, *args):
return {'id': 1,
@@ -335,6 +335,14 @@ class SingleCellSimple(fixtures.Fixture):
# targeting anything.
yield context
def _fake_set_target_cell(self, context, cell_mapping):
    """Fake for nova.context.set_target_cell that only tracks cell_uuid.

    No real targeting happens; the context's ``cell_uuid`` is set from
    the mapping, or cleared when no mapping is given.
    """
    if not cell_mapping:
        # Untargeting: forget the cell.
        context.cell_uuid = None
        return
    # Mappings without a ``uuid`` attribute fall back to a sentinel uuid.
    context.cell_uuid = getattr(cell_mapping, 'uuid', uuidsentinel.cell1)
class CheatingSerializer(rpc.RequestContextSerializer):
"""A messaging.RequestContextSerializer that helps with cells.

View File

@@ -17,6 +17,8 @@ from oslo_messaging import exceptions as messaging_exceptions
from oslo_utils.fixture import uuidsentinel as uuids
import six
from nova.compute import instance_actions
from nova.compute import power_state
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
@@ -26,6 +28,7 @@ from nova import exception
from nova.network import model as network_model
from nova import objects
from nova.objects import base as obj_base
from nova.objects import fields
from nova.objects import instance as instance_obj
from nova import test
from nova.tests.unit.db import test_db_api
@@ -1094,3 +1097,175 @@ class FinishResizeAtDestTaskTestCase(test.TestCase):
mock_log.assert_called_once()
self.assertIn('Failed to copy %s instance action event from target',
mock_log.call_args[0][0])
class UtilityTestCase(test.NoDBTestCase):
    """Tests utility methods in the cross_cell_migrate module."""

    @mock.patch('nova.objects.HostMapping.get_by_host',
                return_value=objects.HostMapping(
                    cell_mapping=objects.CellMapping(uuid=uuids.cell)))
    @mock.patch('nova.objects.Instance.get_by_uuid')
    def test_get_instance_from_source_cell(self, mock_get_inst,
                                           mock_get_by_host):
        """The instance is fetched with a copied, source-targeted context."""
        target_ctxt = nova_context.get_admin_context()

        def _check_context_and_return(ctxt, *args, **kwargs):
            # The lookup must use a different context object than the one
            # passed in, and that object must be targeted at the cell from
            # the source host mapping.
            self.assertIsNot(ctxt, target_ctxt)
            self.assertEqual(uuids.cell, ctxt.cell_uuid)
            return mock.sentinel.instance

        mock_get_inst.side_effect = _check_context_and_return

        result = cross_cell_migrate.get_instance_from_source_cell(
            target_ctxt, 'source-host', uuids.instance)

        self.assertIs(result, mock.sentinel.instance)
        # The host mapping is looked up with the caller's own context.
        mock_get_by_host.assert_called_once_with(target_ctxt, 'source-host')
        expected_attrs = ['flavor', 'info_cache', 'system_metadata']
        mock_get_inst.assert_called_once_with(
            test.MatchType(nova_context.RequestContext), uuids.instance,
            expected_attrs=expected_attrs)
class ConfirmResizeTaskTestCase(test.NoDBTestCase):
    """Unit tests for cross_cell_migrate.ConfirmResizeTask."""

    def setUp(self):
        super(ConfirmResizeTaskTestCase, self).setUp()
        ctxt = nova_context.get_admin_context()
        # Target cell instance in RESIZED state with a stashed old_vm_state.
        instance = objects.Instance(
            ctxt, uuid=uuids.instance, host='target-host',
            vm_state=vm_states.RESIZED,
            system_metadata={'old_vm_state': vm_states.ACTIVE})
        # Target cell migration which is in the middle of being confirmed.
        migration = objects.Migration(
            ctxt, uuid=uuids.migration, dest_compute='target-host',
            source_compute='source-host', status='confirming')
        self.task = cross_cell_migrate.ConfirmResizeTask(
            ctxt, instance, migration, mock.sentinel.legacy_notifier,
            mock.Mock())

    @mock.patch('nova.conductor.tasks.cross_cell_migrate.'
                'get_instance_from_source_cell',
                return_value=objects.Instance(
                    mock.MagicMock(), uuid=uuids.instance))
    def test_execute(self, mock_get_instance):
        """Happy path: every step runs once with the expected instance."""
        task = self.task
        source_instance = mock_get_instance.return_value
        source_instance.destroy = mock.Mock()

        with test.nested(
            mock.patch.object(task, '_send_resize_confirm_notification'),
            mock.patch.object(task, '_cleanup_source_host'),
            mock.patch.object(task, '_finish_confirm_in_target_cell')
        ) as (mock_notify, mock_cleanup, mock_finish):
            task.execute()

        mock_get_instance.assert_called_once_with(
            task.context, task.migration.source_compute, task.instance.uuid)
        # Start is sent for the source cell instance, end for the target
        # cell instance.
        self.assertEqual(2, mock_notify.call_count)
        mock_notify.assert_has_calls([
            mock.call(source_instance, fields.NotificationPhase.START),
            mock.call(task.instance, fields.NotificationPhase.END)])
        mock_cleanup.assert_called_once_with(source_instance)
        source_instance.destroy.assert_called_once_with(hard_delete=True)
        mock_finish.assert_called_once_with()

    @mock.patch('nova.conductor.tasks.cross_cell_migrate.'
                'get_instance_from_source_cell',
                side_effect=exception.InstanceNotFound(
                    instance_id=uuids.instance))
    @mock.patch('nova.objects.Migration.save')
    @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid')
    @mock.patch('nova.scheduler.utils.set_vm_state_and_notify')
    def test_rollback(self, mock_set_state_notify, mock_get_reqspec,
                      mock_mig_save, mock_get_instance):
        """A failure in execute() triggers rollback and is re-raised."""
        task = self.task
        self.assertRaises(exception.InstanceNotFound, task.execute)

        mock_get_instance.assert_called_once_with(
            task.context, task.migration.source_compute, task.instance.uuid)
        # The migration is put into error status and saved.
        self.assertEqual('error', task.migration.status)
        mock_mig_save.assert_called_once_with()
        # The instance is put into ERROR using the original exception.
        mock_get_reqspec.assert_called_once_with(
            task.context, task.instance.uuid)
        expected_updates = {'vm_state': vm_states.ERROR, 'task_state': None}
        mock_set_state_notify.assert_called_once_with(
            task.context, task.instance.uuid, 'compute_task',
            'migrate_server', expected_updates,
            mock_get_instance.side_effect, mock_get_reqspec.return_value)

    @mock.patch('nova.compute.utils.notify_about_instance_usage')
    @mock.patch('nova.compute.utils.notify_about_instance_action')
    def test_send_resize_confirm_notification(self, mock_versioned_notify,
                                              mock_legacy_notify):
        """Both the legacy and the versioned notification are sent."""
        instance = self.task.instance
        ctxt = instance._context

        self.task._send_resize_confirm_notification(instance, 'fake-phase')

        mock_legacy_notify.assert_called_once_with(
            self.task.legacy_notifier, ctxt, instance,
            'resize.confirm.fake-phase')
        mock_versioned_notify.assert_called_once_with(
            ctxt, instance, instance.host,
            action=fields.NotificationAction.RESIZE_CONFIRM,
            phase='fake-phase')

    @mock.patch('nova.objects.InstanceAction.action_start')
    @mock.patch('nova.objects.Migration.get_by_uuid')
    @mock.patch('nova.objects.InstanceActionEvent')  # stub EventReporter calls
    def test_cleanup_source_host(
            self, mock_action_event, mock_get_mig, mock_action_start):
        """The action, the event and the RPC call use the right contexts."""
        instance = objects.Instance(
            nova_context.get_admin_context(), uuid=uuids.instance,
            flavor=objects.Flavor())
        source_ctxt = instance._context
        source_migration = mock_get_mig.return_value
        event_name = 'compute_confirm_snapshot_based_resize_at_source'

        self.task._cleanup_source_host(instance)

        self.assertIs(instance.old_flavor, instance.flavor)
        # The action and migration lookup go to the source cell (the
        # context attached to the instance)...
        mock_action_start.assert_called_once_with(
            source_ctxt, instance.uuid, instance_actions.CONFIRM_RESIZE,
            want_result=False)
        mock_get_mig.assert_called_once_with(
            source_ctxt, self.task.migration.uuid)
        rpc_method = (
            self.task.compute_rpcapi.confirm_snapshot_based_resize_at_source)
        rpc_method.assert_called_once_with(
            source_ctxt, instance, source_migration)
        # ...while the event is recorded with the task (target cell)
        # context.
        mock_action_event.event_start.assert_called_once_with(
            self.task.context, uuids.instance, event_name,
            want_result=False, host=source_migration.source_compute)
        mock_action_event.event_finish_with_failure.assert_called_once_with(
            self.task.context, uuids.instance, event_name,
            exc_val=None, exc_tb=None, want_result=False)

    @mock.patch('nova.objects.Migration.save')
    @mock.patch('nova.objects.Instance.save')
    @mock.patch('nova.objects.Instance.drop_migration_context')
    def test_finish_confirm_in_target_cell(self, mock_drop_ctx, mock_inst_save,
                                           mock_mig_save):
        """Migration is confirmed and the stashed resize data is dropped."""
        task = self.task
        with mock.patch.object(
                task, '_set_vm_and_task_state') as mock_set_state:
            task._finish_confirm_in_target_cell()

        self.assertEqual('confirmed', task.migration.status)
        mock_mig_save.assert_called_once_with()
        self.assertNotIn('old_vm_state', task.instance.system_metadata)
        self.assertIsNone(task.instance.old_flavor)
        self.assertIsNone(task.instance.new_flavor)
        mock_set_state.assert_called_once_with()
        mock_drop_ctx.assert_called_once_with()
        allowed_task_states = [
            None, task_states.DELETING, task_states.SOFT_DELETING]
        mock_inst_save.assert_called_once_with(
            expected_task_state=allowed_task_states)

    def test_set_vm_and_task_state_shutdown(self):
        """A powered off guest results in vm_state STOPPED."""
        instance = self.task.instance
        instance.power_state = power_state.SHUTDOWN
        self.task._set_vm_and_task_state()
        self.assertEqual(vm_states.STOPPED, instance.vm_state)
        self.assertIsNone(instance.task_state)

    def test_set_vm_and_task_state_active(self):
        """A running guest results in vm_state ACTIVE."""
        instance = self.task.instance
        instance.power_state = power_state.RUNNING
        self.task._set_vm_and_task_state()
        self.assertEqual(vm_states.ACTIVE, instance.vm_state)
        self.assertIsNone(instance.task_state)