Add FinishResizeAtDestTask

This adds the FinishResizeAtDestTask sub-task which
synchronously RPC calls finish_snapshot_based_resize_at_dest
on the destination compute service in the target cell
to finish the resize.

If that is successful, the "hidden" field on the source
and target cell instance records is swapped so that the API
shows the target cell instance, and the instance mapping
record is updated to point at the target cell since confirm/revert
actions at that point start in the target cell.

Part of blueprint cross-cell-resize

Change-Id: I3e28c0163dc14dacf847c5a69730ba2e29650370
This commit is contained in:
Matt Riedemann
2019-02-07 15:57:14 -05:00
parent a2fba47bcf
commit 612d8c682d
2 changed files with 463 additions and 15 deletions

View File

@@ -15,10 +15,12 @@ import copy
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_utils import excutils
from nova import availability_zones
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova.conductor.tasks import base
from nova import context as nova_context
from nova import exception
@@ -447,6 +449,187 @@ class PrepResizeAtSourceTask(base.TaskBase):
self.instance.host, instance=self.instance)
class FinishResizeAtDestTask(base.TaskBase):
    """Sub-task that completes a cross-cell resize on the destination host.

    The task synchronously calls finish_snapshot_based_resize_at_dest on
    the destination compute service, which sets up networking and block
    storage and spawns the guest on the destination host. When that call
    succeeds the migration status should be 'finished', the instance
    task_state should be None, the vm_state should be 'resized' and the
    instance host/node information should reflect the destination compute.

    After a successful compute call the task un-hides the target cell
    instance, repoints the instance mapping at the target cell and hides the
    source cell instance, so that confirm/revert operations act on the target
    cell instance.
    """

    def __init__(self, context, instance, migration, source_cell_instance,
                 compute_rpcapi, target_cell_mapping, snapshot_id,
                 request_spec):
        """Store everything the task needs to finish the resize.

        :param context: nova auth request context targeted at the target cell
        :param instance: Instance object in the target cell database
        :param migration: Migration object in the target cell database
        :param source_cell_instance: Instance object in the source cell DB
        :param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
        :param target_cell_mapping: CellMapping object for the target cell
        :param snapshot_id: ID of the image snapshot to use for a
            non-volume-backed instance.
        :param request_spec: nova.objects.RequestSpec object for the operation
        """
        super(FinishResizeAtDestTask, self).__init__(context, instance)
        self.request_spec = request_spec
        self.snapshot_id = snapshot_id
        self.target_cell_mapping = target_cell_mapping
        self.compute_rpcapi = compute_rpcapi
        self.source_cell_instance = source_cell_instance
        self.migration = migration

    def _finish_snapshot_based_resize_at_dest(self):
        """Make the synchronous finish_snapshot_based_resize_at_dest call.

        When the compute method fails, the source cell instance is updated
        to reflect the error (vm_state='error', task_state=None) and the
        latest fault plus the instance action event for that compute method
        are copied to the source cell on a best effort basis. The original
        exception is always re-raised.
        """
        target_instance = self.instance
        LOG.debug('Finishing cross-cell resize at the destination host %s',
                  self.migration.dest_compute, instance=target_instance)
        # The source cell instance was already moved to resize_migrated by
        # prep_snapshot_based_resize_at_source, so mirror that task_state on
        # the target cell instance before the destination compute sees it.
        target_instance.task_state = task_states.RESIZE_MIGRATED
        target_instance.save()
        try:
            self.compute_rpcapi.finish_snapshot_based_resize_at_dest(
                self.context, target_instance, self.migration,
                self.snapshot_id, self.request_spec)
        except Exception:
            # The error handlers decorating
            # finish_snapshot_based_resize_at_dest in the destination compute
            # service only touch the target cell, so replay their effects on
            # the source cell instance before re-raising.
            with excutils.save_and_reraise_exception(logger=LOG):
                source_instance = self.source_cell_instance
                # Equivalent of reverts_task_state and
                # _error_out_instance_on_exception.
                source_instance.task_state = None
                source_instance.vm_state = vm_states.ERROR
                source_instance.save()

                source_ctxt = source_instance._context
                # Equivalent of wrap_instance_fault (best effort).
                self._copy_latest_fault(source_ctxt)
                # Equivalent of wrap_instance_event (best effort).
                self._copy_finish_snapshot_based_resize_at_dest_event(
                    source_ctxt)

    def _copy_latest_fault(self, source_cell_context):
        """Copy the newest target cell instance fault to the source cell.

        Any error is logged and swallowed because this is best effort.

        :param source_cell_context: nova auth request context targeted at the
            source cell
        """
        try:
            # Look up the most recent fault in the target cell database.
            latest = objects.InstanceFault.get_latest_for_instance(
                self.context, self.instance.uuid)
            if not latest:
                return
            clone_creatable_object(source_cell_context, latest).create()
        except Exception:
            LOG.exception(
                'Failed to copy instance fault from target cell DB',
                instance=self.instance)

    def _copy_finish_snapshot_based_resize_at_dest_event(
            self, source_cell_context):
        """Copy the compute_finish_snapshot_based_resize_at_dest event.

        The event is read from the target cell database and created in the
        source cell database. A missing action or event only produces a
        warning, and any other error is logged and swallowed.

        :param source_cell_context: nova auth request context targeted at the
            source cell
        """
        event_name = 'compute_finish_snapshot_based_resize_at_dest'
        try:
            # TODO(mriedem): Need a method on InstanceActionEventList to
            # lookup an event by action request_id and event name.
            # There is a single action for this request in the target cell.
            action = objects.InstanceAction.get_by_request_id(
                self.context, self.instance.uuid,
                self.context.request_id)
            if not action:
                LOG.warning(
                    'Failed to find InstanceAction by request_id %s',
                    self.context.request_id, instance=self.instance)
                return

            # Scan the action's events in the target cell DB and stop at the
            # first one with the expected name.
            events = objects.InstanceActionEventList.get_by_action(
                self.context, action.id)
            matching = next(
                (evt for evt in events if evt.event == event_name), None)
            if matching is None:
                LOG.warning('Failed to find InstanceActionEvent with '
                            'name %s in target cell DB', event_name,
                            instance=self.instance)
                return

            # Re-create the event in the source cell DB.
            copied = clone_creatable_object(source_cell_context, matching)
            copied.create(action.instance_uuid, action.request_id)
        except Exception:
            LOG.exception(
                'Failed to copy %s instance action event from target cell DB',
                event_name, instance=self.instance)

    def _update_instance_mapping(self):
        """Swap the hidden flags and repoint the instance mapping.

        The target cell instance becomes visible, the instance mapping is
        pointed at the target cell and the source cell instance is hidden,
        in that order.
        """
        LOG.debug('Marking instance in source cell as hidden and updating '
                  'instance mapping to point at target cell %s.',
                  self.target_cell_mapping.identity, instance=self.instance)
        # Fetch the mapping up front so the window in which both instance
        # records are hidden=False stays as short as possible.
        mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.context, self.instance.uuid)

        # Un-hide the target cell record so it is returned when listing
        # servers. The API filters duplicate instance records, so a user
        # listing servers right now still only gets one copy back.
        self.instance.hidden = False
        self.instance.save()

        # Repoint the mapping so confirm/revert run against the resized
        # instance in the target cell instead of the destroyed guest in the
        # source cell. This could happen before finishing the resize on the
        # dest host, but it is deferred until that succeeds: on failure we
        # want to be able to rebuild in the source cell to recover the
        # instance.
        mapping.cell_mapping = self.target_cell_mapping
        # Should this save fail, the cascading task failures should delete
        # the target cell instance, so it does not need to be hidden again.
        mapping.save()

        # Finally hide the source cell record from server listings.
        self.source_cell_instance.hidden = True
        self.source_cell_instance.save()

    def _execute(self):
        """Finish the resize on the dest host, then swap the records."""
        # Step 1: finish the resize on the destination host in the target
        # cell.
        self._finish_snapshot_based_resize_at_dest()
        # Step 2: swap instance.hidden and instance_mapping.cell_mapping.
        self._update_instance_mapping()

    def rollback(self):
        """No-op rollback."""
        # The methods executed in this task are self-contained for rollbacks.
        pass
class CrossCellMigrationTask(base.TaskBase):
"""Orchestrates a cross-cell cold migration (resize)."""
@@ -509,7 +692,10 @@ class CrossCellMigrationTask(base.TaskBase):
Upon successful completion the self._target_cell_context and
self._target_cell_instance variables are set.
:returns: The active Migration object from the target cell DB.
:returns: A 2-item tuple of:
- The active Migration object from the target cell DB
- The CellMapping for the target cell
"""
LOG.debug('Setting up the target cell database for the instance and '
'its related records.', instance=self.instance)
@@ -524,7 +710,7 @@ class CrossCellMigrationTask(base.TaskBase):
self._target_cell_context)
self._target_cell_instance, target_cell_migration = task.execute()
self._completed_tasks['TargetDBSetupTask'] = task
return target_cell_migration
return target_cell_migration, target_cell_mapping
def _perform_external_api_checks(self):
"""Performs checks on external service APIs for support.
@@ -634,6 +820,22 @@ class CrossCellMigrationTask(base.TaskBase):
self._completed_tasks['PrepResizeAtSourceTask'] = prep_source_task
return snapshot_id
def _finish_resize_at_dest(
        self, target_cell_migration, target_cell_mapping, snapshot_id):
    """Build and run the FinishResizeAtDestTask sub-task.

    The executed task is recorded in self._completed_tasks under the
    'FinishResizeAtDestTask' key.

    :param target_cell_migration: Migration object from the target cell DB
    :param target_cell_mapping: CellMapping object for the target cell
    :param snapshot_id: ID of the image snapshot to use for a
        non-volume-backed instance.
    """
    finish_task = FinishResizeAtDestTask(
        self._target_cell_context,
        self._target_cell_instance,
        target_cell_migration,
        # self.instance is passed as the task's source_cell_instance.
        self.instance,
        self.compute_rpcapi,
        target_cell_mapping,
        snapshot_id,
        self.request_spec)
    finish_task.execute()
    # Only record the task once execute() has returned without raising.
    self._completed_tasks['FinishResizeAtDestTask'] = finish_task
def _execute(self):
"""Execute high-level orchestration of the cross-cell resize"""
# We are committed to a cross-cell move at this point so update the
@@ -654,7 +856,8 @@ class CrossCellMigrationTask(base.TaskBase):
# cannot simply pass the source cell context and instance over RPC
# to the target compute host and assume changes get mirrored back to
# the source cell database.
target_cell_migration = self._setup_target_cell_db()
target_cell_migration, target_cell_mapping = (
self._setup_target_cell_db())
# Claim resources and validate the selected host in the target cell.
target_cell_migration = self._prep_resize_at_dest(
@@ -662,15 +865,12 @@ class CrossCellMigrationTask(base.TaskBase):
# Prepare the instance at the source host (stop it, optionally snapshot
# it, disconnect volumes and VIFs, etc).
self._prep_resize_at_source()
snapshot_id = self._prep_resize_at_source()
# TODO(mriedem): Copy data to dest cell DB.
# TODO(mriedem): Update instance mapping to dest cell DB.
# TODO(mriedem): Spawn in target cell host:
# - Use new flavor to spawn guest
# - Wait for ACTIVE or keep powered off
# - Activate target host port bindings
# - Update/complete volume attachments and update BDM.attachment_id.
# Finish the resize at the destination host, swap the hidden fields
# on the instances and update the instance mapping.
self._finish_resize_at_dest(
target_cell_migration, target_cell_mapping, snapshot_id)
def rollback(self):
"""Rollback based on how sub-tasks completed

View File

@@ -402,11 +402,15 @@ class CrossCellMigrationTaskTestCase(test.NoDBTestCase):
mock.patch.object(self.task, '_setup_target_cell_db'),
mock.patch.object(self.task, '_prep_resize_at_dest'),
mock.patch.object(self.task, '_prep_resize_at_source'),
mock.patch.object(self.task, '_finish_resize_at_dest'),
) as (
mock_migration_save, mock_perform_external_api_checks,
mock_setup_target_cell_db, mock_prep_resize_at_dest,
mock_prep_resize_at_source,
mock_prep_resize_at_source, mock_finish_resize_at_dest,
):
mock_setup_target_cell_db.return_value = (
mock.sentinel.target_cell_migration,
mock.sentinel.target_cell_mapping)
self.task.execute()
# Assert the calls
self.assertTrue(self.task.source_migration.cross_cell_move,
@@ -415,8 +419,12 @@ class CrossCellMigrationTaskTestCase(test.NoDBTestCase):
mock_perform_external_api_checks.assert_called_once_with()
mock_setup_target_cell_db.assert_called_once_with()
mock_prep_resize_at_dest.assert_called_once_with(
mock_setup_target_cell_db.return_value)
mock.sentinel.target_cell_migration)
mock_prep_resize_at_source.assert_called_once_with()
mock_finish_resize_at_dest.assert_called_once_with(
mock_prep_resize_at_dest.return_value,
mock.sentinel.target_cell_mapping,
mock_prep_resize_at_source.return_value)
# Now rollback the completed sub-tasks
self.task.rollback()
@@ -487,8 +495,9 @@ class CrossCellMigrationTaskTestCase(test.NoDBTestCase):
mock_set_target_cell.assert_called_once_with(
self.task._target_cell_context, mock_get_cell_mapping.return_value)
# The resulting migration record from TargetDBSetupTask should have
# been returned.
self.assertIs(result, mock.sentinel.target_cell_migration)
# been returned along with the target cell mapping.
self.assertIs(result[0], mock.sentinel.target_cell_migration)
self.assertIs(result[1], mock_get_cell_mapping.return_value)
# The target_cell_instance should be set on the main task.
self.assertIsNotNone(self.task._target_cell_instance)
self.assertIs(self.task._target_cell_instance,
@@ -584,6 +593,19 @@ class CrossCellMigrationTaskTestCase(test.NoDBTestCase):
self.task._completed_tasks['PrepResizeAtSourceTask'],
cross_cell_migrate.PrepResizeAtSourceTask)
@mock.patch.object(cross_cell_migrate.FinishResizeAtDestTask, 'execute')
def test_finish_resize_at_dest(self, mock_task_execute):
    """Checks that FinishResizeAtDestTask is built, run and recorded."""
    task_key = 'FinishResizeAtDestTask'
    migration = objects.Migration()
    cell_mapping = objects.CellMapping()
    self.task._finish_resize_at_dest(
        migration, cell_mapping, uuids.snapshot_id)
    # The sub-task must have been executed exactly once, without arguments.
    mock_task_execute.assert_called_once_with()
    # And it must be tracked as a completed task of the right type.
    completed = self.task._completed_tasks
    self.assertIn(task_key, completed)
    self.assertIsInstance(
        completed[task_key], cross_cell_migrate.FinishResizeAtDestTask)
class PrepResizeAtDestTaskTestCase(test.NoDBTestCase):
@@ -843,3 +865,229 @@ class PrepResizeAtSourceTaskTestCase(test.NoDBTestCase):
delete_image.assert_called_once_with(
self.task.context, self.task.instance, self.task.image_api,
self.task._image_id)
class FinishResizeAtDestTaskTestCase(test.TestCase):
    """Database-backed tests for FinishResizeAtDestTask."""

    def _create_instance(self, ctxt, create_instance_mapping=False, **updates):
        """Persist a fake instance using the given cell-targeted context.

        :param ctxt: Cell-targeted RequestContext
        :param create_instance_mapping: If True, also create an
            InstanceMapping for the instance pointing at the cell that ctxt
            is targeted to; otherwise no InstanceMapping is created.
        :param updates: Additional fields to set on the Instance object.
        :returns: Instance object that was created.
        """
        instance = fake_instance.fake_instance_obj(ctxt, **updates)
        # Drop the id so the object can be created.
        delattr(instance, 'id')
        # fake_instance_obj builds the object via Instance._from_db_object,
        # which calls obj_reset_changes() and so leaves every field clean,
        # even basic ones like 'host'. Clean fields are not written to the
        # DB, so re-assign each set field to mark it dirty again.
        # TODO(mriedem): This should live in fake_instance_obj
        for name in instance.obj_fields:
            if name not in instance:
                continue
            setattr(instance, name, getattr(instance, name))
        # FIXME(mriedem): db.instance_create does not handle tags
        instance.obj_reset_changes(['tags'])
        instance.create()

        if not create_instance_mapping:
            return instance

        # Resolve the CellMapping that the context is targeted at.
        self.assertIsNotNone(ctxt.cell_uuid,
                             'ctxt must be targeted to a cell.')
        cell = next(
            (candidate for candidate in self.cell_mappings.values()
             if candidate.uuid == ctxt.cell_uuid),
            None)
        if cell is None:
            raise Exception('Unable to find CellMapping with UUID %s' %
                            ctxt.cell_uuid)
        objects.InstanceMapping(
            ctxt, instance_uuid=instance.uuid,
            project_id=instance.project_id, cell_mapping=cell).create()
        return instance

    def setUp(self):
        """Create source/target cell instances and the task under test."""
        super(FinishResizeAtDestTaskTestCase, self).setUp()
        cell_list = list(self.cell_mappings.values())
        source_cell, target_cell = cell_list[0], cell_list[1]

        self.source_context = nova_context.RequestContext(
            user_id='fake-user', project_id='fake-project', is_admin=True)
        # Start the target context off as a copy of the source context.
        self.target_context = self.source_context.elevated()
        nova_context.set_target_cell(self.source_context, source_cell)
        nova_context.set_target_cell(self.target_context, target_cell)

        # Source cell instance: visible and mapped to the source cell.
        source_instance = self._create_instance(
            self.source_context, create_instance_mapping=True,
            hidden=False)
        # Target cell instance: normally a clone of the source cell instance,
        # but these tests only need the UUID to match. It starts out hidden.
        target_instance = self._create_instance(
            self.target_context, hidden=True, uuid=source_instance.uuid)

        self.task = cross_cell_migrate.FinishResizeAtDestTask(
            self.target_context, target_instance,
            objects.Migration(dest_compute='target.host.com'),
            source_instance, compute_rpcapi=mock.Mock(),
            target_cell_mapping=target_cell, snapshot_id=uuids.snapshot_id,
            request_spec=objects.RequestSpec())

    def test_execute(self):
        """Happy path: the compute call succeeds and the records swap."""
        with mock.patch.object(
                self.task.compute_rpcapi,
                'finish_snapshot_based_resize_at_dest') as finish_resize:
            self.task.execute()

        # The task should have saved task_state=resize_migrated on the target
        # cell instance and then called the compute method.
        target_instance = self.task.instance
        target_instance.refresh()
        self.assertEqual(task_states.RESIZE_MIGRATED,
                         target_instance.task_state)
        finish_resize.assert_called_once_with(
            self.task.context, target_instance, self.task.migration,
            self.task.snapshot_id, self.task.request_spec)

        # The hidden flags should be swapped...
        self.assertFalse(target_instance.hidden,
                         'Target cell instance should not be hidden')
        source_instance = self.task.source_cell_instance
        source_instance.refresh()
        self.assertTrue(source_instance.hidden,
                        'Source cell instance should be hidden')
        # ...and the instance mapping should point at the target cell.
        mapping = objects.InstanceMapping.get_by_instance_uuid(
            self.task.context, target_instance.uuid)
        self.assertEqual(self.target_context.cell_uuid,
                         mapping.cell_mapping.uuid)

    def test_finish_snapshot_based_resize_at_dest_fails(self):
        """The compute method raising must error out the source instance."""
        with test.nested(
            mock.patch.object(self.task.compute_rpcapi,
                              'finish_snapshot_based_resize_at_dest',
                              side_effect=test.TestingException),
            mock.patch.object(self.task, '_copy_latest_fault'),
            mock.patch.object(
                self.task,
                '_copy_finish_snapshot_based_resize_at_dest_event'),
        ) as (
            finish_resize, copy_fault, copy_event
        ):
            self.assertRaises(
                test.TestingException,
                self.task._finish_snapshot_based_resize_at_dest)

        # The source cell instance is put into error state with no task.
        source_instance = self.task.source_cell_instance
        source_instance.refresh()
        self.assertEqual(vm_states.ERROR, source_instance.vm_state)
        self.assertIsNone(source_instance.task_state)
        # Both copy helpers are invoked with the source cell context.
        copy_fault.assert_called_once_with(self.source_context)
        copy_event.assert_called_once_with(self.source_context)

    def test_copy_latest_fault(self):
        """_copy_latest_fault copies a target cell fault to the source."""
        # Record a fault against the instance in the target cell database.
        try:
            raise test.TestingException('test-fault')
        except test.TestingException as exc:
            compute_utils.add_instance_fault_from_exc(
                self.target_context, self.task.instance, exc)

        self.task._copy_latest_fault(self.source_context)

        # Reading .fault lazy-loads it from the source cell DB.
        copied = self.task.source_cell_instance.fault
        self.assertIsNotNone(copied, 'Fault not copied to source cell DB')
        # And it is the fault we injected.
        self.assertEqual('TestingException', copied.message)

    @mock.patch('nova.conductor.tasks.cross_cell_migrate.LOG.exception')
    def test_copy_latest_fault_error(self, mock_log):
        """_copy_latest_fault swallows and logs lookup errors."""
        with mock.patch('nova.objects.InstanceFault.get_latest_for_instance',
                        side_effect=test.TestingException):
            self.task._copy_latest_fault(self.source_context)

        # Nothing should have been copied to the source cell.
        self.assertIsNone(self.task.source_cell_instance.fault)
        # The failure should have been logged exactly once.
        mock_log.assert_called_once()
        logged_message = mock_log.call_args[0][0]
        self.assertIn('Failed to copy instance fault from target cell DB',
                      logged_message)

    @mock.patch('nova.conductor.tasks.cross_cell_migrate.LOG.warning')
    def test_copy_finish_snapshot_based_resize_at_dest_event(self, mock_warn):
        """Covers the two warning cases and then a successful event copy."""
        copy_event = (
            self.task._copy_finish_snapshot_based_resize_at_dest_event)

        # Case 1: no action record exists yet, so a warning is logged that
        # the action could not be found.
        copy_event(self.source_context)
        mock_warn.assert_called_once()
        self.assertIn('Failed to find InstanceAction by request_id',
                      mock_warn.call_args[0][0])

        # Both contexts must share a request_id for the copy to work.
        self.assertEqual(self.source_context.request_id,
                         self.target_context.request_id)
        instance_uuid = self.task.instance.uuid
        # The source cell needs the action so that copied events can be
        # found under it.
        src_action = objects.InstanceAction.action_start(
            self.source_context, instance_uuid, 'resize')
        # Create the same action in the target cell database.
        objects.InstanceAction.action_start(
            self.target_context, instance_uuid, 'resize')

        # Case 2: the action exists but has no event, so a warning is logged
        # that the event could not be found.
        mock_warn.reset_mock()
        copy_event(self.source_context)
        mock_warn.assert_called_once()
        self.assertIn('Failed to find InstanceActionEvent',
                      mock_warn.call_args[0][0])

        # Case 3: generate the event in the target cell database by running
        # a decorated function that fails. Its name and parameters are kept
        # as-is; the decorator is assumed to derive the event name from them.
        @compute_utils.wrap_instance_event(prefix='compute')
        def finish_snapshot_based_resize_at_dest(_self, context, instance):
            raise test.TestingException('oops')

        self.assertRaises(test.TestingException,
                          finish_snapshot_based_resize_at_dest,
                          mock.Mock(host='dest-host'),
                          self.target_context, self.task.instance)
        copy_event(self.source_context)

        # Exactly one event should now exist in the source cell DB.
        src_events = objects.InstanceActionEventList.get_by_action(
            self.source_context, src_action.id)
        self.assertEqual(1, len(src_events))
        copied_event = src_events[0]
        self.assertEqual('compute_finish_snapshot_based_resize_at_dest',
                         copied_event.event)
        self.assertEqual('Error', copied_event.result)

    @mock.patch('nova.conductor.tasks.cross_cell_migrate.LOG.exception')
    @mock.patch('nova.objects.InstanceAction.get_by_request_id',
                side_effect=test.TestingException)
    def test_copy_finish_snapshot_based_resize_at_dest_event_error(
            self, get_by_request_id, mock_log):
        """The event copy helper swallows and logs lookup errors."""
        self.task._copy_finish_snapshot_based_resize_at_dest_event(
            self.source_context)
        mock_log.assert_called_once()
        logged_message = mock_log.call_args[0][0]
        self.assertIn('Failed to copy %s instance action event from target',
                      logged_message)