# nova/conductor/tasks/cross_cell_migrate.py
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import copy

from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_utils import excutils

from nova import availability_zones
from nova.compute import instance_actions
from nova.compute import power_state
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova.conductor.tasks import base
from nova import conf
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova.image import glance
from nova.network import constants as neutron_constants
from nova.network import neutron
from nova import objects
from nova.objects import fields
from nova.scheduler import utils as scheduler_utils
from nova.volume import cinder

LOG = logging.getLogger(__name__)
CONF = conf.CONF


def clone_creatable_object(ctxt, obj, delete_fields=None):
"""Targets the object at the given context and removes its id attribute
Dirties all of the set fields on a new copy of the object.
This is necessary before the object is created in a new cell.
:param ctxt: cell-targeted nova auth request context to set on the clone
:param obj: the object to re-target
:param delete_fields: list of fields to delete from the new object;
note that the ``id`` field is always deleted
:returns: Cloned version of ``obj`` with all set fields marked as
"changed" so they will be persisted on a subsequent
``obj.create()`` call.
"""
if delete_fields is None:
delete_fields = []
if 'id' not in delete_fields:
delete_fields.append('id')
new_obj = obj.obj_clone()
new_obj._context = ctxt
for field in obj.obj_fields:
if field in obj:
if field in delete_fields:
delattr(new_obj, field)
else:
# Dirty the field since obj_clone does not modify
# _changed_fields.
setattr(new_obj, field, getattr(obj, field))
return new_obj
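

# Illustrative usage sketch (hypothetical names, not executed): cloning a
# source-cell Migration record into the target cell database, as the tasks
# below do:
#
#     new_migration = clone_creatable_object(target_ctx, migration)
#     new_migration.create()  # all set fields were marked dirty, so create()
#                             # persists them in the target cell database
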
class TargetDBSetupTask(base.TaskBase):
"""Sub-task to create the instance data in the target cell DB.
This is needed before any work can be done with the instance in the target
cell, like validating the selected target compute host.
"""
def __init__(self, context, instance, source_migration,
target_cell_context):
"""Initialize this task.
:param context: source-cell targeted auth RequestContext
:param instance: source-cell Instance object
:param source_migration: source-cell Migration object for this
operation
:param target_cell_context: target-cell targeted auth RequestContext
"""
super(TargetDBSetupTask, self).__init__(context, instance)
self.target_ctx = target_cell_context
self.source_migration = source_migration
self._target_cell_instance = None
def _copy_migrations(self, migrations):
"""Copy migration records from the source cell to the target cell.
:param migrations: MigrationList object of source cell DB records.
:returns: Migration record in the target cell database that matches
the active migration in the source cell.
"""
target_cell_migration = None
for migration in migrations:
migration = clone_creatable_object(self.target_ctx, migration)
migration.create()
if self.source_migration.uuid == migration.uuid:
# Save this off so subsequent tasks don't need to look it up.
target_cell_migration = migration
return target_cell_migration
def _execute(self):
"""Creates the instance and its related records in the target cell
Instance.pci_devices are not copied over since those records are
tightly coupled to the compute_nodes records and are meant to track
inventory and allocations of PCI devices on a specific compute node.
The instance.pci_requests are what "move" with the instance to the
target cell and will result in new PCIDevice allocations on the target
compute node in the target cell during the resize_claim.
The instance.services field is not copied over since that represents
the nova-compute service mapped to the instance.host, which will not
make sense in the target cell.
:returns: A two-item tuple of the Instance and Migration object
created in the target cell
"""
LOG.debug(
'Creating (hidden) instance and its related records in the target '
'cell: %s', self.target_ctx.cell_uuid, instance=self.instance)
# We also have to create the BDMs and tags separately, just like in
# ComputeTaskManager.schedule_and_build_instances, so get those out
# of the source cell DB first before we start creating anything.
# NOTE(mriedem): Console auth tokens are not copied over to the target
# cell DB since they will be regenerated in the target cell as needed.
# Similarly, expired console auth tokens will be automatically cleaned
# from the source cell.
bdms = self.instance.get_bdms()
vifs = objects.VirtualInterfaceList.get_by_instance_uuid(
self.context, self.instance.uuid)
tags = self.instance.tags
# We copy instance actions to preserve the history of the instance
# in case the resize is confirmed.
actions = objects.InstanceActionList.get_by_instance_uuid(
self.context, self.instance.uuid)
migrations = objects.MigrationList.get_by_filters(
self.context, filters={'instance_uuid': self.instance.uuid})
# db.instance_create cannot handle some fields which might be loaded on
# the instance object, so we omit those from the cloned object and
# explicitly create the ones we care about (like tags) below. Things
# like pci_devices and services will not make sense in the target DB
# so we omit those as well.
# TODO(mriedem): Determine if we care about copying faults over to the
# target cell in case people use those for auditing (remember that
# faults are only shown in the API for ERROR/DELETED instances and only
# the most recent fault is shown).
inst = clone_creatable_object(
self.target_ctx, self.instance,
delete_fields=['fault', 'pci_devices', 'services', 'tags'])
        # This part is important: we want to create the instance in the target
        # cell as "hidden" so that while we have two copies of the instance in
        # different cells, listing servers out of the API will filter out the
        # hidden one.
inst.hidden = True
inst.create()
self._target_cell_instance = inst # keep track of this for rollbacks
# TODO(mriedem): Consider doing all of the inserts in a single
# transaction context. If any of the following creates fail, the
# rollback should perform a cascading hard-delete anyway.
# Do the same dance for the other instance-related records.
for bdm in bdms:
bdm = clone_creatable_object(self.target_ctx, bdm)
bdm.create()
for vif in vifs:
vif = clone_creatable_object(self.target_ctx, vif)
vif.create()
if tags:
primitive_tags = [tag.tag for tag in tags]
objects.TagList.create(self.target_ctx, inst.uuid, primitive_tags)
for action in actions:
new_action = clone_creatable_object(self.target_ctx, action)
new_action.create()
# For each pre-existing action, we need to also re-create its
# events in the target cell.
events = objects.InstanceActionEventList.get_by_action(
self.context, action.id)
for event in events:
new_event = clone_creatable_object(self.target_ctx, event)
new_event.create(action.instance_uuid, action.request_id)
target_cell_migration = self._copy_migrations(migrations)
return inst, target_cell_migration
def rollback(self, ex):
"""Deletes the instance data from the target cell in case of failure"""
if self._target_cell_instance:
# Deleting the instance in the target cell DB should perform a
# cascading delete of all related records, e.g. BDMs, VIFs, etc.
LOG.debug('Destroying instance from target cell: %s',
self.target_ctx.cell_uuid,
instance=self._target_cell_instance)
# This needs to be a hard delete because if resize fails later for
# some reason, we want to be able to retry the resize to this cell
# again without hitting a duplicate entry unique constraint error.
self._target_cell_instance.destroy(hard_delete=True)
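

# Driving TargetDBSetupTask (sketch based on
# CrossCellMigrationTask._setup_target_cell_db below; names are illustrative):
#
#     task = TargetDBSetupTask(
#         source_ctx, instance, source_migration, target_ctx)
#     target_instance, target_migration = task.execute()
#
# If a later sub-task fails, task.rollback(ex) hard-deletes the target cell
# instance which cascades to its related records (BDMs, VIFs, tags, actions).
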
class PrepResizeAtDestTask(base.TaskBase):
"""Task used to verify a given target host in a target cell.
Upon successful completion, port bindings and volume attachments
should be created for the target host in the target cell and resources
should be claimed on the target host for the resize. Also, the instance
task_state should be ``resize_prep``.
"""
def __init__(self, context, instance, flavor, target_migration,
request_spec, compute_rpcapi, host_selection, network_api,
volume_api):
"""Construct the PrepResizeAtDestTask instance
:param context: The user request auth context. This should be targeted
at the target cell.
:param instance: The instance being migrated (this is the target cell
copy of the instance record).
:param flavor: The new flavor if performing resize and not just a
cold migration
:param target_migration: The Migration object from the target cell DB.
:param request_spec: nova.objects.RequestSpec object for the operation
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
:param host_selection: nova.objects.Selection which is a possible
target host for the cross-cell resize
:param network_api: The neutron (client side) networking API.
:param volume_api: The cinder (client side) block-storage API.
"""
super(PrepResizeAtDestTask, self).__init__(context, instance)
self.flavor = flavor
self.target_migration = target_migration
self.request_spec = request_spec
self.compute_rpcapi = compute_rpcapi
self.host_selection = host_selection
self.network_api = network_api
self.volume_api = volume_api
# Keep track of anything we created so we can rollback.
self._bindings_by_port_id = {}
self._created_volume_attachment_ids = []
def _create_port_bindings(self):
"""Creates inactive port bindings against the selected target host
for the ports attached to the instance.
The ``self._bindings_by_port_id`` variable will be set upon successful
completion.
:raises: MigrationPreCheckError if port binding failed
"""
LOG.debug('Creating port bindings for destination host %s',
self.host_selection.service_host)
try:
self._bindings_by_port_id = self.network_api.bind_ports_to_host(
self.context, self.instance, self.host_selection.service_host)
except exception.PortBindingFailed:
raise exception.MigrationPreCheckError(reason=_(
'Failed to create port bindings for host %s') %
self.host_selection.service_host)
def _create_volume_attachments(self):
"""Create empty volume attachments for volume BDMs attached to the
instance in the target cell.
The BlockDeviceMapping.attachment_id field is updated for each
volume BDM processed. Remember that these BDM records are from the
target cell database so the changes will only go there.
:return: BlockDeviceMappingList of volume BDMs with an updated
attachment_id field for the newly created empty attachment for
that BDM
"""
LOG.debug('Creating volume attachments for destination host %s',
self.host_selection.service_host)
volume_bdms = objects.BlockDeviceMappingList(objects=[
bdm for bdm in self.instance.get_bdms() if bdm.is_volume])
for bdm in volume_bdms:
# Create the empty (no host connector) attachment.
attach_ref = self.volume_api.attachment_create(
self.context, bdm.volume_id, bdm.instance_uuid)
# Keep track of what we create for rollbacks.
self._created_volume_attachment_ids.append(attach_ref['id'])
# Update the BDM in the target cell database.
bdm.attachment_id = attach_ref['id']
# Note that ultimately the BDMs in the target cell are either
# pointing at attachments that we can use, or this sub-task has
# failed in which case we will fail the main task and should
# rollback and delete the instance and its BDMs in the target cell
# database, so that is why we do not track the original attachment
# IDs in order to roll them back on the BDM records.
bdm.save()
return volume_bdms
def _execute(self):
"""Performs pre-cross-cell resize checks/claims on the targeted host
This ensures things like networking (ports) will continue to work on
the target host in the other cell before we initiate the migration of
the server.
Resources are also claimed on the target host which in turn creates the
MigrationContext for the instance in the target cell database.
:returns: MigrationContext created in the target cell database during
the resize_claim in the destination compute service.
:raises: nova.exception.MigrationPreCheckError if the pre-check
validation fails for the given host selection; this indicates an
alternative host *may* work but this one does not.
"""
destination = self.host_selection.service_host
LOG.debug('Verifying selected host %s for cross-cell resize.',
destination, instance=self.instance)
# Validate networking by creating port bindings for this host.
self._create_port_bindings()
# Create new empty volume attachments for the volume BDMs attached
# to the instance. Technically this is not host specific and we could
# do this outside of the PrepResizeAtDestTask sub-task but volume
# attachments are meant to be cheap and plentiful so it is nice to
# keep them self-contained within each execution of this task and
# rollback anything we created if we fail.
self._create_volume_attachments()
try:
LOG.debug('Calling destination host %s to prepare for cross-cell '
'resize and claim resources.', destination)
return self.compute_rpcapi.prep_snapshot_based_resize_at_dest(
self.context, self.instance, self.flavor,
self.host_selection.nodename, self.target_migration,
self.host_selection.limits, self.request_spec, destination)
except messaging.MessagingTimeout:
msg = _('RPC timeout while checking if we can cross-cell migrate '
'to host: %s') % destination
raise exception.MigrationPreCheckError(reason=msg)
def rollback(self, ex):
# Rollback anything we created.
host = self.host_selection.service_host
# Cleanup any destination host port bindings.
LOG.debug('Cleaning up port bindings for destination host %s', host)
for port_id in self._bindings_by_port_id:
try:
self.network_api.delete_port_binding(
self.context, port_id, host)
except Exception:
# Don't raise if we fail to cleanup, just log it.
LOG.exception('An error occurred while cleaning up binding '
'for port %s on host %s.', port_id, host,
instance=self.instance)
# Cleanup any destination host volume attachments.
LOG.debug(
'Cleaning up volume attachments for destination host %s', host)
for attachment_id in self._created_volume_attachment_ids:
try:
self.volume_api.attachment_delete(self.context, attachment_id)
except Exception:
# Don't raise if we fail to cleanup, just log it.
LOG.exception('An error occurred while cleaning up volume '
'attachment %s.', attachment_id,
instance=self.instance)
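

# Illustrative flow for PrepResizeAtDestTask (hypothetical names; the TaskBase
# contract is assumed to invoke rollback() when _execute() raises):
#
#     task = PrepResizeAtDestTask(
#         target_ctx, target_instance, flavor, target_migration, request_spec,
#         compute_rpcapi, selection, neutron.API(), cinder.API())
#     migration_context = task.execute()
#
# On failure, rollback() deletes the destination host port bindings and the
# empty volume attachments that were created for the selected host.
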
class PrepResizeAtSourceTask(base.TaskBase):
"""Task to prepare the instance at the source host for the resize.
Will power off the instance at the source host, create and upload a
snapshot image for a non-volume-backed server, and disconnect volumes and
networking from the source host.
The vm_state is recorded with the "old_vm_state" key in the
instance.system_metadata field prior to powering off the instance so the
revert flow can determine if the guest should be running or stopped.
Returns the snapshot image ID, if one was created, from the ``execute``
method.
Upon successful completion, the instance.task_state will be
``resize_migrated`` and the migration.status will be ``post-migrating``.
"""
def __init__(
self, context, instance, flavor, migration, request_spec,
compute_rpcapi, image_api,
):
"""Initializes this PrepResizeAtSourceTask instance.
:param context: nova auth context targeted at the source cell
:param instance: Instance object from the source cell
:param flavor: The new flavor if performing resize and not just a
cold migration
:param migration: Migration object from the source cell
:param request_spec: RequestSpec object for the resize operation
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
:param image_api: instance of nova.image.glance.API
"""
super(PrepResizeAtSourceTask, self).__init__(context, instance)
self.flavor = flavor
self.migration = migration
self.request_spec = request_spec
self.compute_rpcapi = compute_rpcapi
self.image_api = image_api
self._image_id = None
def _execute(self):
# Save off the vm_state so we can use that later on the source host
# if the resize is reverted - it is used to determine if the reverted
# guest should be powered on.
self.instance.system_metadata['old_vm_state'] = self.instance.vm_state
self.instance.task_state = task_states.RESIZE_MIGRATING
self.instance.old_flavor = self.instance.flavor
self.instance.new_flavor = self.flavor
# If the instance is not volume-backed, create a snapshot of the root
# disk.
if not self.request_spec.is_bfv:
# Create an empty image.
name = '%s-resize-temp' % self.instance.display_name
image_meta = compute_utils.create_image(
self.context, self.instance, name, 'snapshot', self.image_api)
self._image_id = image_meta['id']
LOG.debug('Created snapshot image %s for cross-cell resize.',
self._image_id, instance=self.instance)
self.instance.save(expected_task_state=task_states.RESIZE_PREP)
# RPC call the source host to prepare for resize.
self.compute_rpcapi.prep_snapshot_based_resize_at_source(
self.context, self.instance, self.migration,
snapshot_id=self._image_id)
return self._image_id
def rollback(self, ex):
# If we created a snapshot image, attempt to delete it.
if self._image_id:
compute_utils.delete_image(
self.context, self.instance, self.image_api, self._image_id)
# If the compute service successfully powered off the guest but failed
# to snapshot (or timed out during the snapshot), then the
# _sync_power_states periodic task should mark the instance as stopped
# and the user can start/reboot it.
# If the compute service powered off the instance, snapshot it and
# destroyed the guest and then a failure occurred, the instance should
# have been set to ERROR status (by the compute service) so the user
# has to hard reboot or rebuild it.
LOG.error('Preparing for cross-cell resize at the source host %s '
'failed. The instance may need to be hard rebooted.',
self.instance.host, instance=self.instance)
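

# Snapshot handoff sketch (illustrative): for a non-volume-backed server the
# image created by this task is what FinishResizeAtDestTask spawns from in
# the target cell; for a volume-backed server no snapshot is needed.
#
#     snapshot_id = PrepResizeAtSourceTask(
#         source_ctx, instance, flavor, source_migration, request_spec,
#         compute_rpcapi, glance.API()).execute()
#     # snapshot_id is None when request_spec.is_bfv is True
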
class FinishResizeAtDestTask(base.TaskBase):
"""Task to finish the resize at the destination host.
Calls the finish_snapshot_based_resize_at_dest method on the destination
compute service which sets up networking and block storage and spawns
the guest on the destination host. Upon successful completion of this
task, the migration status should be 'finished', the instance task_state
should be None and the vm_state should be 'resized'. The instance host/node
information should also reflect the destination compute.
If the compute call is successful, the task will change the instance
mapping to point at the target cell and hide the source cell instance thus
making the confirm/revert operations act on the target cell instance.
"""
def __init__(self, context, instance, migration, source_cell_instance,
compute_rpcapi, target_cell_mapping, snapshot_id,
request_spec):
"""Initialize this task.
:param context: nova auth request context targeted at the target cell
:param instance: Instance object in the target cell database
:param migration: Migration object in the target cell database
:param source_cell_instance: Instance object in the source cell DB
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
:param target_cell_mapping: CellMapping object for the target cell
:param snapshot_id: ID of the image snapshot to use for a
non-volume-backed instance.
:param request_spec: nova.objects.RequestSpec object for the operation
"""
super(FinishResizeAtDestTask, self).__init__(context, instance)
self.migration = migration
self.source_cell_instance = source_cell_instance
self.compute_rpcapi = compute_rpcapi
self.target_cell_mapping = target_cell_mapping
self.snapshot_id = snapshot_id
self.request_spec = request_spec
def _finish_snapshot_based_resize_at_dest(self):
"""Synchronously RPC calls finish_snapshot_based_resize_at_dest
If the finish_snapshot_based_resize_at_dest method fails in the
compute service, this method will update the source cell instance
data to reflect the error (vm_state='error', copy the fault and
instance action events for that compute method).
"""
LOG.debug('Finishing cross-cell resize at the destination host %s',
self.migration.dest_compute, instance=self.instance)
# prep_snapshot_based_resize_at_source in the source cell would have
# changed the source cell instance.task_state to resize_migrated and
# we need to reflect that in the target cell instance before calling
# the destination compute.
self.instance.task_state = task_states.RESIZE_MIGRATED
self.instance.save()
event_name = 'compute_finish_snapshot_based_resize_at_dest'
source_cell_context = self.source_cell_instance._context
try:
with compute_utils.EventReporter(
source_cell_context, event_name,
self.migration.dest_compute, self.instance.uuid):
self.compute_rpcapi.finish_snapshot_based_resize_at_dest(
self.context, self.instance, self.migration,
self.snapshot_id, self.request_spec)
# finish_snapshot_based_resize_at_dest updates the target cell
# instance so we need to refresh it here to have the latest copy.
self.instance.refresh()
except Exception:
# We need to mimic the error handlers on
# finish_snapshot_based_resize_at_dest in the destination compute
# service so those changes are reflected in the source cell
# instance.
with excutils.save_and_reraise_exception(logger=LOG):
# reverts_task_state and _error_out_instance_on_exception:
self.source_cell_instance.task_state = None
self.source_cell_instance.vm_state = vm_states.ERROR
self.source_cell_instance.save()
# wrap_instance_fault (this is best effort)
self._copy_latest_fault(source_cell_context)
def _copy_latest_fault(self, source_cell_context):
"""Copies the latest instance fault from the target cell to the source
:param source_cell_context: nova auth request context targeted at the
source cell
"""
try:
# Get the latest fault from the target cell database.
fault = objects.InstanceFault.get_latest_for_instance(
self.context, self.instance.uuid)
if fault:
fault_clone = clone_creatable_object(source_cell_context,
fault)
fault_clone.create()
except Exception:
LOG.exception(
'Failed to copy instance fault from target cell DB',
instance=self.instance)
def _update_instance_mapping(self):
"""Swaps the hidden field value on the source and target cell instance
and updates the instance mapping to point at the target cell.
"""
LOG.debug('Marking instance in source cell as hidden and updating '
'instance mapping to point at target cell %s.',
self.target_cell_mapping.identity, instance=self.instance)
# Get the instance mapping first to make the window of time where both
# instances are hidden=False as small as possible.
instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
self.context, self.instance.uuid)
# Mark the target cell instance record as hidden=False so it will show
# up when listing servers. Note that because of how the API filters
# duplicate instance records, even if the user is listing servers at
# this exact moment only one copy of the instance will be returned.
self.instance.hidden = False
self.instance.save()
# Update the instance mapping to point at the target cell. This is so
# that the confirm/revert actions will be performed on the resized
# instance in the target cell rather than the destroyed guest in the
# source cell. Note that we could do this before finishing the resize
# on the dest host, but it makes sense to defer this until the
# instance is successfully resized in the dest host because if that
# fails, we want to be able to rebuild in the source cell to recover
# the instance.
instance_mapping.cell_mapping = self.target_cell_mapping
# If this fails the cascading task failures should delete the instance
# in the target cell database so we do not need to hide it again.
instance_mapping.save()
# Mark the source cell instance record as hidden=True to hide it from
# the user when listing servers.
self.source_cell_instance.hidden = True
self.source_cell_instance.save()
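
    # Ordering sketch for the swap above (illustrative): 1) the target cell
    # instance becomes hidden=False (both copies are briefly visible, but the
    # API de-duplicates records by uuid when listing), 2) the instance
    # mapping is re-pointed at the target cell, 3) the source cell instance
    # becomes hidden=True. If step 2 fails, the cascading task rollbacks
    # delete the target cell instance, so the source copy never needs to be
    # re-hidden.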
def _execute(self):
# Finish the resize on the destination host in the target cell.
self._finish_snapshot_based_resize_at_dest()
# Do the instance.hidden/instance_mapping.cell_mapping swap.
self._update_instance_mapping()
    def rollback(self, ex):
        # The methods executed in this task are self-contained for rollbacks.
        pass

class CrossCellMigrationTask(base.TaskBase):
"""Orchestrates a cross-cell cold migration (resize)."""
def __init__(self, context, instance, flavor,
request_spec, source_migration, compute_rpcapi,
host_selection, alternate_hosts):
"""Construct the CrossCellMigrationTask instance
:param context: The user request auth context. This should be targeted
to the source cell in which the instance is currently running.
:param instance: The instance being migrated (from the source cell)
:param flavor: The new flavor if performing resize and not just a
cold migration
:param request_spec: nova.objects.RequestSpec with scheduling details
:param source_migration: nova.objects.Migration record for this
operation (from the source cell)
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
:param host_selection: nova.objects.Selection of the initial
selected target host from the scheduler where the selected host
is in another cell which is different from the cell in which
the instance is currently running
:param alternate_hosts: list of 0 or more nova.objects.Selection
objects representing alternate hosts within the same target cell
as ``host_selection``.
"""
super(CrossCellMigrationTask, self).__init__(context, instance)
self.request_spec = request_spec
self.flavor = flavor
self.source_migration = source_migration
self.compute_rpcapi = compute_rpcapi
self.host_selection = host_selection
self.alternate_hosts = alternate_hosts
self._target_cell_instance = None
self._target_cell_context = None
self.network_api = neutron.API()
self.volume_api = cinder.API()
self.image_api = glance.API()
# Keep an ordered dict of the sub-tasks completed so we can call their
# rollback routines if something fails.
self._completed_tasks = collections.OrderedDict()
def _get_target_cell_mapping(self):
"""Get the target host CellMapping for the selected host
:returns: nova.objects.CellMapping for the cell of the selected target
host
:raises: nova.exception.CellMappingNotFound if the cell mapping for
the selected target host cannot be found (this should not happen
if the scheduler just selected it)
"""
return objects.CellMapping.get_by_uuid(
self.context, self.host_selection.cell_uuid)
def _setup_target_cell_db(self):
"""Creates the instance and its related records in the target cell
Upon successful completion the self._target_cell_context and
self._target_cell_instance variables are set.
:returns: A 2-item tuple of:
- The active Migration object from the target cell DB
- The CellMapping for the target cell
"""
LOG.debug('Setting up the target cell database for the instance and '
'its related records.', instance=self.instance)
target_cell_mapping = self._get_target_cell_mapping()
# Clone the context targeted at the source cell and then target the
# clone at the target cell.
self._target_cell_context = copy.copy(self.context)
nova_context.set_target_cell(
self._target_cell_context, target_cell_mapping)
task = TargetDBSetupTask(
self.context, self.instance, self.source_migration,
self._target_cell_context)
self._target_cell_instance, target_cell_migration = task.execute()
self._completed_tasks['TargetDBSetupTask'] = task
return target_cell_migration, target_cell_mapping
def _perform_external_api_checks(self):
"""Performs checks on external service APIs for support.
* Checks that the neutron port binding-extended API is available
:raises: MigrationPreCheckError if any checks fail
"""
LOG.debug('Making sure neutron is new enough for cross-cell resize.')
# Check that the port binding-extended API extension is available in
# neutron because if it's not we can just fail fast.
if not self.network_api.supports_port_binding_extension(self.context):
raise exception.MigrationPreCheckError(
reason=_("Required networking service API extension '%s' "
"not found.") %
neutron_constants.PORT_BINDING_EXTENDED)
def _prep_resize_at_dest(self, target_cell_migration):
"""Executes PrepResizeAtDestTask and updates the source migration.
:param target_cell_migration: Migration record from the target cell DB
:returns: Refreshed Migration record from the target cell DB after the
resize_claim on the destination host has updated the record.
"""
# TODO(mriedem): Check alternates if the primary selected host fails;
# note that alternates are always in the same cell as the selected host
# so if the primary fails pre-checks, the alternates may also fail. We
# could reschedule but the scheduler does not yet have an ignore_cells
# capability like ignore_hosts.
# We set the target cell instance new_flavor attribute now since the
# ResourceTracker.resize_claim on the destination host uses it.
self._target_cell_instance.new_flavor = self.flavor
verify_task = PrepResizeAtDestTask(
self._target_cell_context, self._target_cell_instance, self.flavor,
target_cell_migration, self.request_spec, self.compute_rpcapi,
self.host_selection, self.network_api, self.volume_api)
target_cell_migration_context = verify_task.execute()
self._completed_tasks['PrepResizeAtDestTask'] = verify_task
# Stash the old vm_state so we can set the resized/reverted instance
# back to the same state later, i.e. if STOPPED do not power on the
# guest.
self._target_cell_instance.system_metadata['old_vm_state'] = (
self._target_cell_instance.vm_state)
# Update the target cell instance availability zone now that we have
# prepared the resize on the destination host. We do this in conductor
# to avoid the "up-call" from the compute service to the API database.
self._target_cell_instance.availability_zone = (
availability_zones.get_host_availability_zone(
self.context, self.host_selection.service_host))
self._target_cell_instance.save()
# We need to mirror the MigrationContext, created in the target cell
# database, into the source cell database. Keep in mind that the
# MigrationContext has pci_devices and a migration_id in it which
# are specific to the target cell database. The only one we care about
# correcting for the source cell database is migration_id since that
# is used to route neutron external events to the source and target
# hosts.
self.instance.migration_context = (
target_cell_migration_context.obj_clone())
self.instance.migration_context.migration_id = self.source_migration.id
self.instance.save()
return self._update_migration_from_dest_after_claim(
target_cell_migration)
def _update_migration_from_dest_after_claim(self, target_cell_migration):
"""Update the source cell migration record with target cell info.
The PrepResizeAtDestTask runs a resize_claim on the target compute
host service in the target cell which sets fields about the destination
in the migration record in the target cell. We need to reflect those
changes back into the migration record in the source cell.
:param target_cell_migration: Migration record from the target cell DB
:returns: Refreshed Migration record from the target cell DB after the
resize_claim on the destination host has updated the record.
"""
# Copy information about the dest compute that was set on the dest
# migration record during the resize claim on the dest host.
# We have to get a fresh copy of the target cell migration record to
# pick up the changes made in the dest compute service.
target_cell_migration = objects.Migration.get_by_uuid(
self._target_cell_context, target_cell_migration.uuid)
self.source_migration.dest_compute = target_cell_migration.dest_compute
self.source_migration.dest_node = target_cell_migration.dest_node
self.source_migration.dest_host = target_cell_migration.dest_host
self.source_migration.save()
return target_cell_migration
def _prep_resize_at_source(self):
"""Executes PrepResizeAtSourceTask
:return: The image snapshot ID if the instance is not volume-backed,
else None.
"""
LOG.debug('Preparing source host %s for cross-cell resize.',
self.source_migration.source_compute, instance=self.instance)
prep_source_task = PrepResizeAtSourceTask(
self.context, self.instance, self.flavor, self.source_migration,
self.request_spec, self.compute_rpcapi, self.image_api)
snapshot_id = prep_source_task.execute()
self._completed_tasks['PrepResizeAtSourceTask'] = prep_source_task
return snapshot_id
def _finish_resize_at_dest(
self, target_cell_migration, target_cell_mapping, snapshot_id):
"""Executes FinishResizeAtDestTask
:param target_cell_migration: Migration object from the target cell DB
:param target_cell_mapping: CellMapping object for the target cell
:param snapshot_id: ID of the image snapshot to use for a
non-volume-backed instance.
"""
task = FinishResizeAtDestTask(
self._target_cell_context, self._target_cell_instance,
target_cell_migration, self.instance, self.compute_rpcapi,
target_cell_mapping, snapshot_id, self.request_spec)
task.execute()
self._completed_tasks['FinishResizeAtDestTask'] = task
def _execute(self):
"""Execute high-level orchestration of the cross-cell resize"""
# We are committed to a cross-cell move at this point so update the
# migration record to reflect that. If we fail after this we are not
# going to go back and try to run the MigrationTask to do a same-cell
# migration, so we set the cross_cell_move flag early for audit/debug
# in case something fails later and the operator wants to know if this
# was a cross-cell or same-cell move operation.
self.source_migration.cross_cell_move = True
self.source_migration.save()
# Make sure neutron APIs we need are available.
self._perform_external_api_checks()
# Before preparing the target host create the instance record data
# in the target cell database since we cannot do anything in the
# target cell without having an instance record there. Remember that
# we lose the cell-targeting on the request context over RPC so we
# cannot simply pass the source cell context and instance over RPC
# to the target compute host and assume changes get mirrored back to
# the source cell database.
target_cell_migration, target_cell_mapping = (
self._setup_target_cell_db())
# Claim resources and validate the selected host in the target cell.
target_cell_migration = self._prep_resize_at_dest(
target_cell_migration)
# Prepare the instance at the source host (stop it, optionally snapshot
# it, disconnect volumes and VIFs, etc).
snapshot_id = self._prep_resize_at_source()
# Finish the resize at the destination host, swap the hidden fields
# on the instances and update the instance mapping.
self._finish_resize_at_dest(
target_cell_migration, target_cell_mapping, snapshot_id)
def rollback(self, ex):
"""Rollback based on how sub-tasks completed
Sub-tasks should rollback appropriately for whatever they do but here
we need to handle cleaning anything up from successful tasks, e.g. if
tasks A and B were successful but task C fails, then we might need to
cleanup changes from A and B here.
"""
# Rollback the completed tasks in reverse order.
for task_name in reversed(self._completed_tasks):
try:
self._completed_tasks[task_name].rollback(ex)
except Exception:
LOG.exception('Rollback for task %s failed.', task_name)
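

# High-level order of operations in CrossCellMigrationTask._execute (sketch;
# sub-task rollbacks are invoked in reverse order of completion on failure):
#
#   1. TargetDBSetupTask      - duplicate the instance records in the target
#                               cell database (hidden=True)
#   2. PrepResizeAtDestTask   - port bindings, volume attachments and the
#                               resize claim on the selected destination host
#   3. PrepResizeAtSourceTask - power off, optional snapshot, disconnect
#                               volumes/VIFs at the source host
#   4. FinishResizeAtDestTask - spawn at the destination, swap the hidden
#                               flags and re-point the instance mapping
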
def get_inst_and_cell_map_from_source(
target_cell_context, source_compute, instance_uuid):
"""Queries the instance from the source cell database.
:param target_cell_context: nova auth request context targeted at the
target cell database
:param source_compute: name of the source compute service host
:param instance_uuid: UUID of the instance
:returns: 2-item tuple of:
- Instance object from the source cell database.
- CellMapping object of the source cell mapping
"""
# We can get the source cell via the host mapping based on the
# source_compute in the migration object.
source_host_mapping = objects.HostMapping.get_by_host(
target_cell_context, source_compute)
source_cell_mapping = source_host_mapping.cell_mapping
# Clone the context targeted at the target cell and then target the
# clone at the source cell.
source_cell_context = copy.copy(target_cell_context)
nova_context.set_target_cell(source_cell_context, source_cell_mapping)
# Now get the instance from the source cell DB using the source
# cell context which will make the source cell instance permanently
# targeted to the source cell database.
instance = objects.Instance.get_by_uuid(
source_cell_context, instance_uuid,
expected_attrs=['flavor', 'info_cache', 'system_metadata'])
return instance, source_cell_mapping
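

# Usage sketch (illustrative): ConfirmResizeTask and RevertResizeTask below
# start from a target-cell context and use this helper to reach the source
# cell copy of the instance:
#
#     instance, cell_mapping = get_inst_and_cell_map_from_source(
#         target_ctx, migration.source_compute, instance_uuid)
#     # instance._context is now permanently targeted at the source cell
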
class ConfirmResizeTask(base.TaskBase):
"""Task which orchestrates a cross-cell resize confirm operation
When confirming a cross-cell resize, the instance is in both the source
and target cell databases and on the source and target compute hosts.
The API operation is performed on the target cell instance and it is the
job of this task to cleanup the source cell host and database and
update the status of the instance in the target cell.
This can be called either asynchronously from the API service during a
normal confirmResize server action or synchronously when deleting a server
in VERIFY_RESIZE status.
"""
def __init__(self, context, instance, migration, legacy_notifier,
compute_rpcapi):
"""Initialize this ConfirmResizeTask instance
:param context: nova auth request context targeted at the target cell
:param instance: Instance object in "resized" status from the target
cell
:param migration: Migration object from the target cell for the resize
operation expected to have status "confirming"
:param legacy_notifier: LegacyValidatingNotifier for sending legacy
unversioned notifications
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
"""
super(ConfirmResizeTask, self).__init__(context, instance)
self.migration = migration
self.legacy_notifier = legacy_notifier
self.compute_rpcapi = compute_rpcapi
def _send_resize_confirm_notification(self, instance, phase):
"""Sends an unversioned and versioned resize.confirm.(phase)
notification.
:param instance: The instance whose resize is being confirmed.
:param phase: The phase for the resize.confirm operation (either
"start" or "end").
"""
ctxt = instance._context
# Send the legacy unversioned notification.
compute_utils.notify_about_instance_usage(
self.legacy_notifier, ctxt, instance, 'resize.confirm.%s' % phase)
# Send the versioned notification.
compute_utils.notify_about_instance_action(
ctxt, instance, CONF.host,
action=fields.NotificationAction.RESIZE_CONFIRM,
phase=phase)
def _cleanup_source_host(self, source_instance):
"""Cleans up the instance from the source host.
Creates a confirmResize instance action in the source cell DB.
Destroys the guest from the source hypervisor, cleans up networking
and storage and frees up resource usage on the source host.
:param source_instance: Instance object from the source cell DB
"""
ctxt = source_instance._context
# The confirmResize instance action has to be created in the source
# cell database before calling the compute service to properly
# track action events. Note that the API created the same action
# record but on the target cell instance.
objects.InstanceAction.action_start(
ctxt, source_instance.uuid, instance_actions.CONFIRM_RESIZE,
want_result=False)
# Get the Migration record from the source cell database.
source_migration = objects.Migration.get_by_uuid(
ctxt, self.migration.uuid)
LOG.debug('Cleaning up source host %s for cross-cell resize confirm.',
source_migration.source_compute, instance=source_instance)
        # Use the EventReporter context manager to create the same event that
        # the source compute will create, but in the target cell DB, so we do
        # not have to explicitly copy it over from the source to the target
        # DB.
event_name = 'compute_confirm_snapshot_based_resize_at_source'
with compute_utils.EventReporter(
self.context, event_name, source_migration.source_compute,
source_instance.uuid):
self.compute_rpcapi.confirm_snapshot_based_resize_at_source(
ctxt, source_instance, source_migration)
def _finish_confirm_in_target_cell(self):
"""Sets "terminal" states on the migration and instance in target cell.
This is similar to how ``confirm_resize`` works in the compute service
for same-cell resize.
"""
LOG.debug('Updating migration and instance status in target cell DB.',
instance=self.instance)
# Complete the migration confirmation.
self.migration.status = 'confirmed'
self.migration.save()
# Update the target cell instance.
# Delete stashed information for the resize.
self.instance.old_flavor = None
self.instance.new_flavor = None
self.instance.system_metadata.pop('old_vm_state', None)
self._set_vm_and_task_state()
self.instance.drop_migration_context()
# There are multiple possible task_states set on the instance because
# if we are called from the confirmResize instance action the
# task_state should be None, but if we are called from
# _confirm_resize_on_deleting then the instance is being deleted.
self.instance.save(expected_task_state=[
None, task_states.DELETING, task_states.SOFT_DELETING])
def _set_vm_and_task_state(self):
"""Sets the target cell instance vm_state based on the power_state.
The task_state is set to None.
"""
# The old_vm_state could be STOPPED but the user might have manually
# powered up the instance to confirm the resize/migrate, so we need to
# check the current power state on the instance and set the vm_state
# appropriately. We default to ACTIVE because if the power state is
# not SHUTDOWN, we assume the _sync_power_states periodic task in the
# compute service will clean it up.
p_state = self.instance.power_state
if p_state == power_state.SHUTDOWN:
vm_state = vm_states.STOPPED
LOG.debug("Resized/migrated instance is powered off. "
"Setting vm_state to '%s'.", vm_state,
instance=self.instance)
else:
vm_state = vm_states.ACTIVE
self.instance.vm_state = vm_state
self.instance.task_state = None
def _execute(self):
# First get the instance from the source cell so we can cleanup.
source_cell_instance = get_inst_and_cell_map_from_source(
self.context, self.migration.source_compute, self.instance.uuid)[0]
# Send the resize.confirm.start notification(s) using the source
# cell instance since we start there.
self._send_resize_confirm_notification(
source_cell_instance, fields.NotificationPhase.START)
# RPC call the source compute to cleanup.
self._cleanup_source_host(source_cell_instance)
# Now we can delete the instance in the source cell database.
LOG.info('Deleting instance record from source cell %s',
source_cell_instance._context.cell_uuid,
instance=source_cell_instance)
# This needs to be a hard delete because we want to be able to resize
# back to this cell without hitting a duplicate entry unique constraint
# error.
source_cell_instance.destroy(hard_delete=True)
# Update the information in the target cell database.
self._finish_confirm_in_target_cell()
# Send the resize.confirm.end notification using the target cell
# instance since we end there.
self._send_resize_confirm_notification(
self.instance, fields.NotificationPhase.END)
def rollback(self, ex):
with excutils.save_and_reraise_exception():
LOG.exception(
                'An error occurred while confirming the resize for the '
                'instance in target cell %s. Depending on the error, a copy '
                'of the instance may still exist in the source cell database '
                'which contains the source host %s. At this point the '
                'instance is on the target host %s and anything left in the '
                'source cell can be cleaned up.', self.context.cell_uuid,
self.migration.source_compute, self.migration.dest_compute,
instance=self.instance)
# If anything failed set the migration status to 'error'.
self.migration.status = 'error'
self.migration.save()
# Put the instance in the target DB into ERROR status, record
# a fault and send an error notification.
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
request_spec = objects.RequestSpec.get_by_instance_uuid(
self.context, self.instance.uuid)
scheduler_utils.set_vm_state_and_notify(
self.context, self.instance.uuid, 'compute_task',
'migrate_server', updates, ex, request_spec)
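

# Confirm flow sketch (illustrative; mirrors ConfirmResizeTask._execute):
#
#     ConfirmResizeTask(target_ctx, instance, migration, legacy_notifier,
#                       compute_rpcapi).execute()
#     # notify start (source cell) -> RPC the source compute to destroy the
#     # old guest -> hard-delete the source cell instance record -> mark the
#     # migration 'confirmed' and finalize vm/task state in the target cell
#     # -> notify end (target cell)
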
class RevertResizeTask(base.TaskBase):
"""Task to orchestrate a cross-cell resize revert operation.
This task is responsible for coordinating the cleanup of the resources
in the target cell and restoring the server and its related resources
(e.g. networking and volumes) in the source cell.
Upon successful completion the instance mapping should point back at the
source cell, the source cell instance should no longer be hidden and the
instance in the target cell should be destroyed.
"""
def __init__(self, context, instance, migration, legacy_notifier,
compute_rpcapi):
"""Initialize this RevertResizeTask instance
:param context: nova auth request context targeted at the target cell
:param instance: Instance object in "resized" status from the target
cell with task_state "resize_reverting"
:param migration: Migration object from the target cell for the resize
operation expected to have status "reverting"
:param legacy_notifier: LegacyValidatingNotifier for sending legacy
unversioned notifications
:param compute_rpcapi: instance of nova.compute.rpcapi.ComputeAPI
"""
super(RevertResizeTask, self).__init__(context, instance)
self.migration = migration
self.legacy_notifier = legacy_notifier
self.compute_rpcapi = compute_rpcapi
# These are used for rollback handling.
self._source_cell_migration = None
self._source_cell_instance = None
self.volume_api = cinder.API()
def _send_resize_revert_notification(self, instance, phase):
"""Sends an unversioned and versioned resize.revert.(phase)
notification.
:param instance: The instance whose resize is being reverted.
:param phase: The phase for the resize.revert operation (either
"start" or "end").
"""
ctxt = instance._context
# Send the legacy unversioned notification.
compute_utils.notify_about_instance_usage(
self.legacy_notifier, ctxt, instance, 'resize.revert.%s' % phase)
# Send the versioned notification.
compute_utils.notify_about_instance_action(
ctxt, instance, CONF.host,
action=fields.NotificationAction.RESIZE_REVERT,
phase=phase)
@staticmethod
def _update_source_obj_from_target_cell(source_obj, target_obj):
"""Updates the object from the source cell using the target cell object
WARNING: This method does not support objects with nested objects, i.e.
objects that have fields which are other objects. An error will be
raised in that case.
All fields on the source object are updated from the target object
except for the ``id`` and ``created_at`` fields since those value must
not change during an update. The ``updated_at`` field is also skipped
because saving changes to ``source_obj`` will automatically update the
``updated_at`` field.
It is expected that the two objects represent the same thing but from
different cell databases, so for example, a uuid field (if one exists)
should not change.
Note that the changes to ``source_obj`` are not persisted in this
method.
:param source_obj: Versioned object from the source cell database
:param target_obj: Versioned object from the target cell database
:raises: ObjectActionError if nested object fields are encountered
"""
ignore_fields = ['created_at', 'id', 'updated_at']
for field in source_obj.obj_fields:
if field in target_obj and field not in ignore_fields:
if isinstance(source_obj.fields[field], fields.ObjectField):
raise exception.ObjectActionError(
action='_update_source_obj_from_target_cell',
reason='nested objects are not supported')
setattr(source_obj, field, getattr(target_obj, field))
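
    # Illustrative example (hypothetical objects): given the same Migration
    # record in both cells, this copies e.g. a status change made on the
    # target cell record back onto the source cell record without touching
    # id, created_at or updated_at:
    #
    #     RevertResizeTask._update_source_obj_from_target_cell(
    #         source_migration, target_migration)
    #     source_migration.save()  # persisting the change is the caller's job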
def _update_bdms_in_source_cell(self, source_cell_context):
"""Update BlockDeviceMapppings in the source cell database.
It is possible to attach/detach volumes to/from a resized instance,
which would create/delete BDM records in the target cell, so we have
to recreate newly attached BDMs in the source cell database and
delete any old BDMs that were detached while resized in the target
cell.
:param source_cell_context: nova auth request context targeted at the
source cell database
"""
bdms_from_source_cell = (
objects.BlockDeviceMappingList.get_by_instance_uuid(
source_cell_context, self.instance.uuid))
source_cell_bdms_by_uuid = {
bdm.uuid: bdm for bdm in bdms_from_source_cell}
bdms_from_target_cell = (
objects.BlockDeviceMappingList.get_by_instance_uuid(
self.context, self.instance.uuid))
# Copy new/updated BDMs from the target cell DB to the source cell DB.
for bdm in bdms_from_target_cell:
if bdm.uuid in source_cell_bdms_by_uuid:
# Remove this BDM from the list since we want to preserve it
# along with its attachment_id.
source_cell_bdms_by_uuid.pop(bdm.uuid)
else:
# Newly attached BDM while in the target cell, so create it
# in the source cell.
source_bdm = clone_creatable_object(source_cell_context, bdm)
# revert_snapshot_based_resize_at_dest is going to delete the
# attachment for this BDM so we need to create a new empty
# attachment to reserve this volume so that
# finish_revert_snapshot_based_resize_at_source can use it.
attach_ref = self.volume_api.attachment_create(
source_cell_context, bdm.volume_id, self.instance.uuid)
source_bdm.attachment_id = attach_ref['id']
LOG.debug('Creating BlockDeviceMapping with volume ID %s '
'and attachment %s in the source cell database '
'since the volume was attached while the server was '
'resized.', bdm.volume_id, attach_ref['id'],
instance=self.instance)
source_bdm.create()
# If there are any source bdms left that were not processed from the
# target cell bdms, it means those source bdms were detached while
# resized in the target cell, and we need to delete them from the
# source cell so they don't re-appear once the revert is complete.
self._delete_orphan_source_cell_bdms(source_cell_bdms_by_uuid.values())
def _delete_orphan_source_cell_bdms(self, source_cell_bdms):
"""Deletes orphaned BDMs and volume attachments from the source cell.
If any volumes were detached while the server was resized into the
target cell they are destroyed here so they do not show up again once
the instance is mapped back to the source cell.
:param source_cell_bdms: Iterator of BlockDeviceMapping objects.
"""
for bdm in source_cell_bdms:
LOG.debug('Destroying BlockDeviceMapping with volume ID %s and '
'attachment ID %s from source cell database during '
'cross-cell resize revert since the volume was detached '
'while the server was resized.', bdm.volume_id,
bdm.attachment_id, instance=self.instance)
# First delete the (empty) attachment, created by
# prep_snapshot_based_resize_at_source, so it is not leaked.
try:
self.volume_api.attachment_delete(
bdm._context, bdm.attachment_id)
except Exception as e:
LOG.error('Failed to delete attachment %s for volume %s. The '
'attachment may be leaked and needs to be manually '
'cleaned up. Error: %s', bdm.attachment_id,
bdm.volume_id, e, instance=self.instance)
bdm.destroy()
def _update_instance_actions_in_source_cell(self, source_cell_context):
"""Update instance action records in the source cell database
We need to copy the REVERT_RESIZE instance action and related events
from the target cell to the source cell. Otherwise the revert operation
in the source compute service will not be able to lookup the correct
instance action to track events.
:param source_cell_context: nova auth request context targeted at the
source cell database
"""
# FIXME(mriedem): This is a hack to just get revert working on
# the source; we need to re-create any actions created in the target
# cell DB after the instance was moved while it was in
# VERIFY_RESIZE status, like if volumes were attached/detached.
        # Could we use a changes-since filter for that, i.e. find the last
        # instance action for the instance in the source cell database and
        # then get all instance actions from the target cell database that
        # were created after that time?
action = objects.InstanceAction.get_by_request_id(
self.context, self.instance.uuid, self.context.request_id)
new_action = clone_creatable_object(source_cell_context, action)
new_action.create()
# Also create the events under this action.
events = objects.InstanceActionEventList.get_by_action(
self.context, action.id)
for event in events:
new_event = clone_creatable_object(source_cell_context, event)
new_event.create(action.instance_uuid, action.request_id)
def _update_migration_in_source_cell(self, source_cell_context):
"""Update the migration record in the source cell database.
Updates the migration record in the source cell database based on the
current information about the migration in the target cell database.
:param source_cell_context: nova auth request context targeted at the
source cell database
:return: Migration object of the updated source cell database migration
record
"""
source_cell_migration = objects.Migration.get_by_uuid(
source_cell_context, self.migration.uuid)
# The only change we really expect here is the status changing to
# "reverting".
self._update_source_obj_from_target_cell(
source_cell_migration, self.migration)
source_cell_migration.save()
return source_cell_migration
def _update_instance_in_source_cell(self, instance):
"""Updates the instance and related records in the source cell DB.
Before reverting in the source cell we need to copy the
latest state information from the target cell database where the
instance lived before the revert. This is because data about the
instance could have changed while it was in VERIFY_RESIZE status, like
attached volumes.
:param instance: Instance object from the source cell database
:return: Migration object of the updated source cell database migration
record
"""
LOG.debug('Updating instance-related records in the source cell '
'database based on target cell database information.',
instance=instance)
# Copy information from the target cell instance that we need in the
# source cell instance for doing the revert on the source compute host.
instance.system_metadata['old_vm_state'] = (
self.instance.system_metadata.get('old_vm_state'))
instance.task_state = task_states.RESIZE_REVERTING
instance.save()
source_cell_context = instance._context
self._update_bdms_in_source_cell(source_cell_context)
self._update_instance_actions_in_source_cell(source_cell_context)
source_cell_migration = self._update_migration_in_source_cell(
source_cell_context)
# NOTE(mriedem): We do not have to worry about ports changing while
# resized since the API does not allow attach/detach interface while
# resized. Same for tags.
return source_cell_migration
def _update_instance_mapping(
self, source_cell_instance, source_cell_mapping):
"""Swaps the hidden field value on the source and target cell instance
and updates the instance mapping to point at the source cell.
:param source_cell_instance: Instance object from the source cell DB
:param source_cell_mapping: CellMapping object for the source cell
"""
LOG.debug('Marking instance in target cell as hidden and updating '
'instance mapping to point at source cell %s.',
source_cell_mapping.identity, instance=source_cell_instance)
# Get the instance mapping first to make the window of time where both
# instances are hidden=False as small as possible.
instance_mapping = objects.InstanceMapping.get_by_instance_uuid(
self.context, self.instance.uuid)
# Mark the source cell instance record as hidden=False so it will show
# up when listing servers. Note that because of how the API filters
# duplicate instance records, even if the user is listing servers at
# this exact moment only one copy of the instance will be returned.
source_cell_instance.hidden = False
source_cell_instance.save()
# Update the instance mapping to point at the source cell. We do this
# before cleaning up the target host/cell because that is really best
# effort and if something fails on the target we want the user to
# now interact with the instance in the source cell with the original
# flavor because they are ultimately trying to revert and get back
# there, so if they hard reboot/rebuild after an error (for example)
# that should happen in the source cell.
instance_mapping.cell_mapping = source_cell_mapping
instance_mapping.save()
# Mark the target cell instance record as hidden=True to hide it from
# the user when listing servers.
self.instance.hidden = True
self.instance.save()
def _execute(self):
# Send the resize.revert.start notification(s) using the target
# cell instance since we start there.
self._send_resize_revert_notification(
self.instance, fields.NotificationPhase.START)
source_cell_instance, source_cell_mapping = (
get_inst_and_cell_map_from_source(
self.context, self.migration.source_compute,
self.instance.uuid))
self._source_cell_instance = source_cell_instance
# Update the source cell database information based on the target cell
# database, i.e. the instance/migration/BDMs/action records. Do all of
# this before updating the instance mapping in case it fails.
source_cell_migration = self._update_instance_in_source_cell(
source_cell_instance)
# Swap the instance.hidden values and update the instance mapping to
# point at the source cell. From here on out the user will see and
# operate on the instance in the source cell.
self._update_instance_mapping(
source_cell_instance, source_cell_mapping)
# Save off the source cell migration record for rollbacks.
self._source_cell_migration = source_cell_migration
# Clean the instance from the target host.
LOG.debug('Calling destination host %s to revert cross-cell resize.',
self.migration.dest_compute, instance=self.instance)
        # Use the EventReporter context manager to create the same event that
        # the dest compute will create, but in the source cell DB, so we do
        # not have to explicitly copy it over from the target to the source
        # DB.
event_name = 'compute_revert_snapshot_based_resize_at_dest'
with compute_utils.EventReporter(
source_cell_instance._context, event_name,
self.migration.dest_compute, self.instance.uuid):
self.compute_rpcapi.revert_snapshot_based_resize_at_dest(
self.context, self.instance, self.migration)
# NOTE(mriedem): revert_snapshot_based_resize_at_dest updates the
# target cell instance so if we need to do something with it here
# in the future before destroying it, it should be refreshed.
# Destroy the instance and its related records from the target cell DB.
        LOG.info('Deleting instance record from target cell %s',
                 self.context.cell_uuid, instance=self.instance)
# This needs to be a hard delete because if we retry the resize to the
# target cell we could hit a duplicate entry unique constraint error.
self.instance.destroy(hard_delete=True)
# Launch the guest at the source host with the old flavor.
LOG.debug('Calling source host %s to finish reverting cross-cell '
'resize.', self.migration.source_compute,
instance=self.instance)
self.compute_rpcapi.finish_revert_snapshot_based_resize_at_source(
source_cell_instance._context, source_cell_instance,
source_cell_migration)
# finish_revert_snapshot_based_resize_at_source updates the source cell
# instance so refresh it here so we have the latest copy.
source_cell_instance.refresh()
# Finish the conductor_revert_snapshot_based_resize event in the source
# cell DB. ComputeTaskManager.revert_snapshot_based_resize uses the
# wrap_instance_event decorator to create this action/event in the
# target cell DB but now that the target cell instance is gone the
# event needs to show up in the source cell DB.
objects.InstanceActionEvent.event_finish(
source_cell_instance._context, source_cell_instance.uuid,
'conductor_revert_snapshot_based_resize', want_result=False)
# Send the resize.revert.end notification using the instance from
# the source cell since we end there.
self._send_resize_revert_notification(
source_cell_instance, fields.NotificationPhase.END)
def rollback(self, ex):
with excutils.save_and_reraise_exception():
# If we have updated the instance mapping to point at the source
# cell we update the records in the source cell, otherwise we
# update the records in the target cell.
instance_at_source = self._source_cell_migration is not None
migration = self._source_cell_migration or self.migration
instance = self._source_cell_instance or self.instance
# NOTE(mriedem): This exception log is fairly generic. We could
# probably make this more targeted based on what we know of the
# state of the system if we want to make it more detailed, e.g.
# the execute method could "record" checkpoints to be used here
# or we could check to see if the instance was deleted from the
# target cell by trying to refresh it and handle InstanceNotFound.
LOG.exception(
                'An error occurred while reverting the resize for the '
                'instance. It is mapped to the %s cell %s. If the instance '
'was deleted from the target cell %s then the target host %s '
'was already cleaned up. If the instance is back in the '
'source cell then you can try hard-rebooting it to recover.',
('source' if instance_at_source else 'target'),
migration._context.cell_uuid, self.context.cell_uuid,
migration.dest_compute, instance=instance)
# If anything failed set the migration status to 'error'.
migration.status = 'error'
migration.save()
# Put the instance into ERROR status, record a fault and send an
# error notification.
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
request_spec = objects.RequestSpec.get_by_instance_uuid(
self.context, instance.uuid)
scheduler_utils.set_vm_state_and_notify(
instance._context, instance.uuid, 'compute_task',
'migrate_server', updates, ex, request_spec)
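

# Revert flow sketch (illustrative; mirrors RevertResizeTask._execute):
#
#     RevertResizeTask(target_ctx, instance, migration, legacy_notifier,
#                      compute_rpcapi).execute()
#     # 1. copy instance/BDM/action/migration state from target to source cell
#     # 2. swap the hidden flags and re-point the mapping at the source cell
#     # 3. RPC the dest compute to tear down, hard-delete the target cell copy
#     # 4. RPC the source compute to restore the guest with the old flavor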