Cyborg evacuate support

This change extends the conductor manager
to append the cyborg resource request to the
request spec when performing an evacuate.

This change passes the ARQs to the driver's spawn method during
rebuild and evacuate. On evacuate, the existing ARQs are deleted
and new ARQs are created and bound; during rebuild, the existing
ARQs are reused.

This change extends the rebuild_instance compute rpcapi
function to carry the arq_uuids. This eliminates the need
to look up the UUIDs of the ARQs assigned to the instance
by querying cyborg.

Co-Authored-By: Wenping Song <songwenping@inspur.com>
Co-Authored-By: Brin Zhang <zhangbailin@inspur.com>

Implements: blueprint cyborg-rebuild-and-evacuate
Change-Id: I147bf4d95e6d86ff1f967a8ce37260730f21d236
Sean Mooney 2020-03-26 23:01:35 +00:00 committed by Balazs Gibizer
parent 75b5535e34
commit 1356ef5b57
17 changed files with 414 additions and 191 deletions


@ -29,9 +29,8 @@ appropriate privileges) must do the following:
openstack server create --flavor $myflavor --image $myimage $servername
As of 21.0.0 (Ussuri), nova supports only specific operations for instances
with accelerators. The lists of supported and unsupported operations are as
below:
Nova supports only specific operations for instances with accelerators.
The lists of supported and unsupported operations are as below:
* Supported operations.
@ -42,17 +41,21 @@ below:
* Take a snapshot.
* Backup.
* Rescue and unrescue.
* Rebuild.
* Evacuate.
* Unsupported operations
* Rebuild.
* Resize.
* Evacuate.
* Suspend and resume.
* Shelve and unshelve.
* Cold migration.
* Live migration.
.. versionchanged:: 22.0.0 (Victoria)
Added support for rebuild and evacuate operations.
Some operations, such as lock and unlock, work as they are effectively
no-ops for accelerators.


@ -302,3 +302,12 @@ class _CyborgClient(object):
if err_msg:
# No point raising an exception.
LOG.error('Failed to delete ARQs %s', arq_uuid_str)
def get_arq_uuids_for_instance(self, instance):
"""Get ARQ UUIDs for the instance.
:param instance: Instance Object
:return: ARQ UUIDs.
"""
return [arq['uuid']
for arq in self.get_arqs_for_instance(instance.uuid)]
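
For context, a rough sketch of what this helper consumes and produces, assuming get_arqs_for_instance returns the usual list of bound-ARQ dicts (payload abbreviated and hypothetical):

    # Hypothetical, trimmed ARQ records as returned by
    # get_arqs_for_instance(instance.uuid):
    arqs = [
        {'uuid': 'uuid-1', 'state': 'Bound', 'hostname': 'host-a'},
        {'uuid': 'uuid-2', 'state': 'Bound', 'hostname': 'host-a'},
    ]
    # get_arq_uuids_for_instance(instance) then returns:
    # ['uuid-1', 'uuid-2']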


@ -109,6 +109,8 @@ MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
# TODO(huaqiang): Remove in Wallaby
MIN_VER_NOVA_COMPUTE_MIXED_POLICY = 52
SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD = 53
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
@ -307,14 +309,27 @@ def _get_image_meta_obj(image_meta_dict):
return image_meta
def block_accelerators(func):
@functools.wraps(func)
def wrapper(self, context, instance, *args, **kwargs):
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if dp_name:
raise exception.ForbiddenWithAccelerators()
return func(self, context, instance, *args, **kwargs)
return wrapper
def block_accelerators(until_service=None):
def inner(func):
@functools.wraps(func)
def wrapper(self, context, instance, *args, **kwargs):
# NOTE(brinzhang): For an instance that requests accelerators,
# only allow the operation once all nova-compute services have
# been upgraded to a version that supports accelerators for it.
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
service_support = False
if not dp_name:
service_support = True
elif until_service:
min_version = objects.service.get_minimum_version_all_cells(
nova_context.get_admin_context(), ['nova-compute'])
if min_version >= until_service:
service_support = True
if not service_support:
raise exception.ForbiddenWithAccelerators()
return func(self, context, instance, *args, **kwargs)
return wrapper
return inner
@profiler.trace_cls("compute_api")
@ -3393,7 +3408,7 @@ class API(base.Base):
fields_obj.Architecture.canonicalize(img_arch)
@reject_vtpm_instances(instance_actions.REBUILD)
@block_accelerators
@block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
# TODO(stephenfin): We should expand kwargs out to named args
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
@ -3930,7 +3945,7 @@ class API(base.Base):
# finally split resize and cold migration into separate code paths
# TODO(stephenfin): The 'block_accelerators' decorator doesn't take into
# account the accelerators requested in the new flavor
@block_accelerators
@block_accelerators()
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
@check_instance_host(check_is_up=True)
@ -4159,7 +4174,7 @@ class API(base.Base):
return allow_same_host
@reject_vtpm_instances(instance_actions.SHELVE)
@block_accelerators
@block_accelerators()
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])
@ -4324,7 +4339,7 @@ class API(base.Base):
return self.compute_rpcapi.get_instance_diagnostics(context,
instance=instance)
@block_accelerators
@block_accelerators()
@reject_sev_instances(instance_actions.SUSPEND)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE])
@ -5028,7 +5043,7 @@ class API(base.Base):
diff=diff)
return _metadata
@block_accelerators
@block_accelerators()
@reject_vtpm_instances(instance_actions.LIVE_MIGRATION)
@reject_sev_instances(instance_actions.LIVE_MIGRATION)
@check_instance_lock
@ -5160,7 +5175,7 @@ class API(base.Base):
instance, migration.id)
@reject_vtpm_instances(instance_actions.EVACUATE)
@block_accelerators
@block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.ERROR])
def evacuate(self, context, instance, host, on_shared_storage,
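
To summarize the decorator change in this file: operations on instances with accelerators stay blocked unconditionally, unless the call site passes until_service, in which case the operation is allowed once every nova-compute service is new enough. A condensed sketch of the gate, reusing the names above:

    # Sketch of the block_accelerators(until_service=...) check.
    dp_name = instance.flavor.extra_specs.get('accel:device_profile')
    if dp_name:
        if until_service is None:
            raise exception.ForbiddenWithAccelerators()
        min_version = objects.service.get_minimum_version_all_cells(
            nova_context.get_admin_context(), ['nova-compute'])
        if min_version < until_service:
            raise exception.ForbiddenWithAccelerators()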


@ -530,7 +530,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.11')
target = messaging.Target(version='5.12')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -3256,18 +3256,29 @@ class ComputeManager(manager.Manager):
migration.status = status
migration.save()
def _rebuild_default_impl(self, context, instance, image_meta,
injected_files, admin_password, allocations,
bdms, detach_block_devices, attach_block_devices,
network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
def _rebuild_default_impl(
self, context, instance, image_meta, injected_files,
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None, evacuate=False,
block_device_info=None, preserve_ephemeral=False,
accel_uuids=None):
if preserve_ephemeral:
# The default code path does not support preserving ephemeral
# partitions.
raise exception.PreserveEphemeralNotSupported()
accel_info = []
if evacuate:
if instance.flavor.extra_specs.get('accel:device_profile'):
try:
accel_info = self._get_bound_arq_resources(
context, instance, accel_uuids or [])
except (Exception, eventlet.timeout.Timeout) as exc:
LOG.exception(exc)
self._build_resources_cleanup(instance, network_info)
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
detach_block_devices(context, bdms)
else:
self._power_off_instance(instance, clean_shutdown=True)
@ -3275,6 +3286,14 @@ class ComputeManager(manager.Manager):
self.driver.destroy(context, instance,
network_info=network_info,
block_device_info=block_device_info)
try:
accel_info = self._get_accel_info(context, instance)
except Exception as exc:
LOG.exception(exc)
self._build_resources_cleanup(instance, network_info)
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
instance.task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING
instance.save(expected_task_state=[task_states.REBUILDING])
@ -3289,7 +3308,8 @@ class ComputeManager(manager.Manager):
self.driver.spawn(context, instance, image_meta, injected_files,
admin_password, allocations,
network_info=network_info,
block_device_info=new_block_device_info)
block_device_info=new_block_device_info,
accel_info=accel_info)
def _notify_instance_rebuild_error(self, context, instance, error, bdms):
self._notify_about_instance_usage(context, instance,
@ -3298,7 +3318,8 @@ class ComputeManager(manager.Manager):
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=error, bdms=bdms)
@messaging.expected_exceptions(exception.PreserveEphemeralNotSupported)
@messaging.expected_exceptions(exception.PreserveEphemeralNotSupported,
exception.BuildAbortException)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@ -3307,7 +3328,7 @@ class ComputeManager(manager.Manager):
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec):
scheduled_node, limits, request_spec, accel_uuids):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -3338,6 +3359,7 @@ class ComputeManager(manager.Manager):
:param limits: Overcommit limits set by the scheduler. If a host was
specified by the user, this will be None
:param request_spec: a RequestSpec object used to schedule the instance
:param accel_uuids: a list of cyborg ARQ uuids.
"""
# recreate=True means the instance is being evacuated from a failed
@ -3402,7 +3424,7 @@ class ComputeManager(manager.Manager):
image_meta, injected_files, new_pass, orig_sys_metadata,
bdms, evacuate, on_shared_storage, preserve_ephemeral,
migration, request_spec, allocs, rebuild_claim,
scheduled_node, limits)
scheduled_node, limits, accel_uuids)
except (exception.ComputeResourcesUnavailable,
exception.RescheduledException) as e:
if isinstance(e, exception.ComputeResourcesUnavailable):
@ -3469,7 +3491,7 @@ class ComputeManager(manager.Manager):
self, context, instance, orig_image_ref, image_meta,
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, rebuild_claim, scheduled_node, limits):
allocations, rebuild_claim, scheduled_node, limits, accel_uuids):
"""Helper to avoid deep nesting in the top-level method."""
provider_mapping = None
@ -3490,7 +3512,7 @@ class ComputeManager(manager.Manager):
context, instance, orig_image_ref, image_meta, injected_files,
new_pass, orig_sys_metadata, bdms, evacuate, on_shared_storage,
preserve_ephemeral, migration, request_spec, allocations,
provider_mapping)
provider_mapping, accel_uuids)
@staticmethod
def _get_image_name(image_meta):
@ -3499,12 +3521,12 @@ class ComputeManager(manager.Manager):
else:
return ''
def _do_rebuild_instance(self, context, instance, orig_image_ref,
image_meta, injected_files, new_pass,
orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral,
migration, request_spec, allocations,
request_group_resource_providers_mapping):
def _do_rebuild_instance(
self, context, instance, orig_image_ref, image_meta,
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, request_group_resource_providers_mapping,
accel_uuids):
orig_vm_state = instance.vm_state
if evacuate:
@ -3645,7 +3667,8 @@ class ComputeManager(manager.Manager):
block_device_info=block_device_info,
network_info=network_info,
preserve_ephemeral=preserve_ephemeral,
evacuate=evacuate)
evacuate=evacuate,
accel_uuids=accel_uuids)
try:
with instance.mutated_migration_context():
self.driver.rebuild(**kwargs)
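
Condensed, the new accelerator handling in _rebuild_default_impl has two paths: on evacuate the compute waits for the freshly bound ARQs whose UUIDs the conductor passed down, while an in-place rebuild re-reads the ARQs already bound to the instance. A sketch, with the device-profile guard and error handling elided:

    if evacuate:
        # The conductor deleted the old ARQs and created/bound new
        # ones; wait for those bindings to resolve.
        accel_info = self._get_bound_arq_resources(
            context, instance, accel_uuids or [])
    else:
        # Same-host rebuild: the ARQs bound to the instance are
        # still valid, so just fetch them again.
        accel_info = self._get_accel_info(context, instance)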


@ -378,6 +378,8 @@ class ComputeAPI(object):
* 5.10 - Add finish_revert_snapshot_based_resize_at_source()
* 5.11 - Add accel_uuids (accelerator requests) parameter to
build_and_run_instance()
* 5.12 - Add accel_uuids (accelerator requests) parameter to
rebuild_instance()
'''
VERSION_ALIASES = {
@ -1056,20 +1058,29 @@ class ComputeAPI(object):
block_device_info=block_device_info,
reboot_type=reboot_type)
def rebuild_instance(self, ctxt, instance, new_pass, injected_files,
def rebuild_instance(
self, ctxt, instance, new_pass, injected_files,
image_ref, orig_image_ref, orig_sys_metadata, bdms,
recreate, on_shared_storage, host, node,
preserve_ephemeral, migration, limits, request_spec):
preserve_ephemeral, migration, limits, request_spec, accel_uuids):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
limits = limits.to_dict()
msg_args = {'preserve_ephemeral': preserve_ephemeral,
'migration': migration,
'scheduled_node': node,
'limits': limits,
'request_spec': request_spec}
version = '5.0'
msg_args = {
'preserve_ephemeral': preserve_ephemeral,
'migration': migration,
'scheduled_node': node,
'limits': limits,
'request_spec': request_spec,
'accel_uuids': accel_uuids
}
version = '5.12'
client = self.router.client(ctxt)
if not client.can_send_version(version):
del msg_args['accel_uuids']
version = '5.0'
cctxt = client.prepare(server=_compute_host(host, instance),
version=version)
cctxt.cast(ctxt, 'rebuild_instance',
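
The downgrade above is nova's standard rolling-upgrade pattern for RPC: pin to the newest version the deployment can handle and strip arguments the remote side does not understand. A minimal standalone sketch of the pattern (hypothetical helper, simplified from the method above):

    def _cast_rebuild(client, ctxt, host, **msg_args):
        # Prefer 5.12; computes pinned below it do not accept
        # accel_uuids, so drop the argument and use the 5.0 signature.
        version = '5.12'
        if not client.can_send_version(version):
            msg_args.pop('accel_uuids', None)
            version = '5.0'
        cctxt = client.prepare(server=host, version=version)
        cctxt.cast(ctxt, 'rebuild_instance', **msg_args)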


@ -1147,14 +1147,21 @@ class ComputeTaskManager(base.Base):
# is not forced to be the original host
request_spec.reset_forced_destinations()
port_res_req = (
external_resources = []
external_resources += (
self.network_api.get_requested_resource_for_instance(
context, instance.uuid))
# NOTE(gibi): When cyborg or other module wants to handle
# similar non-nova resources then here we have to collect
# all the external resource requests in a single list and
extra_specs = request_spec.flavor.extra_specs
device_profile = extra_specs.get('accel:device_profile')
external_resources.extend(
cyborg.get_device_profile_request_groups(
context, device_profile)
if device_profile else [])
# NOTE(gibi): When other modules want to handle similar
# non-nova resources then here we have to collect all
# the external resource requests in a single list and
# add them to the RequestSpec.
request_spec.requested_resources = port_res_req
request_spec.requested_resources = external_resources
try:
# if this is a rebuild of instance on the same host with
@ -1219,21 +1226,49 @@ class ComputeTaskManager(base.Base):
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host))
try:
accel_uuids = self._rebuild_cyborg_arq(
context, instance, host, request_spec, evacuate)
except exception.AcceleratorRequestBindingFailed as exc:
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_by_uuid(exc.arqs)
LOG.exception('Failed to rebuild. Reason: %s', exc)
raise exc
self.compute_rpcapi.rebuild_instance(context,
instance=instance,
new_pass=new_pass,
injected_files=injected_files,
image_ref=image_ref,
orig_image_ref=orig_image_ref,
orig_sys_metadata=orig_sys_metadata,
bdms=bdms,
recreate=evacuate,
on_shared_storage=on_shared_storage,
preserve_ephemeral=preserve_ephemeral,
migration=migration,
host=host, node=node, limits=limits,
request_spec=request_spec)
self.compute_rpcapi.rebuild_instance(
context,
instance=instance,
new_pass=new_pass,
injected_files=injected_files,
image_ref=image_ref,
orig_image_ref=orig_image_ref,
orig_sys_metadata=orig_sys_metadata,
bdms=bdms,
recreate=evacuate,
on_shared_storage=on_shared_storage,
preserve_ephemeral=preserve_ephemeral,
migration=migration,
host=host,
node=node,
limits=limits,
request_spec=request_spec,
accel_uuids=accel_uuids)
def _rebuild_cyborg_arq(
self, context, instance, host, request_spec, evacuate):
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if not dp_name:
return []
cyclient = cyborg.get_client(context)
if not evacuate:
return cyclient.get_arq_uuids_for_instance(instance)
cyclient.delete_arqs_for_instance(instance.uuid)
resource_provider_mapping = request_spec.get_request_group_mapping()
return self._create_and_bind_arqs(
context, instance.uuid, instance.flavor.extra_specs,
host, resource_provider_mapping)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied


@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 52
SERVICE_VERSION = 53
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -187,6 +187,9 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '5.11'},
# Version 52: Add support for the 'mixed' CPU allocation policy
{'compute_rpc': '5.11'},
# Version 53: Compute RPC v5.12:
# Add accel_uuids (accelerator requests) param to rebuild_instance
{'compute_rpc': '5.12'},
)


@ -477,6 +477,13 @@ class InstanceHelperMixin:
self.api.post_server_action(server['id'], {'unshelve': {}})
return self._wait_for_state_change(server, expected_state)
def _evacuate_server(self, server, host, expected_state='ACTIVE'):
"""Evacuate a server."""
self.api.post_server_action(server['id'], {'evacuate': {}})
self._wait_for_server_parameter(
server, {'OS-EXT-SRV-ATTR:host': host,
'status': expected_state})
class PlacementHelperMixin:
"""A helper mixin for interacting with placement."""


@ -8086,6 +8086,20 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
networks='none', expected_state='ACTIVE')
def _test_evacuate(self, server, num_hosts):
server_hostname = server['OS-EXT-SRV-ATTR:host']
for i in range(num_hosts):
if self.compute_services[i].host == server_hostname:
compute_to_stop = self.compute_services[i]
else:
compute_to_evacuate = self.compute_services[i]
# Stop and force down the compute service.
compute_id = self.admin_api.get_services(
host=server_hostname, binary='nova-compute')[0]['id']
compute_to_stop.stop()
self.admin_api.put_service(compute_id, {'forced_down': 'true'})
return compute_to_stop, compute_to_evacuate
def test_soft_reboot_ok(self):
self._reboot_server(self.server)
self._check_allocations_usage(self.server)
@ -8160,6 +8174,30 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self.api.post_server_action(self.server['id'], {'unrescue': {}})
self._check_allocations_usage(self.server)
def test_evacuate_ok(self):
server_hostname = self.server['OS-EXT-SRV-ATTR:host']
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
compute_to_stop, compute_to_evacuate = self._test_evacuate(
self.server, self.NUM_HOSTS)
self._evacuate_server(self.server, compute_to_evacuate.host)
compute_to_stop.start()
self.server = self.api.get_server(self.server['id'])
arqs_new = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
evac_hostname = self.server['OS-EXT-SRV-ATTR:host']
self.assertNotEqual(server_hostname, evac_hostname)
self.assertEqual(server_hostname, arqs[0]['hostname'])
self.assertEqual(evac_hostname, arqs_new[0]['hostname'])
def test_rebuild_ok(self):
rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID
self.api.post_server_action(self.server['id'],
{'rebuild': {
'imageRef': rebuild_image_ref,
'OS-DCF:diskConfig': 'AUTO'}})
fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_resize_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
@ -8186,30 +8224,29 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_evacuate_fails(self):
server_hostname = self.server['OS-EXT-SRV-ATTR:host']
for i in range(self.NUM_HOSTS):
hostname = 'accel_host' + str(i)
if hostname != server_hostname:
other_hostname = hostname
if self.compute_services[i].host == server_hostname:
compute_to_stop = self.compute_services[i]
# Stop and force down the compute service.
compute_id = self.admin_api.get_services(
host=server_hostname, binary='nova-compute')[0]['id']
compute_to_stop.stop()
self.admin_api.put_service(compute_id, {'forced_down': 'true'})
@mock.patch.object(objects.service, 'get_minimum_version_all_cells')
def test_evacuate_old_compute(self, old_compute_version):
"""Tests when the source compute service is too old to call
evacuate so OpenStackApiException is raised.
"""
old_compute_version.return_value = 52
_, compute_to_evacuate = self._test_evacuate(
self.server, self.NUM_HOSTS)
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'evacuate': {
'host': other_hostname,
'host': compute_to_evacuate.host,
'adminPass': 'MySecretPass'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_rebuild_fails(self):
@mock.patch.object(objects.service, 'get_minimum_version_all_cells')
def test_rebuild_old_compute(self, old_compute_version):
"""Tests when the source compute service is too old to call
rebuild so OpenStackApiException is raised.
"""
old_compute_version.return_value = 52
rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],


@ -23,6 +23,7 @@ from oslo_serialization import jsonutils
from nova.accelerator import cyborg
from nova import context
from nova import exception
from nova import objects
from nova.objects import request_spec
from nova import test
from nova.tests.unit import fake_requests
@ -394,3 +395,25 @@ class CyborgTestCase(test.NoDBTestCase):
self.client.ARQ_URL, params={'arqs': arq_uuid_str})
mock_log.assert_called_once_with('Failed to delete ARQs %s',
arq_uuid_str)
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arq_uuids_for_instance(self, mock_cyborg_get):
# Happy path, without only_resolved=True
_, bound_arqs = self._get_bound_arqs()
instance_uuid = bound_arqs[0]['instance_uuid']
flavor = objects.Flavor(extra_specs={'accel:device_profile': 'dp1'})
instance = objects.Instance(flavor=flavor,
uuid=instance_uuid)
query = {"instance": instance_uuid}
content = jsonutils.dumps({'arqs': bound_arqs})
resp = fake_requests.FakeResponse(200, content)
mock_cyborg_get.return_value = resp
ret_arqs = self.client.get_arq_uuids_for_instance(instance)
mock_cyborg_get.assert_called_once_with(
self.client.ARQ_URL, params=query)
bound_arqs = [bound_arq['uuid'] for bound_arq in bound_arqs]
bound_arqs.sort()
ret_arqs.sort()
self.assertEqual(bound_arqs, ret_arqs)


@ -2122,7 +2122,7 @@ class _ComputeAPIUnitTestMixIn(object):
fake_inst = self._create_instance_obj()
fake_inst.flavor = cur_flavor
new_flavor = objects.Flavor(id=2, name='bar', vcpus=1, memory_mb=2048,
root_gb=10, disabled=False)
root_gb=10, disabled=False, extra_specs={})
mock_get.return_value = new_flavor
mock_check.side_effect = exception.OverQuota(
overs=['ram'], quotas={'cores': 1, 'ram': 2048},
@ -7569,8 +7569,9 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
mock_get_min_ver.assert_called_once_with(
self.context, ['nova-compute'])
def _test_block_accelerators(self, instance, args_info):
@compute_api.block_accelerators
def _test_block_accelerators(self, instance, args_info,
until_service=None):
@compute_api.block_accelerators(until_service=until_service)
def myfunc(self, context, instance, *args, **kwargs):
args_info['args'] = (context, instance, *args)
args_info['kwargs'] = dict(**kwargs)
@ -7604,6 +7605,37 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
# myfunc was not called
self.assertEqual({}, args_info)
@mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=54)
def test_block_accelerators_until_service(self, mock_get_min):
"""Support operating server with acclerators until compute service
more than the version of 53.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
args_info = {}
expected_args, kwargs = self._test_block_accelerators(
instance, args_info, until_service=53)
self.assertEqual(expected_args, args_info['args'])
self.assertEqual(kwargs, args_info['kwargs'])
@mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=52)
def test_block_accelerators_until_service_forbidden(self, mock_get_min):
"""Ensure a 'ForbiddenWithAccelerators' exception raises if any
compute service less than the version of 53.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
args_info = {}
self.assertRaisesRegex(exception.ForbiddenWithAccelerators,
'Forbidden with instances that have accelerators.',
self._test_block_accelerators, instance, args_info, 53)
# myfunc was not called
self.assertEqual({}, args_info)
# TODO(huaqiang): Remove in Wallaby
@mock.patch('nova.objects.service.get_minimum_version_all_cells')
def test__check_compute_service_for_mixed_instance(self, mock_ver):


@ -2743,16 +2743,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False,
migration=None, scheduled_node=None,
limits={}, request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_driver(self):
@ -2777,16 +2773,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2833,16 +2825,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=bdms, recreate=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
on_shared_storage=False,
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=bdms, recreate=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={},
on_shared_storage=False, request_spec=None, accel_uuids=[])
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2856,14 +2844,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
'', '', injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata, bdms=[],
recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits=None,
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, '', '', injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits=None,
request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_launched_at_time(self):
@ -2879,16 +2865,12 @@ class ComputeTestCase(BaseTestCase,
time_fixture.advance_time_delta(cur_time - old_time)
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata={},
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata={}, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={}, request_spec=None,
accel_uuids=[])
instance.refresh()
self.assertEqual(cur_time,
instance['launched_at'].replace(tzinfo=None))
@ -2916,16 +2898,12 @@ class ComputeTestCase(BaseTestCase,
instance['uuid'])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=injected_files,
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False,
migration=None, scheduled_node=None,
limits={}, request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref,
injected_files=injected_files, new_pass="new_password",
orig_sys_metadata=sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@ -4654,7 +4632,8 @@ class ComputeTestCase(BaseTestCase,
'scheduled_node': None,
'limits': {},
'request_spec': None,
'on_shared_storage': False}),
'on_shared_storage': False,
'accel_uuids': ()}),
("set_admin_password", task_states.UPDATING_PASSWORD,
{'new_pass': None}),
("rescue_instance", task_states.RESCUING,
@ -5166,17 +5145,12 @@ class ComputeTestCase(BaseTestCase,
inst_ref.task_state = task_states.REBUILDING
inst_ref.save()
self.compute.rebuild_instance(self.context,
inst_ref,
image_ref, new_image_ref,
injected_files=[],
new_pass=password,
orig_sys_metadata=orig_sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, inst_ref, image_ref, new_image_ref,
injected_files=[], new_pass=password,
orig_sys_metadata=orig_sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
inst_ref.refresh()
@ -12818,7 +12792,7 @@ class EvacuateHostTestCase(BaseTestCase):
image_ref, injected_files, 'newpass', {}, bdms, recreate=True,
on_shared_storage=on_shared_storage, migration=migration,
preserve_ephemeral=False, scheduled_node=node, limits=limits,
request_spec=None)
request_spec=None, accel_uuids=[])
if vm_states_is_stopped:
mock_notify_rebuild.assert_has_calls([
mock.call(ctxt, self.inst, self.inst.host, phase='start',
@ -13007,7 +12981,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
@mock.patch.object(fake.FakeDriver, 'spawn')
def test_rebuild_on_host_without_shared_storage(self, mock_spawn):
@ -13025,7 +12999,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
def test_rebuild_on_host_instance_exists(self):
"""Rebuild if instance exists raises an exception."""
@ -13069,7 +13043,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchObjPrims(self.inst.image_meta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
@mock.patch.object(fake.FakeDriver, 'spawn')
def test_on_shared_storage_not_provided_host_with_shared_storage(self,
@ -13085,7 +13059,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
def test_rebuild_migration_passed_in(self):
migration = mock.Mock(spec=objects.Migration)
@ -13128,7 +13102,7 @@ class EvacuateHostTestCase(BaseTestCase):
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with patch_spawn, patch_on_disk, patch_claim, patch_remove_allocs:
self.assertRaises(exception.BuildAbortException,
self.assertRaises(messaging.ExpectedException,
self._rebuild, migration=migration,
send_node=True)
self.assertEqual("failed", migration.status)
@ -13162,7 +13136,7 @@ class EvacuateHostTestCase(BaseTestCase):
# NOTE(ndipanov): Make sure that we pass the topology from the context
def fake_spawn(context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
block_device_info=None, accel_info=None):
self.assertIsNone(instance.numa_topology)
self.inst.numa_topology = numa_topology


@ -5138,10 +5138,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_rebuild.side_effect = exc
self.compute.rebuild_instance(self.context, instance, None, None, None,
None, None, None, recreate,
False, False, None, scheduled_node, {},
None)
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None, None,
recreate, False, False, None, scheduled_node, {}, None, [])
mock_set.assert_called_once_with(None, 'failed')
mock_notify_about_instance_usage.assert_called_once_with(
mock.ANY, instance, 'rebuild.error', fault=mock_rebuild.side_effect
@ -5205,7 +5204,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
instance = fake_instance.fake_instance_obj(self.context)
instance.vm_state = vm_states.ACTIVE
ex = exception.ComputeResourcesUnavailable(reason='out of foo')
self.assertRaises(exception.BuildAbortException,
self.assertRaises(messaging.ExpectedException,
self._test_rebuild_ex, instance, ex)
# Make sure the instance vm_state did not change.
self.assertEqual(vm_states.ACTIVE, instance.vm_state)
@ -5252,7 +5251,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
None, recreate=True, on_shared_storage=None,
preserve_ephemeral=False, migration=None,
scheduled_node='fake-node',
limits={}, request_spec=request_spec)
limits={}, request_spec=request_spec, accel_uuids=[])
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5287,11 +5286,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_validate_policy.side_effect = exc
self.assertRaises(
exception.BuildAbortException, self.compute.rebuild_instance,
messaging.ExpectedException, self.compute.rebuild_instance,
self.context, instance, None, None, None, None, None, None,
recreate=True, on_shared_storage=None, preserve_ephemeral=False,
migration=None, scheduled_node='fake-node', limits={},
request_spec=request_spec)
request_spec=request_spec, accel_uuids=[])
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5313,9 +5312,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.compute, '_set_migration_status'),
) as (mock_get, mock_rebuild, mock_save, mock_set):
self.compute.rebuild_instance(self.context, instance, None, None,
None, None, None, None, False,
False, False, None, None, {}, None)
self.compute.rebuild_instance(
self.context, instance, None, None,
None, None, None, None, False,
False, False, None, None, {}, None, [])
self.assertFalse(mock_get.called)
self.assertEqual(node, instance.node)
mock_set.assert_called_once_with(None, 'done')
@ -5335,9 +5335,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(self.compute, '_set_migration_status'),
) as (mock_get, mock_rebuild, mock_save, mock_set):
mock_get.return_value.hypervisor_hostname = 'new-node'
self.compute.rebuild_instance(self.context, instance, None, None,
None, None, None, None, True,
False, False, None, None, {}, None)
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None,
None, True, False, False, None, None, {}, None, [])
mock_get.assert_called_once_with(mock.ANY, self.compute.host)
self.assertEqual('new-node', instance.node)
mock_set.assert_called_once_with(None, 'done')
@ -5419,7 +5419,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
recreate, on_shared_storage,
preserve_ephemeral, {}, {},
self.allocations,
mock.sentinel.mapping)
mock.sentinel.mapping, [])
mock_notify_usage.assert_has_calls(
[mock.call(self.context, instance, "rebuild.start",
@ -5450,7 +5450,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
def _spawn(context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
block_device_info=None, accel_info=None):
self.assertEqual(block_device_info['block_device_mapping'],
'shared_block_storage')
@ -5462,12 +5462,15 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(objects.Instance, 'save',
return_value=None),
mock.patch.object(self.compute, '_power_off_instance',
return_value=None)
return_value=None),
mock.patch.object(self.compute, '_get_accel_info',
return_value=[])
) as(
mock_destroy,
mock_spawn,
mock_save,
mock_power_off
mock_power_off,
mock_accel_info
):
instance = fake_instance.fake_instance_obj(self.context)
instance.migration_context = None
@ -5518,7 +5521,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
preserve_ephemeral=False, migration=objects.Migration(),
request_spec=objects.RequestSpec(),
allocations=self.allocations,
request_group_resource_providers_mapping=mock.sentinel.mapping)
request_group_resource_providers_mapping=mock.sentinel.mapping,
accel_uuids=[])
self.assertIn('Trusted image certificates provided on host',
six.text_type(ex))


@ -693,12 +693,55 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
reboot_type='type')
def test_rebuild_instance(self):
self._test_compute_api('rebuild_instance', 'cast', new_pass='None',
injected_files='None', image_ref='None', orig_image_ref='None',
bdms=[], instance=self.fake_instance_obj, host='new_host',
orig_sys_metadata=None, recreate=True, on_shared_storage=True,
preserve_ephemeral=True, migration=None, node=None,
limits=None, request_spec=None, version='5.0')
# With rpcapi 5.12, when a list of accel_uuids is passed as a param,
# that list must be forwarded to the client. That is verified in
# _test_compute_api by the rpc_mock assertion, whose expected_kwargs
# must include the accel_uuids.
self._test_compute_api(
'rebuild_instance', 'cast', new_pass='None',
injected_files='None', image_ref='None', orig_image_ref='None',
bdms=[], instance=self.fake_instance_obj, host='new_host',
orig_sys_metadata=None, recreate=True, on_shared_storage=True,
preserve_ephemeral=True, migration=None, node=None,
limits=None, request_spec=None, accel_uuids=[], version='5.12')
def test_rebuild_instance_old_rpcapi(self):
# With rpcapi < 5.12, accel_uuids must be dropped in the client call.
ctxt = context.RequestContext('fake_user', 'fake_project')
compute_api = compute_rpcapi.ComputeAPI()
compute_api.router.client = mock.Mock()
mock_client = mock.MagicMock()
compute_api.router.client.return_value = mock_client
# Force can_send_version to False, so that the 5.0 version is used.
mock_client.can_send_version.return_value = False
mock_cctx = mock.MagicMock()
mock_client.prepare.return_value = mock_cctx
rebuild_args = {
'new_pass': 'admin_password',
'injected_files': 'files_to_inject',
'image_ref': uuids.image_ref,
'orig_image_ref': uuids.orig_image_ref,
'orig_sys_metadata': 'orig_sys_meta',
'bdms': {},
'recreate': False,
'on_shared_storage': False,
'preserve_ephemeral': False,
'request_spec': None,
'migration': None,
'limits': None
}
compute_api.rebuild_instance(
ctxt, instance=self.fake_instance_obj,
accel_uuids=['938af7f9-f136-4e5a-bdbe-3b6feab54311'],
node=None, host=None, **rebuild_args)
mock_client.can_send_version.assert_called_once_with('5.12')
mock_client.prepare.assert_called_with(
server=self.fake_instance_obj.host, version='5.0')
mock_cctx.cast.assert_called_with( # No accel_uuids
ctxt, 'rebuild_instance',
instance=self.fake_instance_obj,
scheduled_node=None, **rebuild_args)
def test_reserve_block_device_name(self):
self.flags(long_rpc_timeout=1234)


@ -390,6 +390,7 @@ class _BaseTaskTestCase(object):
compute_rebuild_args['migration'] = migration
compute_rebuild_args['node'] = node
compute_rebuild_args['limits'] = limits
compute_rebuild_args['accel_uuids'] = []
return rebuild_args, compute_rebuild_args
@ -1780,6 +1781,7 @@ class _BaseTaskTestCase(object):
rebuild_args, _ = self._prepare_rebuild_args(
{'host': None, 'recreate': True})
fake_spec = objects.RequestSpec()
fake_spec.flavor = inst_obj.flavor
rebuild_args['request_spec'] = fake_spec
with test.nested(
@ -1847,6 +1849,7 @@ class _BaseTaskTestCase(object):
fake_selection = objects.Selection(service_host=expected_host,
nodename=expected_node, limits=None)
fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host])
fake_spec.flavor = inst_obj.flavor
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits,
'request_spec': fake_spec, 'recreate': True})


@ -286,7 +286,7 @@ class ComputeDriver(object):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
preserve_ephemeral=False, accel_uuids=None):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -323,6 +323,7 @@ class ComputeDriver(object):
attached to the instance.
:param preserve_ephemeral: True if the default ephemeral storage
partition must be preserved on rebuild
:param accel_uuids: Accelerator UUIDs.
"""
raise NotImplementedError()


@ -1670,7 +1670,7 @@ class IronicDriver(virt_driver.ComputeDriver):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
preserve_ephemeral=False, accel_uuids=None):
"""Rebuild/redeploy an instance.
This version of rebuild() allows for supporting the option to
@ -1710,7 +1710,7 @@ class IronicDriver(virt_driver.ComputeDriver):
information. Ignored by this driver.
:param preserve_ephemeral: Boolean value; if True the ephemeral
must be preserved on rebuild.
:param accel_uuids: Accelerator UUIDs. Ignored by this driver.
"""
LOG.debug('Rebuild called for instance', instance=instance)