Create and bind Cyborg ARQs.
* Call Cyborg with device profile name to get ARQs (Accelerator Requests). Each ARQ corresponds to a single device profile group, which corrresponds to a single request group in request spec. * Match each ARQ to associated request group, and thereby obtain the corresponding RP for that ARQ. * Call Cyborg to bind the ARQ to that host/device-RP. * When Cyborg sends the ARQ bind notification events, wait for those events with a timeout. Change-Id: I0f8b6bf2b4f4510da6c84fede532533602b6af7f Blueprint: nova-cyborg-interaction
This commit is contained in:
parent
0c52730f6a
commit
cc630b4eb6
|
@ -80,6 +80,7 @@ def get_device_profile_request_groups(context, dp_name):
|
|||
|
||||
class _CyborgClient(object):
|
||||
DEVICE_PROFILE_URL = "/device_profiles"
|
||||
ARQ_URL = "/accelerator_requests"
|
||||
|
||||
def __init__(self, context):
|
||||
auth = service_auth.get_auth_plugin(context)
|
||||
|
@ -145,3 +146,120 @@ class _CyborgClient(object):
|
|||
rg.add_trait(trait_name=name, trait_type=val)
|
||||
request_groups.append(rg)
|
||||
return request_groups
|
||||
|
||||
def _create_arqs(self, dp_name):
|
||||
data = {"device_profile_name": dp_name}
|
||||
resp, err_msg = self._call_cyborg(self._client.post,
|
||||
self.ARQ_URL, json=data)
|
||||
|
||||
if err_msg:
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('create'), msg=err_msg)
|
||||
|
||||
return resp.json().get('arqs')
|
||||
|
||||
def create_arqs_and_match_resource_providers(self, dp_name, rg_rp_map):
|
||||
"""Create ARQs, match them with request groups and thereby
|
||||
determine their corresponding RPs.
|
||||
|
||||
:param dp_name: Device profile name
|
||||
:param rg_rp_map: Request group - Resource Provider map
|
||||
{requester_id: [resource_provider_uuid]}
|
||||
:returns:
|
||||
[arq], with each ARQ associated with an RP
|
||||
:raises: DeviceProfileError, AcceleratorRequestOpFailed
|
||||
"""
|
||||
LOG.info('Creating ARQs for device profile %s', dp_name)
|
||||
arqs = self._create_arqs(dp_name)
|
||||
if not arqs or len(arqs) == 0:
|
||||
msg = _('device profile name %s') % dp_name
|
||||
raise exception.AcceleratorRequestOpFailed(op=_('create'), msg=msg)
|
||||
for arq in arqs:
|
||||
dp_group_id = arq['device_profile_group_id']
|
||||
arq['device_rp_uuid'] = None
|
||||
requester_id = (
|
||||
get_device_profile_group_requester_id(dp_group_id))
|
||||
arq['device_rp_uuid'] = rg_rp_map[requester_id][0]
|
||||
return arqs
|
||||
|
||||
def bind_arqs(self, bindings):
|
||||
"""Initiate Cyborg bindings.
|
||||
|
||||
Handles RFC 6902-compliant JSON patching, sparing
|
||||
calling Nova code from those details.
|
||||
|
||||
:param bindings:
|
||||
{ "$arq_uuid": {
|
||||
"hostname": STRING
|
||||
"device_rp_uuid": UUID
|
||||
"instance_uuid": UUID
|
||||
},
|
||||
...
|
||||
}
|
||||
:returns: nothing
|
||||
:raises: AcceleratorRequestOpFailed
|
||||
"""
|
||||
LOG.info('Binding ARQs.')
|
||||
# Create a JSON patch in RFC 6902 format
|
||||
patch_list = {}
|
||||
for arq_uuid, binding in bindings.items():
|
||||
patch = [{"path": "/" + field,
|
||||
"op": "add",
|
||||
"value": value
|
||||
} for field, value in binding.items()]
|
||||
patch_list[arq_uuid] = patch
|
||||
|
||||
resp, err_msg = self._call_cyborg(self._client.patch,
|
||||
self.ARQ_URL, json=patch_list)
|
||||
if err_msg:
|
||||
msg = _(' Binding failed for ARQ UUIDs: ')
|
||||
err_msg = err_msg + msg + ','.join(bindings.keys())
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('bind'), msg=err_msg)
|
||||
|
||||
def get_arqs_for_instance(self, instance_uuid, only_resolved=False):
|
||||
"""Get ARQs for the instance.
|
||||
|
||||
:param instance_uuid: Instance UUID
|
||||
:param only_resolved: flag to return only resolved ARQs
|
||||
:returns: List of ARQs for the instance:
|
||||
if only_resolved: only those ARQs which have completed binding
|
||||
else: all ARQs
|
||||
The format of the returned data structure is as below:
|
||||
[
|
||||
{'uuid': $arq_uuid,
|
||||
'device_profile_name': $dp_name,
|
||||
'device_profile_group_id': $dp_request_group_index,
|
||||
'state': 'Bound',
|
||||
'device_rp_uuid': $resource_provider_uuid,
|
||||
'hostname': $host_nodename,
|
||||
'instance_uuid': $instance_uuid,
|
||||
'attach_handle_info': { # PCI bdf
|
||||
'bus': '0c', 'device': '0',
|
||||
'domain': '0000', 'function': '0'},
|
||||
'attach_handle_type': 'PCI'
|
||||
# or 'TEST_PCI' for Cyborg fake driver
|
||||
}
|
||||
]
|
||||
:raises: AcceleratorRequestOpFailed
|
||||
"""
|
||||
query = {"instance": instance_uuid}
|
||||
resp, err_msg = self._call_cyborg(self._client.get,
|
||||
self.ARQ_URL, params=query)
|
||||
|
||||
if err_msg:
|
||||
err_msg = err_msg + _(' Instance %s') % instance_uuid
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('get'), msg=err_msg)
|
||||
|
||||
arqs = resp.json().get('arqs')
|
||||
if not arqs:
|
||||
err_msg = _('Cyborg returned no accelerator requests for '
|
||||
'instance %s') % instance_uuid
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('get'), msg=err_msg)
|
||||
|
||||
if only_resolved:
|
||||
arqs = [arq for arq in arqs if
|
||||
arq['state'] in ['Bound', 'BindFailed', 'Deleting']]
|
||||
return arqs
|
||||
|
|
|
@ -56,6 +56,7 @@ from oslo_utils import units
|
|||
import six
|
||||
from six.moves import range
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import block_device
|
||||
from nova.compute import api as compute
|
||||
from nova.compute import build_results
|
||||
|
@ -2512,6 +2513,14 @@ class ComputeManager(manager.Manager):
|
|||
self.host, phase=fields.NotificationPhase.END,
|
||||
bdms=block_device_mapping)
|
||||
|
||||
def _build_resources_cleanup(self, instance, network_info):
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance,
|
||||
network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _build_resources(self, context, instance, requested_networks,
|
||||
security_groups, image_meta, block_device_mapping,
|
||||
|
@ -2566,33 +2575,34 @@ class ComputeManager(manager.Manager):
|
|||
except (exception.InstanceNotFound,
|
||||
exception.UnexpectedDeletingTaskStateError):
|
||||
with excutils.save_and_reraise_exception():
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance,
|
||||
network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
except (exception.UnexpectedTaskStateError,
|
||||
exception.OverQuota, exception.InvalidBDM) as e:
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance, network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=e.format_message())
|
||||
except Exception:
|
||||
LOG.exception('Failure prepping block device',
|
||||
instance=instance)
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance, network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
msg = _('Failure prepping block device.')
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=msg)
|
||||
|
||||
arqs = []
|
||||
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
|
||||
try:
|
||||
if dp_name:
|
||||
arqs = self._get_bound_arq_resources(
|
||||
context, dp_name, instance)
|
||||
except (Exception, eventlet.timeout.Timeout) as exc:
|
||||
LOG.exception(exc.format_message())
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
msg = _('Failure getting accelerator requests.')
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=msg)
|
||||
resources['accel_info'] = arqs
|
||||
|
||||
try:
|
||||
yield resources
|
||||
except Exception as exc:
|
||||
|
@ -2623,6 +2633,48 @@ class ComputeManager(manager.Manager):
|
|||
instance_uuid=instance.uuid,
|
||||
reason=six.text_type(exc))
|
||||
|
||||
def _get_bound_arq_resources(self, context, dp_name, instance):
|
||||
"""Get bound accelerator requests.
|
||||
|
||||
The ARQ binding was kicked off in the conductor as an async
|
||||
operation. Here we wait for the notification from Cyborg.
|
||||
|
||||
If the notification arrived before this point, which can happen
|
||||
in many/most cases (see [1]), it will be lost. To handle that,
|
||||
we use exit_wait_early.
|
||||
[1] https://review.opendev.org/#/c/631244/46/nova/compute/
|
||||
manager.py@2627
|
||||
|
||||
:param dp_name: Device profile name. Caller ensures this is valid.
|
||||
:param instance: instance object
|
||||
:returns: List of ARQs for which bindings have completed,
|
||||
successfully or otherwise
|
||||
"""
|
||||
|
||||
cyclient = cyborg.get_client(context)
|
||||
arqs = cyclient.get_arqs_for_instance(instance.uuid)
|
||||
events = [('accelerator-request-bound', arq['uuid']) for arq in arqs]
|
||||
timeout = CONF.arq_binding_timeout
|
||||
with self.virtapi.wait_for_instance_event(
|
||||
instance, events, deadline=timeout):
|
||||
resolved_arqs = cyclient.get_arqs_for_instance(
|
||||
instance.uuid, only_resolved=True)
|
||||
# Events for these resolved ARQs may have already arrived.
|
||||
# Such 'early' events need to be ignored.
|
||||
early_events = [('accelerator-request-bound', arq['uuid'])
|
||||
for arq in resolved_arqs]
|
||||
if early_events:
|
||||
self.virtapi.exit_wait_early(early_events)
|
||||
|
||||
# Since a timeout in wait_for_instance_event will raise, we get
|
||||
# here only if all binding events have been received.
|
||||
if sorted(resolved_arqs) != sorted(arqs):
|
||||
# Query Cyborg to get all.
|
||||
arqs = cyclient.get_arqs_for_instance(instance.uuid)
|
||||
else:
|
||||
arqs = resolved_arqs # latter has the right arq.state
|
||||
return arqs
|
||||
|
||||
def _cleanup_allocated_networks(self, context, instance,
|
||||
requested_networks):
|
||||
"""Cleanup networks allocated for instance.
|
||||
|
|
|
@ -31,6 +31,7 @@ from oslo_utils import timeutils
|
|||
from oslo_utils import versionutils
|
||||
import six
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import availability_zones
|
||||
from nova.compute import instance_actions
|
||||
from nova.compute import rpcapi as compute_rpcapi
|
||||
|
@ -835,6 +836,18 @@ class ComputeTaskManager(base.Base):
|
|||
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
|
||||
host.service_host, host.nodename, alts, instance=instance)
|
||||
|
||||
try:
|
||||
resource_provider_mapping = (
|
||||
local_reqspec.get_request_group_mapping())
|
||||
self._create_and_bind_arqs(
|
||||
context, instance.uuid, instance.flavor.extra_specs,
|
||||
host.nodename, resource_provider_mapping)
|
||||
except Exception as exc:
|
||||
LOG.exception('Failed to reschedule. Reason: %s', exc)
|
||||
self._cleanup_when_reschedule_fails(context, instance, exc,
|
||||
legacy_request_spec, requested_networks)
|
||||
continue
|
||||
|
||||
self.compute_rpcapi.build_and_run_instance(context,
|
||||
instance=instance, host=host.service_host, image=image,
|
||||
request_spec=local_reqspec,
|
||||
|
@ -1604,6 +1617,21 @@ class ComputeTaskManager(base.Base):
|
|||
# this one.
|
||||
continue
|
||||
|
||||
try:
|
||||
resource_provider_mapping = (
|
||||
request_spec.get_request_group_mapping())
|
||||
# Using nodename instead of hostname. See:
|
||||
# http://lists.openstack.org/pipermail/openstack-discuss/2019-November/011044.html # noqa
|
||||
self._create_and_bind_arqs(
|
||||
context, instance.uuid, instance.flavor.extra_specs,
|
||||
host.nodename, resource_provider_mapping)
|
||||
except Exception as exc:
|
||||
# If anything failed here we need to cleanup and bail out.
|
||||
with excutils.save_and_reraise_exception():
|
||||
self._cleanup_build_artifacts(
|
||||
context, exc, instances, build_requests, request_specs,
|
||||
block_device_mapping, tags, cell_mapping_cache)
|
||||
|
||||
# NOTE(danms): Compute RPC expects security group names or ids
|
||||
# not objects, so convert this to a list of names until we can
|
||||
# pass the objects.
|
||||
|
@ -1622,6 +1650,33 @@ class ComputeTaskManager(base.Base):
|
|||
host=host.service_host, node=host.nodename,
|
||||
limits=host.limits, host_list=host_list)
|
||||
|
||||
def _create_and_bind_arqs(self, context, instance_uuid, extra_specs,
|
||||
hostname, resource_provider_mapping):
|
||||
"""Create ARQs, determine their RPs and initiate ARQ binding.
|
||||
|
||||
The binding is asynchronous; Cyborg will notify on completion.
|
||||
The notification will be handled in the compute manager.
|
||||
"""
|
||||
dp_name = extra_specs.get('accel:device_profile')
|
||||
if not dp_name:
|
||||
return
|
||||
|
||||
LOG.debug('Calling Cyborg to get ARQs. dp_name=%s instance=%s',
|
||||
dp_name, instance_uuid)
|
||||
cyclient = cyborg.get_client(context)
|
||||
arqs = cyclient.create_arqs_and_match_resource_providers(
|
||||
dp_name, resource_provider_mapping)
|
||||
LOG.debug('Got ARQs with resource provider mapping %s', arqs)
|
||||
|
||||
bindings = {arq['uuid']:
|
||||
{"hostname": hostname,
|
||||
"device_rp_uuid": arq['device_rp_uuid'],
|
||||
"instance_uuid": instance_uuid
|
||||
}
|
||||
for arq in arqs}
|
||||
# Initiate Cyborg binding asynchronously
|
||||
cyclient.bind_arqs(bindings=bindings)
|
||||
|
||||
@staticmethod
|
||||
def _map_instance_to_cell(context, instance, cell):
|
||||
"""Update the instance mapping to point at the given cell.
|
||||
|
|
|
@ -176,6 +176,17 @@ Related options:
|
|||
* vif_plugging_is_fatal - If ``vif_plugging_timeout`` is set to zero and
|
||||
``vif_plugging_is_fatal`` is False, events should not be expected to
|
||||
arrive at all.
|
||||
"""),
|
||||
cfg.IntOpt('arq_binding_timeout',
|
||||
default=300,
|
||||
min=1,
|
||||
help="""
|
||||
Timeout for Accelerator Request (ARQ) bind event message arrival.
|
||||
|
||||
Number of seconds to wait for ARQ bind resolution event to arrive.
|
||||
The event indicates that every ARQ for an instance has either bound
|
||||
successfully or failed to bind. If it does not arrive, instance bringup
|
||||
is aborted with an exception.
|
||||
"""),
|
||||
cfg.StrOpt('injected_network_template',
|
||||
default=paths.basedir_def('nova/virt/interfaces.template'),
|
||||
|
|
|
@ -2293,3 +2293,7 @@ class UnexpectedResourceProviderNameForPCIRequest(NovaException):
|
|||
|
||||
class DeviceProfileError(NovaException):
|
||||
msg_fmt = _("Device profile name %(name)s: %(msg)s")
|
||||
|
||||
|
||||
class AcceleratorRequestOpFailed(NovaException):
|
||||
msg_fmt = _("Failed to %(op)s accelerator requests: %(msg)s")
|
||||
|
|
|
@ -2526,14 +2526,136 @@ def _get_device_profile(dp_name, trait):
|
|||
return dp
|
||||
|
||||
|
||||
def get_arqs(dp_name):
|
||||
arq = {
|
||||
'uuid': 'b59d34d3-787b-4fb0-a6b9-019cd81172f8',
|
||||
'device_profile_name': dp_name,
|
||||
'device_profile_group_id': 0,
|
||||
'state': 'Initial',
|
||||
'device_rp_uuid': None,
|
||||
'hostname': None,
|
||||
'instance_uuid': None,
|
||||
'attach_handle_info': {},
|
||||
'attach_handle_type': '',
|
||||
}
|
||||
bound_arq = copy.deepcopy(arq)
|
||||
bound_arq.update(
|
||||
{'state': 'Bound',
|
||||
'attach_handle_type': 'TEST_PCI',
|
||||
'attach_handle_info': {
|
||||
'bus': '0c',
|
||||
'device': '0',
|
||||
'domain': '0000',
|
||||
'function': '0'
|
||||
},
|
||||
})
|
||||
return [arq], [bound_arq]
|
||||
|
||||
|
||||
class CyborgFixture(fixtures.Fixture):
|
||||
"""Fixture that mocks Cyborg APIs used by nova/accelerator/cyborg.py"""
|
||||
|
||||
dp_name = 'fakedev-dp'
|
||||
trait = 'CUSTOM_FAKE_DEVICE'
|
||||
arq_list, bound_arq_list = get_arqs(dp_name)
|
||||
|
||||
# NOTE(Sundar): The bindings passed to the fake_bind_arqs() from the
|
||||
# conductor are indexed by ARQ UUID and include the host name, device
|
||||
# RP UUID and instance UUID. (See params to fake_bind_arqs below.)
|
||||
#
|
||||
# Later, when the compute manager calls fake_get_arqs_for_instance() with
|
||||
# the instance UUID, the returned ARQs must contain the host name and
|
||||
# device RP UUID. But these can vary from test to test.
|
||||
#
|
||||
# So, fake_bind_arqs() below takes bindings indexed by ARQ UUID and
|
||||
# converts them to bindings indexed by instance UUID, which are then
|
||||
# stored in the dict below. This dict looks like:
|
||||
# { $instance_uuid: [
|
||||
# {'hostname': $hostname,
|
||||
# 'device_rp_uuid': $device_rp_uuid,
|
||||
# 'arq_uuid': $arq_uuid
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# Since it is indexed by instance UUID, and that is presumably unique
|
||||
# across concurrently executing tests, this should be safe for
|
||||
# concurrent access.
|
||||
bindings_by_instance = {}
|
||||
|
||||
@staticmethod
|
||||
def fake_bind_arqs(bindings):
|
||||
"""Simulate Cyborg ARQ bindings.
|
||||
|
||||
Since Nova calls Cyborg for binding on per-instance basis, the
|
||||
instance UUIDs would be the same for all ARQs in a single call.
|
||||
|
||||
This function converts bindings indexed by ARQ UUID to bindings
|
||||
indexed by instance UUID, so that fake_get_arqs_for_instance can
|
||||
retrieve them later.
|
||||
|
||||
:param bindings:
|
||||
{ "$arq_uuid": {
|
||||
"hostname": STRING
|
||||
"device_rp_uuid": UUID
|
||||
"instance_uuid": UUID
|
||||
},
|
||||
...
|
||||
}
|
||||
:returns: None
|
||||
"""
|
||||
binding_by_instance = collections.defaultdict(list)
|
||||
for index, arq_uuid in enumerate(bindings):
|
||||
arq_binding = bindings[arq_uuid]
|
||||
# instance_uuid is same for all ARQs in a single call.
|
||||
instance_uuid = arq_binding['instance_uuid']
|
||||
newbinding = {
|
||||
'hostname': arq_binding['hostname'],
|
||||
'device_rp_uuid': arq_binding['device_rp_uuid'],
|
||||
'arq_uuid': arq_uuid,
|
||||
}
|
||||
binding_by_instance[instance_uuid].append(newbinding)
|
||||
|
||||
CyborgFixture.bindings_by_instance.update(binding_by_instance)
|
||||
|
||||
@staticmethod
|
||||
def fake_get_arqs_for_instance(instance_uuid, only_resolved=False):
|
||||
"""Get list of bound ARQs for this instance.
|
||||
|
||||
This function uses bindings indexed by instance UUID to
|
||||
populate the bound ARQ templates in CyborgFixture.bound_arq_list.
|
||||
"""
|
||||
arq_host_rp_list = CyborgFixture.bindings_by_instance[instance_uuid]
|
||||
# The above looks like:
|
||||
# [{'hostname': $hostname,
|
||||
# 'device_rp_uuid': $device_rp_uuid,
|
||||
# 'arq_uuid': $arq_uuid
|
||||
# }]
|
||||
|
||||
bound_arq_list = copy.deepcopy(CyborgFixture.bound_arq_list)
|
||||
for arq in bound_arq_list:
|
||||
match = [(arq_host_rp['hostname'],
|
||||
arq_host_rp['device_rp_uuid'],
|
||||
instance_uuid)
|
||||
for arq_host_rp in arq_host_rp_list
|
||||
if arq_host_rp['arq_uuid'] == arq['uuid']
|
||||
]
|
||||
# Only 1 ARQ UUID would match, so len(match) == 1
|
||||
arq['hostname'], arq['device_rp_uuid'], arq['instance_uuid'] = (
|
||||
match[0][0], match[0][1], match[0][2])
|
||||
return bound_arq_list
|
||||
|
||||
def setUp(self):
|
||||
super(CyborgFixture, self).setUp()
|
||||
self.mock_get_dp = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient._get_device_profile_list',
|
||||
return_value=_get_device_profile(self.dp_name, self.trait))).mock
|
||||
self.mock_create_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient._create_arqs',
|
||||
return_value=self.arq_list)).mock
|
||||
self.mock_bind_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient.bind_arqs',
|
||||
side_effect=self.fake_bind_arqs)).mock
|
||||
self.mock_get_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient.'
|
||||
'get_arqs_for_instance',
|
||||
side_effect=self.fake_get_arqs_for_instance)).mock
|
||||
|
|
|
@ -7835,6 +7835,20 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
extra_spec=extra_specs)
|
||||
return flavor_id
|
||||
|
||||
def _check_allocations_usage(self, server_uuid):
|
||||
host_rp_uuid = self.compute_rp_uuids[0]
|
||||
device_rp_uuid = self.device_rp_map[host_rp_uuid]
|
||||
expected_host_alloc = {
|
||||
'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
|
||||
}
|
||||
expected_device_alloc = {'resources': {'FPGA': 1}}
|
||||
|
||||
host_alloc = self._get_allocations_by_provider_uuid(host_rp_uuid)
|
||||
self.assertEqual(expected_host_alloc, host_alloc[server_uuid])
|
||||
|
||||
device_alloc = self._get_allocations_by_provider_uuid(device_rp_uuid)
|
||||
self.assertEqual(expected_device_alloc, device_alloc[server_uuid])
|
||||
|
||||
|
||||
class AcceleratorServerTest(AcceleratorServerBase):
|
||||
def setUp(self):
|
||||
|
@ -7844,10 +7858,110 @@ class AcceleratorServerTest(AcceleratorServerBase):
|
|||
def test_create_server(self):
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
self._create_server(
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ACTIVE')
|
||||
|
||||
self.cyborg.mock_get_dp.assert_called_once_with(self.cyborg.dp_name)
|
||||
# TODO(Sundar): Add checks for Placement allocations.
|
||||
# Verify that the host name and the device rp UUID are set properly.
|
||||
# Other fields in the ARQ are hardcoded data from the fixture.
|
||||
arqs = self.cyborg.fake_get_arqs_for_instance(server['id'])
|
||||
self.assertEqual(self.device_rp_uuids[0], arqs[0]['device_rp_uuid'])
|
||||
self.assertEqual(server['OS-EXT-SRV-ATTR:host'], arqs[0]['hostname'])
|
||||
|
||||
# Check allocations and usage
|
||||
self._check_allocations_usage(server['id'])
|
||||
|
||||
|
||||
class AcceleratorServerReschedTest(AcceleratorServerBase):
|
||||
|
||||
def setUp(self):
|
||||
self.NUM_HOSTS = 2
|
||||
super(AcceleratorServerReschedTest, self).setUp()
|
||||
|
||||
def _check_allocations_usage_resched(self, server):
|
||||
# Check allocations on host where instance is running
|
||||
server_uuid = server['id']
|
||||
|
||||
hostname = server['OS-EXT-SRV-ATTR:host']
|
||||
server_host_rp_uuid = self._get_provider_uuid_by_host(hostname)
|
||||
expected_host_alloc = {
|
||||
'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
|
||||
}
|
||||
expected_device_alloc = {'resources': {'FPGA': 1}}
|
||||
|
||||
for i in range(self.NUM_HOSTS):
|
||||
compute_uuid = self.compute_rp_uuids[i]
|
||||
device_uuid = self.device_rp_map[compute_uuid]
|
||||
host_alloc = self._get_allocations_by_provider_uuid(compute_uuid)
|
||||
device_alloc = self._get_allocations_by_provider_uuid(device_uuid)
|
||||
if compute_uuid == server_host_rp_uuid:
|
||||
self.assertEqual(expected_host_alloc, host_alloc[server_uuid])
|
||||
self.assertEqual(expected_device_alloc,
|
||||
device_alloc[server_uuid])
|
||||
else:
|
||||
self.assertEqual({}, host_alloc)
|
||||
self.assertEqual({}, device_alloc)
|
||||
|
||||
# NOTE(Sundar): ARQs for an instance could come from different
|
||||
# devices in the same host, in general. But, in this test case,
|
||||
# there is only one device in the host. So, we check for that.
|
||||
device_rp_uuid = self.device_rp_map[server_host_rp_uuid]
|
||||
expected_arq_bind_info = set([('Bound', hostname,
|
||||
device_rp_uuid, server_uuid)])
|
||||
arqs = nova_fixtures.CyborgFixture.fake_get_arqs_for_instance(
|
||||
server_uuid)
|
||||
# The state is hardcoded but other fields come from the test case.
|
||||
arq_bind_info = {(arq['state'], arq['hostname'],
|
||||
arq['device_rp_uuid'], arq['instance_uuid'])
|
||||
for arq in arqs}
|
||||
self.assertSetEqual(expected_arq_bind_info, arq_bind_info)
|
||||
|
||||
def _check_no_allocations(self, server_uuid):
|
||||
allocs = self._get_allocations_by_server_uuid(server_uuid)
|
||||
self.assertEqual(allocs, {})
|
||||
|
||||
for i in range(self.NUM_HOSTS):
|
||||
usage = self._get_provider_usages(
|
||||
self.device_rp_uuids[i]).get('FPGA')
|
||||
self.assertEqual(usage, 0)
|
||||
|
||||
def test_resched(self):
|
||||
orig_spawn = fake.FakeDriver.spawn
|
||||
|
||||
def fake_spawn(*args, **kwargs):
|
||||
fake_spawn.count += 1
|
||||
if fake_spawn.count == 1:
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='First host fake fail.', instance_uuid='fake')
|
||||
else:
|
||||
orig_spawn(*args, **kwargs)
|
||||
fake_spawn.count = 0
|
||||
|
||||
with mock.patch('nova.virt.fake.FakeDriver.spawn', new=fake_spawn):
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ACTIVE')
|
||||
|
||||
self.assertEqual(2, fake_spawn.count)
|
||||
self._check_allocations_usage_resched(server)
|
||||
|
||||
def test_resched_fails(self):
|
||||
|
||||
def throw_error(*args, **kwargs):
|
||||
raise exception.ComputeResourcesUnavailable(reason='',
|
||||
instance_uuid='fake')
|
||||
|
||||
self.stub_out('nova.virt.fake.FakeDriver.spawn', throw_error)
|
||||
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ERROR')
|
||||
|
||||
self._check_no_allocations(server['id'])
|
||||
|
|
|
@ -12,16 +12,20 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import itertools
|
||||
import mock
|
||||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
from requests.models import Response
|
||||
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import context
|
||||
from nova import exception
|
||||
from nova.objects import request_spec
|
||||
from nova import test
|
||||
from nova.tests.unit import fake_requests
|
||||
|
||||
|
||||
class CyborgTestCase(test.NoDBTestCase):
|
||||
|
@ -130,3 +134,216 @@ class CyborgTestCase(test.NoDBTestCase):
|
|||
self.assertRaises(exception.DeviceProfileError,
|
||||
self.client.get_device_profile_groups,
|
||||
dp_name='mydp')
|
||||
|
||||
def _get_arqs_and_request_groups(self):
|
||||
arq_common = {
|
||||
# All ARQs for an instance have same device profile name.
|
||||
"device_profile_name": "noprog-dp",
|
||||
"device_rp_uuid": "",
|
||||
"hostname": "",
|
||||
"instance_uuid": "",
|
||||
"state": "Initial",
|
||||
}
|
||||
arq_variants = [
|
||||
{"device_profile_group_id": 0,
|
||||
"uuid": "edbba496-3cc8-4256-94ca-dfe3413348eb"},
|
||||
{"device_profile_group_id": 1,
|
||||
"uuid": "20125bcb-9f55-4e13-8e8c-3fee30e54cca"},
|
||||
]
|
||||
arqs = [dict(arq_common, **variant) for variant in arq_variants]
|
||||
rg_rp_map = {
|
||||
'device_profile_0': ['c532cf11-02ed-4b03-9dd8-3e9a454131dc'],
|
||||
'device_profile_1': ['2c332d7b-daaf-4726-a80d-ecf5212da4b8'],
|
||||
}
|
||||
return arqs, rg_rp_map
|
||||
|
||||
def _get_bound_arqs(self):
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
common = {
|
||||
'host_name': 'myhost',
|
||||
'instance_uuid': '15d3acf8-df76-400b-bfc9-484a5208daa1',
|
||||
}
|
||||
bindings = {
|
||||
arqs[0]['uuid']: dict(
|
||||
common, device_rp_uuid=rg_rp_map['device_profile_0'][0]),
|
||||
arqs[1]['uuid']: dict(
|
||||
common, device_rp_uuid=rg_rp_map['device_profile_1'][0]),
|
||||
}
|
||||
bound_arq_common = {
|
||||
"attach_handle_info": {
|
||||
"bus": "01",
|
||||
"device": "00",
|
||||
"domain": "0000",
|
||||
"function": "0" # will vary function ID later
|
||||
},
|
||||
"attach_handle_type": "PCI",
|
||||
"state": "Bound",
|
||||
# Devic eprofile name is common to all bound ARQs
|
||||
"device_profile_name": arqs[0]["device_profile_name"],
|
||||
**common
|
||||
}
|
||||
bound_arqs = [
|
||||
{'uuid': arq['uuid'],
|
||||
'device_profile_group_id': arq['device_profile_group_id'],
|
||||
'device_rp_uuid': bindings[arq['uuid']]['device_rp_uuid'],
|
||||
**bound_arq_common} for arq in arqs]
|
||||
for index, bound_arq in enumerate(bound_arqs):
|
||||
bound_arq['attach_handle_info']['function'] = index # fix func ID
|
||||
return bindings, bound_arqs
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.post')
|
||||
def test_create_arqs_failure(self, mock_cyborg_post):
|
||||
# If Cyborg returns invalid response, raise exception.
|
||||
mock_cyborg_post.return_value = None
|
||||
self.assertRaises(exception.AcceleratorRequestOpFailed,
|
||||
self.client._create_arqs,
|
||||
dp_name='mydp')
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
|
||||
'_create_arqs')
|
||||
def test_create_arq_and_match_rps(self, mock_create_arqs):
|
||||
# Happy path
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
dp_name = arqs[0]["device_profile_name"]
|
||||
|
||||
mock_create_arqs.return_value = arqs
|
||||
|
||||
ret_arqs = self.client.create_arqs_and_match_resource_providers(
|
||||
dp_name, rg_rp_map)
|
||||
|
||||
# Each value in rg_rp_map is a list. We merge them into a single list.
|
||||
expected_rp_uuids = sorted(list(
|
||||
itertools.chain.from_iterable(rg_rp_map.values())))
|
||||
ret_rp_uuids = sorted([arq['device_rp_uuid'] for arq in ret_arqs])
|
||||
self.assertEqual(expected_rp_uuids, ret_rp_uuids)
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
|
||||
'_create_arqs')
|
||||
def test_create_arq_and_match_rps_exception(self, mock_create_arqs):
|
||||
# If Cyborg response does not contain ARQs, raise
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
dp_name = arqs[0]["device_profile_name"]
|
||||
|
||||
mock_create_arqs.return_value = None
|
||||
self.assertRaises(
|
||||
exception.AcceleratorRequestOpFailed,
|
||||
self.client.create_arqs_and_match_resource_providers,
|
||||
dp_name, rg_rp_map)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.patch')
|
||||
def test_bind_arqs(self, mock_cyborg_patch):
|
||||
bindings, bound_arqs = self._get_bound_arqs()
|
||||
arq_uuid = bound_arqs[0]['uuid']
|
||||
|
||||
patch_list = {}
|
||||
for arq_uuid, binding in bindings.items():
|
||||
patch = [{"path": "/" + field,
|
||||
"op": "add",
|
||||
"value": value
|
||||
} for field, value in binding.items()]
|
||||
patch_list[arq_uuid] = patch
|
||||
|
||||
self.client.bind_arqs(bindings)
|
||||
|
||||
mock_cyborg_patch.assert_called_once_with(
|
||||
self.client.ARQ_URL, json=mock.ANY)
|
||||
called_params = mock_cyborg_patch.call_args.kwargs['json']
|
||||
self.assertEqual(sorted(called_params), sorted(patch_list))
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient._call_cyborg')
|
||||
def test_bind_arqs_exception(self, mock_call_cyborg):
|
||||
# If Cyborg returns invalid response, raise exception.
|
||||
bindings, _ = self._get_bound_arqs()
|
||||
mock_call_cyborg.return_value = None, 'Some error'
|
||||
self.assertRaises(exception.AcceleratorRequestOpFailed,
|
||||
self.client.bind_arqs, bindings=bindings)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance(self, mock_cyborg_get):
    # Happy path, without only_resolved=True: the client queries
    # Cyborg's ARQ URL filtered by instance UUID and passes the
    # returned ARQs through unchanged.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    query = {"instance": instance_uuid}
    content = jsonutils.dumps({'arqs': bound_arqs})
    resp = fake_requests.FakeResponse(200, content)
    mock_cyborg_get.return_value = resp

    ret_arqs = self.client.get_arqs_for_instance(instance_uuid)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params=query)

    # Sort both sides by UUID so the pairwise comparison below does
    # not depend on the order Cyborg returned the ARQs in.
    bound_arqs.sort(key=lambda x: x['uuid'])
    ret_arqs.sort(key=lambda x: x['uuid'])
    for ret_arq, bound_arq in zip(ret_arqs, bound_arqs):
        self.assertDictEqual(ret_arq, bound_arq)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_exception(self, mock_cyborg_get):
    # A non-2xx reply from Cyborg must raise
    # AcceleratorRequestOpFailed.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        404, content='')
    self.assertRaises(
        exception.AcceleratorRequestOpFailed,
        self.client.get_arqs_for_instance, instance_uuid)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_exception_no_resp(self, mock_cyborg_get):
    # A 200 reply whose body carries no 'arqs' key must raise with a
    # message naming the instance.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        200, jsonutils.dumps({'noarqs': 'oops'}))
    self.assertRaisesRegex(
        exception.AcceleratorRequestOpFailed,
        'Cyborg returned no accelerator requests for ',
        self.client.get_arqs_for_instance, instance_uuid)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_all_resolved(self, mock_cyborg_get):
    # If all ARQs are resolved, only_resolved=True returns the full
    # list unchanged (nothing gets filtered out).
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    query = {"instance": instance_uuid}
    content = jsonutils.dumps({'arqs': bound_arqs})
    resp = fake_requests.FakeResponse(200, content)
    mock_cyborg_get.return_value = resp

    ret_arqs = self.client.get_arqs_for_instance(
        instance_uuid, only_resolved=True)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params=query)

    # Sort both sides by UUID so the pairwise comparison is
    # order-independent.
    bound_arqs.sort(key=lambda x: x['uuid'])
    ret_arqs.sort(key=lambda x: x['uuid'])
    for ret_arq, bound_arq in zip(ret_arqs, bound_arqs):
        self.assertDictEqual(ret_arq, bound_arq)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_some_resolved(self, mock_cyborg_get):
    # With only_resolved=True, unresolved (unbound) ARQs must be
    # filtered out and only the resolved ones returned.
    unbound_arqs, _ = self._get_arqs_and_request_groups()
    _, bound_arqs = self._get_bound_arqs()
    # Mix one unbound and one bound ARQ in Cyborg's reply.
    mixed_arqs = [unbound_arqs[0], bound_arqs[0]]
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        200, jsonutils.dumps({'arqs': mixed_arqs}))

    ret_arqs = self.client.get_arqs_for_instance(
        instance_uuid, only_resolved=True)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params={"instance": instance_uuid})
    self.assertEqual(ret_arqs, [bound_arqs[0]])
|
||||
|
|
|
@ -5993,6 +5993,188 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
mock_hooks.setdefault().run_post.assert_called_once_with(
|
||||
'build_instance', result, mock.ANY, mock.ANY, f=None)
|
||||
|
||||
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_default_block_device_names')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_prep_block_device')
@mock.patch.object(virt_driver.ComputeDriver,
                   'prepare_for_spawn')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_build_networks_for_instance')
@mock.patch.object(virt_driver.ComputeDriver,
                   'prepare_networks_before_block_device_mapping')
def _test_accel_build_resources(self, mock_prep_net, mock_build_net,
        mock_prep_spawn, mock_prep_bd, mock_bdnames, mock_save):
    # Helper: drive ComputeManager._build_resources() with everything
    # except the accelerator path mocked out, and return the
    # resources dict the context manager yields.
    args = (self.context, self.instance, self.requested_networks,
            self.security_groups, self.image, self.block_device_mapping,
            self.resource_provider_mapping)

    resources = []
    # Enter and immediately exit the context manager; the yielded
    # resources are all the callers of this helper need.
    with self.compute._build_resources(*args) as resources:
        pass

    return resources
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources_no_device_profile(self, mock_get_arqs):
    # If dp_name is None, accel path is a no-op: no flavor device
    # profile means _get_bound_arq_resources must never be called.
    self.instance.flavor.extra_specs = {}
    self._test_accel_build_resources()
    mock_get_arqs.assert_not_called()
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources(self, mock_get_arqs):
    # Happy path for accels in build_resources: the bound ARQ list
    # must land in resources['accel_info'].
    dp_name = "mydp"
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_list = fixtures.CyborgFixture.bound_arq_list
    mock_get_arqs.return_value = arq_list

    resources = self._test_accel_build_resources()

    mock_get_arqs.assert_called_once_with(self.context,
        dp_name, self.instance)
    # NOTE(review): sorted() over a list of dicts raises TypeError if
    # it ever has to compare two dicts -- presumably the fixture's
    # bound_arq_list makes this safe; confirm against the fixture.
    self.assertEqual(sorted(resources['accel_info']), sorted(arq_list))
|
||||
|
||||
@mock.patch.object(virt_driver.ComputeDriver,
                   'clean_networks_preparation')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources_exception(self, mock_get_arqs,
        mock_clean_net):
    # When fetching bound ARQs fails, _build_resources must re-raise
    # and unwind the network preparation.
    self.instance.flavor.extra_specs = {"accel:device_profile": "mydp"}
    mock_get_arqs.side_effect = exception.AcceleratorRequestOpFailed(
        op='get', msg='')

    self.assertRaises(exception.NovaException,
                      self._test_accel_build_resources)
    mock_clean_net.assert_called_once()
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_wait_exit_early(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # Bound ARQs available on first query, quit early.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    # One 'accelerator-request-bound' external event per ARQ.
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    # get_arqs_for_instance() is called twice, once to get all ARQs
    # for the instance, once to get only the resolved ARQs
    mock_get_arqs.return_value = arq_list

    ret_arqs = self.compute._get_bound_arq_resources(
        self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    # Since everything resolved on the first query, the waiter must
    # be told to stop waiting for the ARQ events.
    mock_exit_wait_early.assert_called_once_with(arq_events)

    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True)])

    self.assertEqual(sorted(ret_arqs), sorted(arq_list))
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_wait(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If binding is in progress, must wait.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]
    # get_arqs_for_instance gets called 3 times, returning the full
    # ARQ list first, [] as resolved ARQs next, and the full list finally
    mock_get_arqs.side_effect = [arq_list, [], arq_list]

    ret_arqs = self.compute._get_bound_arq_resources(
        self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    # Nothing was resolved on the first check, so the early-exit path
    # must NOT have been taken -- the code waited for the events.
    mock_exit_wait_early.assert_not_called()
    self.assertEqual(sorted(ret_arqs), sorted(arq_list))
    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True),
        mock.call(self.instance.uuid)])
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_timeout(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If binding fails even after wait, exception is thrown
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    mock_get_arqs.return_value = arq_list
    # Simulate the event wait timing out.
    mock_wait_inst_ev.side_effect = eventlet_timeout.Timeout

    # The Timeout must propagate out of _get_bound_arq_resources.
    self.assertRaises(eventlet_timeout.Timeout,
                      self.compute._get_bound_arq_resources,
                      self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    mock_exit_wait_early.assert_not_called()
    # Only the initial query happened; the wait aborted before the
    # only_resolved follow-up query.
    mock_get_arqs.assert_called_once_with(self.instance.uuid)
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_exception(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If the code inside the context manager of _get_bound_arq_resources
    # raises an exception, that exception must be handled.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    # First query succeeds; the only_resolved follow-up raises.
    mock_get_arqs.side_effect = [
        arq_list,
        exception.AcceleratorRequestOpFailed(op='', msg='')]

    self.assertRaises(exception.AcceleratorRequestOpFailed,
                      self.compute._get_bound_arq_resources,
                      self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    mock_exit_wait_early.assert_not_called()
    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True)])
|
||||
|
||||
def test_build_and_run_instance_called_with_proper_args(self):
    # Delegates entirely to the shared helper; no extra setup needed.
    self._test_build_and_run_instance()
|
||||
|
||||
|
|
|
@ -437,6 +437,8 @@ class _BaseTaskTestCase(object):
|
|||
def test_cold_migrate_forced_shutdown(self):
    # clean_shutdown=False exercises the forced power-off variant of
    # the shared cold-migrate helper.
    self._test_cold_migrate(clean_shutdown=False)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
|
||||
'_create_and_bind_arqs')
|
||||
@mock.patch.object(compute_rpcapi.ComputeAPI, 'build_and_run_instance')
|
||||
@mock.patch.object(db, 'block_device_mapping_get_all_by_instance',
|
||||
return_value=[])
|
||||
|
@ -448,7 +450,7 @@ class _BaseTaskTestCase(object):
|
|||
@mock.patch.object(objects.RequestSpec, 'from_primitives')
|
||||
def test_build_instances(self, mock_fp, mock_save, mock_getaz,
|
||||
mock_buildreq, mock_schedule, mock_bdm,
|
||||
mock_build):
|
||||
mock_build, mock_create_bind_arqs):
|
||||
"""Tests creating two instances and the scheduler returns a unique
|
||||
host/node combo for each instance.
|
||||
"""
|
||||
|
@ -530,6 +532,86 @@ class _BaseTaskTestCase(object):
|
|||
security_groups='security_groups',
|
||||
block_device_mapping=mock.ANY,
|
||||
node='node2', limits=None, host_list=sched_return[1])])
|
||||
mock_create_bind_arqs.assert_has_calls([
|
||||
mock.call(self.context, instances[0].uuid,
|
||||
instances[0].flavor.extra_specs, 'node1', mock.ANY),
|
||||
mock.call(self.context, instances[1].uuid,
|
||||
instances[1].flavor.extra_specs, 'node2', mock.ANY),
|
||||
])
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_cleanup_when_reschedule_fails')
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
@mock.patch.object(compute_rpcapi.ComputeAPI, 'build_and_run_instance')
@mock.patch.object(db, 'block_device_mapping_get_all_by_instance',
                   return_value=[])
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_schedule_instances')
@mock.patch('nova.objects.BuildRequest.get_by_instance_uuid')
@mock.patch('nova.availability_zones.get_host_availability_zone')
@mock.patch('nova.objects.Instance.save')
@mock.patch.object(objects.RequestSpec, 'from_primitives')
def test_build_instances_arq_failure(self, mock_fp, mock_save, mock_getaz,
        mock_buildreq, mock_schedule, mock_bdm,
        mock_build, mock_create_bind_arqs, mock_cleanup):
    """If _create_and_bind_arqs throws an exception,
    _destroy_build_request must be called for each instance.
    """
    # NOTE(review): the docstring mentions _destroy_build_request but
    # the assertions below check _cleanup_when_reschedule_fails --
    # presumably that is the path which destroys the build request;
    # confirm against the conductor code.
    fake_spec = objects.RequestSpec()
    mock_fp.return_value = fake_spec
    instance_type = objects.Flavor.get_by_name(self.context, 'm1.small')
    # NOTE(danms): Avoid datetime timezone issues with converted flavors
    instance_type.created_at = None
    instances = [objects.Instance(context=self.context,
                                  id=i,
                                  uuid=uuids.fake,
                                  flavor=instance_type) for i in range(2)]
    instance_properties = obj_base.obj_to_primitive(instances[0])
    instance_properties['system_metadata'] = flavors.save_flavor_info(
        {}, instance_type)

    sched_return = copy.deepcopy(fake_host_lists2)
    mock_schedule.return_value = sched_return

    # build_instances() is a cast, we need to wait for it to complete
    self.useFixture(cast_as_call.CastAsCall(self))

    mock_getaz.return_value = 'myaz'
    # Force the ARQ create/bind step to fail for every instance.
    mock_create_bind_arqs.side_effect = (
        exc.AcceleratorRequestOpFailed(op='', msg=''))

    self.conductor.build_instances(self.context,
                      instances=instances,
                      image={'fake_data': 'should_pass_silently'},
                      filter_properties={},
                      admin_password='admin_password',
                      injected_files='injected_files',
                      requested_networks=None,
                      security_groups='security_groups',
                      block_device_mapping='block_device_mapping',
                      legacy_bdm=False, host_lists=None)
    # Both instances must still have had an ARQ create/bind attempt,
    # one per scheduled node.
    mock_create_bind_arqs.assert_has_calls([
        mock.call(self.context, instances[0].uuid,
                  instances[0].flavor.extra_specs, 'node1', mock.ANY),
        mock.call(self.context, instances[1].uuid,
                  instances[1].flavor.extra_specs, 'node2', mock.ANY),
        ])
    # Comparing instances fails because the instance objects have changed
    # in the above flow. So, we compare the fields instead.
    mock_cleanup.assert_has_calls([
        mock.call(self.context, test.MatchType(objects.Instance),
                  test.MatchType(exc.AcceleratorRequestOpFailed),
                  test.MatchType(dict), None),
        mock.call(self.context, test.MatchType(objects.Instance),
                  test.MatchType(exc.AcceleratorRequestOpFailed),
                  test.MatchType(dict), None),
        ])
    call_list = mock_cleanup.call_args_list
    for idx, instance in enumerate(instances):
        actual_inst = call_list[idx][0][1]
        self.assertEqual(actual_inst['uuid'], instance['uuid'])
        self.assertEqual(actual_inst['flavor']['extra_specs'], {})
|
||||
|
||||
@mock.patch.object(scheduler_utils, 'build_request_spec')
|
||||
@mock.patch.object(scheduler_utils, 'setup_instance_group')
|
||||
|
@ -1880,6 +1962,45 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
self.params = params
|
||||
self.flavor = objects.Flavor.get_by_name(self.ctxt, 'm1.tiny')
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg.get_client')
def test_create_bind_arqs_no_device_profile(self, mock_get_client):
    # Without a device profile in the flavor, _create_and_bind_arqs
    # must be a no-op: it must not even construct a Cyborg client.
    instance = fake_instance.fake_instance_obj(self.context)
    instance.flavor.extra_specs = {}

    self.conductor._create_and_bind_arqs(
        self.context, instance.uuid, instance.flavor.extra_specs,
        'myhost', resource_provider_mapping=mock.ANY)

    mock_get_client.assert_not_called()
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.bind_arqs')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'create_arqs_and_match_resource_providers')
def test_create_bind_arqs(self, mock_create, mock_bind):
    # Happy path: ARQs are created for the flavor's device profile
    # and then bound to the chosen host.
    hostname = 'myhost'
    instance = fake_instance.fake_instance_obj(self.context)
    dp_name = 'mydp'
    instance.flavor.extra_specs = {'accel:device_profile': dp_name}

    in_arq_list, _ = fixtures.get_arqs(dp_name)
    mock_create.return_value = in_arq_list

    self.conductor._create_and_bind_arqs(self.context,
        instance.uuid, instance.flavor.extra_specs,
        hostname, resource_provider_mapping=mock.ANY)

    mock_create.assert_called_once_with(dp_name, mock.ANY)

    # NOTE(review): the ARQ UUID below is hard-coded to match what
    # fixtures.get_arqs returns -- presumably it stays in sync with
    # that fixture; confirm if the fixture changes.
    expected_bindings = {
        'b59d34d3-787b-4fb0-a6b9-019cd81172f8':
            {'hostname': hostname,
             'device_rp_uuid': mock.ANY,
             'instance_uuid': instance.uuid}
    }
    mock_bind.assert_called_once_with(bindings=expected_bindings)
|
||||
|
||||
@mock.patch('nova.availability_zones.get_host_availability_zone')
|
||||
@mock.patch('nova.compute.rpcapi.ComputeAPI.build_and_run_instance')
|
||||
@mock.patch('nova.scheduler.rpcapi.SchedulerAPI.select_destinations')
|
||||
|
@ -2399,6 +2520,44 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
self.params['request_specs'][0].requested_resources = []
|
||||
self._do_schedule_and_build_instances_test(self.params)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
def test_schedule_and_build_instances_with_arqs_bind_ok(
        self, mock_create_bind_arqs):
    # When the flavor carries a device profile,
    # schedule_and_build_instances must create and bind ARQs for the
    # scheduled node.
    extra_specs = {'accel:device_profile': 'mydp'}
    instance = self.params['build_requests'][0].instance
    instance.flavor.extra_specs = extra_specs

    self._do_schedule_and_build_instances_test(self.params)

    # NOTE(Sundar): At this point, the instance has not been
    # associated with a host yet. The default host.nodename is
    # 'node1'.
    mock_create_bind_arqs.assert_called_once_with(
        self.params['context'], instance.uuid, extra_specs,
        'node1', mock.ANY)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_cleanup_build_artifacts')
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
def test_schedule_and_build_instances_with_arqs_bind_exception(
        self, mock_create_bind_arqs, mock_cleanup):
    # Exceptions in _create_and_bind_arqs result in cleanup
    mock_create_bind_arqs.side_effect = (
        exc.AcceleratorRequestOpFailed(op='', msg=''))

    # The exception may or may not surface to the caller here; the
    # try/except tolerates either outcome -- the assertion that
    # matters is the cleanup call below.
    try:
        self._do_schedule_and_build_instances_test(self.params)
    except exc.AcceleratorRequestOpFailed:
        pass

    mock_cleanup.assert_called_once_with(
        self.params['context'], mock.ANY, mock.ANY,
        self.params['build_requests'], self.params['request_specs'],
        self.params['block_device_mapping'], self.params['tags'],
        mock.ANY)
|
||||
|
||||
def test_map_instance_to_cell_already_mapped(self):
|
||||
"""Tests a scenario where an instance is already mapped to a cell
|
||||
during scheduling.
|
||||
|
|
Loading…
Reference in New Issue