383e2a8bdc
Replace six.text_type with str. A subsequent patch will replace other six.text_type. Change-Id: I23bb9e539d08f5c6202909054c2dd49b6c7a7a0e Implements: blueprint six-removal Signed-off-by: Takashi Natsume <takanattie@gmail.com>
312 lines
12 KiB
Python
312 lines
12 KiB
Python
# Copyright 2019 Intel
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from oslo_log import log as logging
|
|
|
|
from keystoneauth1 import exceptions as ks_exc
|
|
|
|
from nova import exception
|
|
from nova.i18n import _
|
|
from nova import objects
|
|
from nova.scheduler import utils as schedutils
|
|
from nova import service_auth
|
|
from nova import utils
|
|
|
|
"""
|
|
Note on object relationships:

* 1 device profile (DP) has D >= 1 request groups (just as a flavor
  has many request groups).
* Each DP request group corresponds to exactly 1 numbered request
  group (RG) in the request spec.
* Each numbered RG corresponds to exactly one resource provider (RP).
* A DP request group may request A >= 1 accelerators, and so result
  in the creation of A ARQs.
* Each ARQ corresponds to exactly 1 DP request group.

A device profile is a dictionary:

    { "name": "mydpname",
      "uuid": <uuid>,
      "groups": [ <device_profile_request_group> ]
    }

A device profile group is a dictionary too:

    { "resources:CUSTOM_ACCELERATOR_FPGA": "2",
      "resources:CUSTOM_LOCAL_MEMORY": "1",
      "trait:CUSTOM_INTEL_PAC_ARRIA10": "required",
      "trait:CUSTOM_FUNCTION_NAME_FALCON_GZIP_1_1": "required",
      # 0 or more Cyborg properties
      "accel:bitstream_id": "FB021995_BF21_4463_936A_02D49D4DB5E5"
    }

See cyborg/cyborg/objects/device_profile.py for more details.
|
|
"""
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def get_client(context):
    """Return a new Cyborg client bound to the given request context.

    :param context: request context handed to the client constructor,
        which uses it to obtain the auth plugin.
    :returns: a ``_CyborgClient`` instance
    """
    return _CyborgClient(context)
|
|
|
|
|
|
def get_device_profile_group_requester_id(dp_group_id):
    """Return the value to use in objects.RequestGroup.requester_id.

    The requester_id is used to match device profile groups from
    Cyborg to the request groups in request spec.

    :param dp_group_id: The index of the request group in the device profile.
    :returns: the string ``"device_profile_"`` followed by ``str(dp_group_id)``
    """
    return "device_profile_" + str(dp_group_id)
|
|
|
|
|
|
def get_device_profile_request_groups(context, dp_name):
    """Fetch the request groups described by a named device profile.

    Convenience wrapper that builds a client for ``context`` and delegates
    to its ``get_device_profile_groups`` method.

    :param context: request context used to build the Cyborg client
    :param dp_name: device profile name
    :returns: whatever ``get_device_profile_groups(dp_name)`` returns
    """
    cyclient = get_client(context)
    return cyclient.get_device_profile_groups(dp_name)
|
|
|
|
|
|
class _CyborgClient(object):
    """Client for the Cyborg accelerator REST API.

    All HTTP traffic goes through the adapter returned by
    ``utils.get_ksa_adapter('accelerator', ...)``. Failures reported by
    ``_call_cyborg`` are converted by the public methods into the
    exceptions documented on each of them.
    """

    DEVICE_PROFILE_URL = "/device_profiles"
    ARQ_URL = "/accelerator_requests"

    def __init__(self, context):
        """Create the adapter using an auth plugin derived from context.

        :param context: request context passed to
            ``service_auth.get_auth_plugin``
        """
        auth = service_auth.get_auth_plugin(context)
        self._client = utils.get_ksa_adapter('accelerator', ksa_auth=auth)

    def _call_cyborg(self, func, *args, **kwargs):
        """Invoke a client call and report failure as a message.

        :param func: callable to invoke (e.g. ``self._client.get``);
            ``args``/``kwargs`` are forwarded to it unchanged.
        :returns: tuple ``(resp, err_msg)``. ``err_msg`` is None on
            success. ``resp`` is None when ``ks_exc.ClientException`` was
            raised by the call.
        """
        resp = err_msg = None
        try:
            resp = func(*args, **kwargs)
            # A falsy response is treated as a failure. NOTE(review):
            # presumably resp is a requests-style Response whose
            # truthiness reflects the HTTP status -- confirm.
            if not resp:
                msg = _('Invalid response from Cyborg: ')
                err_msg = msg + str(resp)
        except ks_exc.ClientException as exc:
            err_msg = _('Could not communicate with Cyborg.')
            LOG.exception('%s: %s', err_msg, str(exc))

        return resp, err_msg

    def _get_device_profile_list(self, dp_name):
        """Query Cyborg for device profiles matching a name.

        :param dp_name: device profile name used as the ``name`` filter
        :returns: value of the ``device_profiles`` key in the JSON reply
            (None if the key is absent)
        :raises: DeviceProfileError if the call failed
        """
        query = {"name": dp_name}
        resp, err_msg = self._call_cyborg(
            self._client.get, self.DEVICE_PROFILE_URL, params=query)

        if err_msg:
            raise exception.DeviceProfileError(name=dp_name, msg=err_msg)

        return resp.json().get('device_profiles')

    def get_device_profile_groups(self, dp_name):
        """Get list of profile group objects from the device profile.

        Cyborg API returns: {"device_profiles": [<device_profile>]}
        See module notes above for further details.

        :param dp_name: string: device profile name
            Expected to be valid, not None or ''.
        :returns: [objects.RequestGroup]
        :raises: DeviceProfileError
        """
        dp_list = self._get_device_profile_list(dp_name)
        if not dp_list:
            msg = _('Expected 1 device profile but got nothing.')
            raise exception.DeviceProfileError(name=dp_name, msg=msg)
        if len(dp_list) != 1:
            err = _('Expected 1 device profile but got %s.') % len(dp_list)
            raise exception.DeviceProfileError(name=dp_name, msg=err)

        dp_groups = dp_list[0]['groups']
        request_groups = []
        # The position of each group in the profile is what ties it to
        # its RequestGroup (via the requester_id).
        for dp_group_id, dp_group in enumerate(dp_groups):
            req_id = get_device_profile_group_requester_id(dp_group_id)
            rg = objects.RequestGroup(requester_id=req_id)
            for key, val in dp_group.items():
                match = schedutils.ResourceRequest.XS_KEYPAT.match(key)
                if not match:
                    continue  # could be 'accel:foo=bar', skip it
                prefix, _ignore, name = match.groups()
                if prefix == schedutils.ResourceRequest.XS_RES_PREFIX:
                    rg.add_resource(rclass=name, amount=val)
                elif prefix == schedutils.ResourceRequest.XS_TRAIT_PREFIX:
                    rg.add_trait(trait_name=name, trait_type=val)
            request_groups.append(rg)
        return request_groups

    def _create_arqs(self, dp_name):
        """Ask Cyborg to create ARQs for a device profile.

        :param dp_name: device profile name sent as
            ``device_profile_name`` in the request body
        :returns: value of the ``arqs`` key in the JSON reply (None if
            the key is absent)
        :raises: AcceleratorRequestOpFailed if the call failed
        """
        data = {"device_profile_name": dp_name}
        resp, err_msg = self._call_cyborg(
            self._client.post, self.ARQ_URL, json=data)

        if err_msg:
            raise exception.AcceleratorRequestOpFailed(
                op=_('create'), msg=err_msg)

        return resp.json().get('arqs')

    def create_arqs_and_match_resource_providers(self, dp_name, rg_rp_map):
        """Create ARQs, match them with request groups and thereby
          determine their corresponding RPs.

        :param dp_name: Device profile name
        :param rg_rp_map: Request group - Resource Provider map
            {requester_id: [resource_provider_uuid]}
        :returns:
            [arq], with each ARQ associated with an RP
        :raises: DeviceProfileError, AcceleratorRequestOpFailed
        """
        LOG.info('Creating ARQs for device profile %s', dp_name)
        arqs = self._create_arqs(dp_name)
        if not arqs:
            msg = _('device profile name %s') % dp_name
            raise exception.AcceleratorRequestOpFailed(op=_('create'), msg=msg)
        for arq in arqs:
            dp_group_id = arq['device_profile_group_id']
            requester_id = (
                get_device_profile_group_requester_id(dp_group_id))
            # Only the first RP listed for the request group is used.
            arq['device_rp_uuid'] = rg_rp_map[requester_id][0]
        return arqs

    def bind_arqs(self, bindings):
        """Initiate Cyborg bindings.

        Handles RFC 6902-compliant JSON patching, sparing
        calling Nova code from those details.

        :param bindings:
            { "$arq_uuid": {
                 "hostname": STRING
                 "device_rp_uuid": UUID
                 "instance_uuid": UUID
               },
               ...
             }
        :returns: nothing
        :raises: AcceleratorRequestBindingFailed
        """
        LOG.info('Binding ARQs.')
        # Create a JSON patch in RFC 6902 format: one list of "add"
        # operations per ARQ UUID, one operation per binding field.
        patch_list = {
            arq_uuid: [
                {"path": "/" + field, "op": "add", "value": value}
                for field, value in binding.items()
            ]
            for arq_uuid, binding in bindings.items()
        }

        resp, err_msg = self._call_cyborg(
            self._client.patch, self.ARQ_URL, json=patch_list)
        if err_msg:
            arq_uuids = bindings.keys()
            msg = _(' Binding failed for ARQ UUIDs: ')
            err_msg = err_msg + msg + ','.join(arq_uuids)
            raise exception.AcceleratorRequestBindingFailed(
                arqs=arq_uuids, msg=err_msg)

    def get_arqs_for_instance(self, instance_uuid, only_resolved=False):
        """Get ARQs for the instance.

        :param instance_uuid: Instance UUID
        :param only_resolved: flag to return only resolved ARQs
        :returns: List of ARQs for the instance:
            if only_resolved: only those ARQs whose state is one of
                'Bound', 'BindFailed' or 'Deleting'
            else: all ARQs
            The format of the returned data structure is as below:
               [
                {'uuid': $arq_uuid,
                 'device_profile_name': $dp_name,
                 'device_profile_group_id': $dp_request_group_index,
                 'state': 'Bound',
                 'device_rp_uuid': $resource_provider_uuid,
                 'hostname': $host_nodename,
                 'instance_uuid': $instance_uuid,
                 'attach_handle_info': {  # PCI bdf
                     'bus': '0c', 'device': '0',
                     'domain': '0000', 'function': '0'},
                 'attach_handle_type': 'PCI'
                      # or 'TEST_PCI' for Cyborg fake driver
                }
               ]
        :raises: AcceleratorRequestOpFailed
        """
        query = {"instance": instance_uuid}
        resp, err_msg = self._call_cyborg(
            self._client.get, self.ARQ_URL, params=query)

        if err_msg:
            err_msg = err_msg + _(' Instance %s') % instance_uuid
            raise exception.AcceleratorRequestOpFailed(
                op=_('get'), msg=err_msg)

        arqs = resp.json().get('arqs')
        if not arqs:
            err_msg = _('Cyborg returned no accelerator requests for '
                        'instance %s') % instance_uuid
            raise exception.AcceleratorRequestOpFailed(
                op=_('get'), msg=err_msg)

        if only_resolved:
            arqs = [arq for arq in arqs if
                    arq['state'] in ['Bound', 'BindFailed', 'Deleting']]
        return arqs

    def delete_arqs_for_instance(self, instance_uuid):
        """Delete ARQs for instance, after unbinding if needed.

        :param instance_uuid: Instance UUID
        :raises: AcceleratorRequestOpFailed
        """
        # Unbind and delete the ARQs
        params = {"instance": instance_uuid}
        resp, err_msg = self._call_cyborg(
            self._client.delete, self.ARQ_URL, params=params)
        if err_msg:
            msg = err_msg + _(' Instance %s') % instance_uuid
            raise exception.AcceleratorRequestOpFailed(
                op=_('delete'), msg=msg)

    def delete_arqs_by_uuid(self, arq_uuids):
        """Delete the specified ARQs, unbinding them if needed.

        This is meant to be used to clean up ARQs that have failed to bind
        to an instance. So delete_arqs_for_instance() is not applicable.

        This Cyborg API call is NOT idempotent, i.e., if called more than
        once, the 2nd and later calls will throw errors.

        If this fails, an error is logged but no exception is raised
        because this cleans up Cyborg resources, but should otherwise
        not affect instance spawn.

        :param arq_uuids: iterable of ARQ UUID strings, e.g. dict_keys()
        """
        arq_uuid_str = ','.join(arq_uuids)
        params = {'arqs': arq_uuid_str}
        resp, err_msg = self._call_cyborg(
            self._client.delete, self.ARQ_URL, params=params)
        if err_msg:
            # No point raising an exception, but do record why the
            # cleanup failed so it can be diagnosed later.
            LOG.error('Failed to delete ARQs %s: %s', arq_uuid_str, err_msg)

    def get_arq_uuids_for_instance(self, instance):
        """Get ARQ UUIDs for the instance.

        :param instance: Instance Object
        :return: ARQ UUIDs.
        :raises: AcceleratorRequestOpFailed (from get_arqs_for_instance)
        """
        return [arq['uuid']
                for arq in self.get_arqs_for_instance(instance.uuid)]
|