diff --git a/cyborg/accelerator/common/utils.py b/cyborg/accelerator/common/utils.py index 06abaf08..26cf00f7 100644 --- a/cyborg/accelerator/common/utils.py +++ b/cyborg/accelerator/common/utils.py @@ -14,12 +14,19 @@ import collections import os +import re from oslo_serialization import jsonutils from cyborg.common import exception +_PCI_ADDRESS_PATTERN = (r"^(hex{4}):(hex{2}):(hex{2})\.(oct{1})$". + replace("hex", r"[\da-fA-F]"). + replace("oct", "[0-7]")) +_PCI_ADDRESS_REGEX = re.compile(_PCI_ADDRESS_PATTERN) + + def pci_str_to_json(pci_address, physnet=None): dbs, func = pci_address.split('.') domain, bus, slot = dbs.split(':') @@ -106,3 +113,24 @@ def get_vendor_maps(): "1099": "samsung", "1cf2": "zte" } + + +def mdev_str_to_json(pci_address, asked_type, vgpu_mark): + dbs, func = pci_address.split('.') + domain, bus, slot = dbs.split(':') + keys = ["domain", "bus", "device", "function", "asked_type", "vgpu_mark"] + values = [domain, bus, slot, func, asked_type, vgpu_mark] + bdf_dict = dict(zip(keys, values)) + ordered_dict = collections.OrderedDict(sorted(bdf_dict.items())) + bdf_json = jsonutils.dumps(ordered_dict) + return bdf_json + + +def parse_address(address): + """Returns (domain, bus, slot, function) from PCI address that is set + in configuration + """ + m = _PCI_ADDRESS_REGEX.match(address) + if not m: + raise exception.PciDeviceWrongAddressFormat(address=address) + return m.groups() diff --git a/cyborg/accelerator/drivers/gpu/nvidia/driver.py b/cyborg/accelerator/drivers/gpu/nvidia/driver.py index 51ba1ff0..5ea46dfc 100644 --- a/cyborg/accelerator/drivers/gpu/nvidia/driver.py +++ b/cyborg/accelerator/drivers/gpu/nvidia/driver.py @@ -1,3 +1,4 @@ +# Modifications Copyright (C) 2020 ZTE Corporation # Copyright 2018 Beijing Lenovo Software Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -26,6 +27,7 @@ class NVIDIAGPUDriver(GPUDriver): Vendor should implement their specific drivers in this class. 
""" VENDOR = "nvidia" + VENDOR_ID = "10de" def discover(self): - return sysinfo.gpu_tree() + return sysinfo.discover(self.VENDOR_ID) diff --git a/cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py b/cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py index 2def05ae..4849eea7 100644 --- a/cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py +++ b/cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py @@ -1,3 +1,4 @@ +# Modifications Copyright (C) 2021 ZTE Corporation # Copyright 2018 Beijing Lenovo Software Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -16,11 +17,271 @@ """ Cyborg NVIDIA GPU driver implementation. """ +from oslo_log import log as logging +from oslo_serialization import jsonutils -from cyborg.accelerator.drivers.gpu import utils -VENDOR_ID = "10de" +import collections +import os + +import cyborg.conf + +from cyborg.accelerator.common import utils +from cyborg.accelerator.drivers.gpu import utils as gpu_utils +from cyborg.common import constants +from cyborg.common import exception +from cyborg.conf import CONF +from cyborg.objects.driver_objects import driver_attach_handle +from cyborg.objects.driver_objects import driver_attribute +from cyborg.objects.driver_objects import driver_controlpath_id +from cyborg.objects.driver_objects import driver_deployable +from cyborg.objects.driver_objects import driver_device + +LOG = logging.getLogger(__name__) -def gpu_tree(): - devs = utils.discover_gpus(VENDOR_ID) +def _get_traits(vendor_id, product_id, vgpu_type_name=None): + """Generate traits for GPUs. + : param vendor_id: vendor_id of PGPU/VGPU, eg."10de" + : param product_id: product_id of PGPU/VGPU, eg."1eb8". + : param vgpu_type_name: vgpu type name, eg."T4_1B". 
+ Example VGPU traits: + {traits:["OWNER_CYBORG", "CUSTOM_NVIDIA_1EB8_T4_2B"]} + Example PGPU traits: + {traits:["OWNER_CYBORG", "CUSTOM_NVIDIA_1EB8"]} + """ + traits = ["OWNER_CYBORG"] + # PGPU trait + gpu_trait = "_".join( + ('CUSTOM', gpu_utils.VENDOR_MAPS.get(vendor_id, "").upper(), + product_id.upper())) + # VGPU trait + if vgpu_type_name: + gpu_trait = "_".join((gpu_trait, vgpu_type_name.upper())) + traits.append(gpu_trait) + return {"traits": traits} + + +def _generate_attribute_list(gpu): + attr_list = [] + index = 0 + for k, v in gpu.items(): + if k == "rc": + driver_attr = driver_attribute.DriverAttribute() + driver_attr.key, driver_attr.value = k, v + attr_list.append(driver_attr) + if k == "traits": + values = gpu.get(k, []) + for val in values: + driver_attr = driver_attribute.DriverAttribute( + key="trait" + str(index), value=val) + index = index + 1 + attr_list.append(driver_attr) + return attr_list + + +def _generate_attach_handle(gpu, num=None): + driver_ah = driver_attach_handle.DriverAttachHandle() + driver_ah.in_use = False + if gpu["rc"] == "PGPU": + driver_ah.attach_type = constants.AH_TYPE_PCI + driver_ah.attach_info = utils.pci_str_to_json(gpu["devices"]) + else: + vgpu_mark = gpu["vGPU_type"] + '_' + str(num) + driver_ah.attach_type = constants.AH_TYPE_MDEV + driver_ah.attach_info = utils.mdev_str_to_json( + gpu["devices"], gpu["vGPU_type"], vgpu_mark) + return driver_ah + + +def _generate_dep_list(gpu): + dep_list = [] + driver_dep = driver_deployable.DriverDeployable() + driver_dep.attribute_list = _generate_attribute_list(gpu) + driver_dep.attach_handle_list = [] + # NOTE(wangzhh): The name of deployable should be unique, its format is + # under discussion, may look like + # ___ + # NOTE(yumeng) Since Wallaby release, the deployable_name is named as + # <hostname>_<pci_address> + driver_dep.name = gpu.get('hostname', '') + '_' + gpu["devices"] + driver_dep.driver_name = \ + gpu_utils.VENDOR_MAPS.get(gpu["vendor_id"], '').upper() + # if is pGPU, 
num_accelerators = 1 + if gpu["rc"] == "PGPU": + driver_dep.num_accelerators = 1 + driver_dep.attach_handle_list = \ + [_generate_attach_handle(gpu)] + else: + # if is vGPU, num_accelerators is the total vGPU capability of + # the asked vGPU type + vGPU_path = os.path.expandvars( + '/sys/bus/pci/devices/{0}/mdev_supported_types/{1}/' + .format(gpu["devices"], gpu["vGPU_type"])) + num_available = 0 + with open(vGPU_path + 'available_instances', 'r') as f: + num_available = int(f.read().strip()) + num_created = len(os.listdir(vGPU_path + 'devices')) + driver_dep.num_accelerators = num_available + num_created + # example: 1 pGPU has 16 vGPUs is represented as + # 16 attach_handles, 1 deployable, 1 resource_provider + # NOTE(yumeng): cyborg use attach_handle_uuid + # to create each vGPU without the need to generate a new uuid + # example: echo "attach_handle_uuid" > nvidia-223/create + for num in range(driver_dep.num_accelerators): + driver_dep.attach_handle_list.append( + _generate_attach_handle(gpu, num)) + dep_list.append(driver_dep) + return dep_list + + +def _generate_controlpath_id(gpu): + driver_cpid = driver_controlpath_id.DriverControlPathID() + driver_cpid.cpid_type = "PCI" + driver_cpid.cpid_info = utils.pci_str_to_json(gpu["devices"]) + return driver_cpid + + +def _generate_driver_device(gpu): + driver_device_obj = driver_device.DriverDevice() + driver_device_obj.vendor = gpu['vendor_id'] + driver_device_obj.model = gpu.get('model', 'miss model info') + std_board_info = {'product_id': gpu.get('product_id'), + 'controller': gpu.get('controller'), } + vendor_board_info = {'vendor_info': gpu.get('vendor_info', + 'gpu_vb_info')} + driver_device_obj.std_board_info = jsonutils.dumps(std_board_info) + driver_device_obj.vendor_board_info = jsonutils.dumps( + vendor_board_info) + driver_device_obj.type = constants.DEVICE_GPU + driver_device_obj.stub = gpu.get('stub', False) + driver_device_obj.controlpath_id = _generate_controlpath_id(gpu) + 
driver_device_obj.deployable_list = _generate_dep_list(gpu) + return driver_device_obj + + +def _get_supported_vgpu_types(): + """Gets supported vgpu_types from cyborg.conf. + + Retrieves supported vgpu_types set by the operator and generates a + record of vgpu_type and pgpu in the dict constant: pgpu_type_mapping. + + Returns: + A tuple (enabled vgpu_types list, pgpu_type_mapping dict). + + Raises: + InvalidvGPUConfig: if the same PCI address appears twice + or PCI address is not valid. + """ + pgpu_type_mapping = collections.defaultdict(str) + pgpu_type_mapping.clear() + if not CONF.gpu_devices.enabled_vgpu_types: + return [], pgpu_type_mapping + + for vgpu_type in CONF.gpu_devices.enabled_vgpu_types: + group = getattr(CONF, 'vgpu_%s' % vgpu_type, None) + if group is None or not group.device_addresses: + # Device addresses must be configured explicitly now for every + # enabled vgpu type. Will improve after the disable and enable + # devices interfaces are implemented. + raise exception.InvalidvGPUConfig( + reason="Missing device addresses config for vgpu type %s" + % vgpu_type + ) + for device_address in group.device_addresses: + if device_address in pgpu_type_mapping: + raise exception.InvalidvGPUConfig( + reason="Duplicate types for PCI address %s" + % device_address + ) + # Just checking whether the operator fat-fingered the address. + # If it's wrong, it will raise an exception + try: + # Validates whether it's a PCI ID... + utils.parse_address(device_address) + except exception.PciDeviceWrongAddressFormat: + raise exception.InvalidvGPUConfig( + reason="Incorrect PCI address: %s" % device_address + ) + pgpu_type_mapping[device_address] = vgpu_type + return CONF.gpu_devices.enabled_vgpu_types, pgpu_type_mapping + + +def _get_vgpu_type_per_pgpu(device_address, supported_vgpu_types, + pgpu_type_mapping): + """Provides the vGPU type the pGPU supports. 
+ + :param device_address: the PCI device address in config, + eg.'0000:af:00.0' + """ + # Use the values supplied by the caller; do not re-parse config per GPU. + # Bail out quickly if we don't support vGPUs + if not supported_vgpu_types: + LOG.error('Unable to load vGPU_type from [gpu_devices] ' + 'Ensure "enabled_vgpu_types" is set.') + return + + try: + # Validates whether it's a PCI ID... + utils.parse_address(device_address) + except (exception.PciDeviceWrongAddressFormat, IndexError): + # this is not a valid PCI address + LOG.warning("The PCI address %s was invalid for getting the " + "related vGPU type", device_address) + return + return pgpu_type_mapping.get(device_address) + + +def _discover_gpus(vendor_id): + """param: vendor_id=VENDOR_ID means only discover Nvidia GPU on the host + """ + # init vGPU conf + cyborg.conf.devices.register_dynamic_opts(CONF) + supported_vgpu_types, pgpu_type_mapping = _get_supported_vgpu_types() + # discover gpu devices by "lspci" + gpu_list = [] + gpus = gpu_utils.get_pci_devices(gpu_utils.GPU_FLAGS, vendor_id) + # report trait,rc and generate driver object + for gpu in gpus: + m = gpu_utils.GPU_INFO_PATTERN.match(gpu) + if m: + gpu_dict = m.groupdict() + # get hostname for deployable_name usage + gpu_dict['hostname'] = CONF.host + # get vgpu_type from cyborg.conf, otherwise vgpu_type=None + vgpu_type = _get_vgpu_type_per_pgpu( + gpu_dict["devices"], supported_vgpu_types, pgpu_type_mapping) + # generate rc and trait for pGPU + if not vgpu_type: + gpu_dict["rc"] = constants.RESOURCES["PGPU"] + traits = _get_traits(gpu_dict["vendor_id"], + gpu_dict["product_id"]) + # generate rc and trait for vGPU + else: + # get rc + gpu_dict["rc"] = constants.RESOURCES["VGPU"] + mdev_path = os.path.expandvars( + '/sys/bus/pci/devices/{0}/mdev_supported_types'. 
+ format(gpu_dict["devices"])) + valid_types = os.listdir(mdev_path) + if vgpu_type not in valid_types: + raise exception.InvalidVGPUType(name=vgpu_type) + gpu_dict["vGPU_type"] = vgpu_type + vGPU_path = os.path.expandvars( + '/sys/bus/pci/devices/{0}/mdev_supported_types/{1}/' + .format(gpu_dict["devices"], gpu_dict["vGPU_type"])) + # transfer vgpu_type to vgpu_type_name. + # eg. transfer 'nvidia-223' to 'T4_1B' + with open(vGPU_path + 'name', 'r') as f: + name = f.read().strip() + vgpu_type_name = name.split(' ')[1].replace('-', '_') + traits = _get_traits(gpu_dict["vendor_id"], + gpu_dict["product_id"], + vgpu_type_name) + gpu_dict.update(traits) + gpu_list.append(_generate_driver_device(gpu_dict)) + return gpu_list + + +def discover(vendor_id): + devs = _discover_gpus(vendor_id) return devs diff --git a/cyborg/accelerator/drivers/gpu/utils.py b/cyborg/accelerator/drivers/gpu/utils.py index ca631299..a21615c0 100644 --- a/cyborg/accelerator/drivers/gpu/utils.py +++ b/cyborg/accelerator/drivers/gpu/utils.py @@ -1,5 +1,4 @@ -# Copyright 2018 Beijing Lenovo Software Ltd. -# +# Modifications Copyright (C) 2021 ZTE Corporation # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at @@ -12,24 +11,15 @@ # License for the specific language governing permissions and limitations # under the License. - """ Utils for GPU driver. 
""" from oslo_concurrency import processutils from oslo_log import log as logging -from oslo_serialization import jsonutils import re -from cyborg.accelerator.common import utils -from cyborg.common import constants -from cyborg.conf import CONF -from cyborg.objects.driver_objects import driver_attach_handle -from cyborg.objects.driver_objects import driver_attribute -from cyborg.objects.driver_objects import driver_controlpath_id -from cyborg.objects.driver_objects import driver_deployable -from cyborg.objects.driver_objects import driver_device +import cyborg.conf import cyborg.privsep LOG = logging.getLogger(__name__) @@ -42,6 +32,7 @@ GPU_INFO_PATTERN = re.compile(r"(?P[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:" r"{4}):(?P[0-9a-fA-F]{4})].*") VENDOR_MAPS = {"10de": "nvidia", "102b": "matrox"} +PRODUCT_ID_MAPS = {"1eb8": "T4", "15f7": "P100_PCIE_12GB"} @cyborg.privsep.sys_admin_pctxt.entrypoint @@ -62,19 +53,6 @@ def get_pci_devices(pci_flags, vendor_id=None): return device_for_vendor_out if vendor_id else all_device_out -def get_traits(vendor_id, product_id): - """Generate traits for GPUs. - : param vendor_id: vendor_id of PGPU/VGPU, eg."10de" - : param product_id: product_id of PGPU/VGPU, eg."1eb8". 
- Example VGPU traits: - {traits:["CUSTOM_GPU_NVIDIA", "CUSTOM_GPU_PRODUCT_ID_1EB8"]} - """ - traits = [] - traits.append("CUSTOM_GPU_" + VENDOR_MAPS.get(vendor_id, "").upper()) - traits.append("CUSTOM_GPU_PRODUCT_ID_" + product_id.upper()) - return {"traits": traits} - - def discover_vendors(): vendors = set() gpus = get_pci_devices(GPU_FLAGS) @@ -84,93 +62,3 @@ def discover_vendors(): vendor_id = m.groupdict().get("vendor_id") vendors.add(vendor_id) return vendors - - -def discover_gpus(vendor_id=None): - gpu_list = [] - gpus = get_pci_devices(GPU_FLAGS, vendor_id) - for gpu in gpus: - m = GPU_INFO_PATTERN.match(gpu) - if m: - gpu_dict = m.groupdict() - # generate hostname for deployable_name usage - gpu_dict['hostname'] = CONF.host - # generate traits info - # TODO(yumeng) support and test VGPU rc generation soon. - traits = get_traits(gpu_dict["vendor_id"], gpu_dict["product_id"]) - gpu_dict["rc"] = constants.RESOURCES["PGPU"] - gpu_dict.update(traits) - gpu_list.append(_generate_driver_device(gpu_dict)) - return gpu_list - - -def _generate_driver_device(gpu): - driver_device_obj = driver_device.DriverDevice() - driver_device_obj.vendor = gpu["vendor_id"] - driver_device_obj.model = gpu.get('model', 'miss model info') - std_board_info = {'product_id': gpu.get('product_id'), - 'controller': gpu.get('controller')} - vendor_board_info = {'vendor_info': gpu.get('vendor_info', 'gpu_vb_info')} - driver_device_obj.std_board_info = jsonutils.dumps(std_board_info) - driver_device_obj.vendor_board_info = jsonutils.dumps(vendor_board_info) - driver_device_obj.type = constants.DEVICE_GPU - driver_device_obj.stub = gpu.get('stub', False) - driver_device_obj.controlpath_id = _generate_controlpath_id(gpu) - driver_device_obj.deployable_list = _generate_dep_list(gpu) - return driver_device_obj - - -def _generate_controlpath_id(gpu): - driver_cpid = driver_controlpath_id.DriverControlPathID() - # NOTE: GPUs (either pGPU or vGPU), they all report "PCI" as - # their cpid_type, 
while attach_handle_type of them are different. - driver_cpid.cpid_type = "PCI" - driver_cpid.cpid_info = utils.pci_str_to_json(gpu["devices"]) - return driver_cpid - - -def _generate_dep_list(gpu): - dep_list = [] - driver_dep = driver_deployable.DriverDeployable() - driver_dep.attribute_list = _generate_attribute_list(gpu) - driver_dep.attach_handle_list = [] - # NOTE(yumeng) Now simply named as _ - # once cyborg needs to support GPU devices discovered from a baremetal - # node, we might need to support more formats. - driver_dep.name = gpu.get('hostname', '') + '_' + gpu["devices"] - driver_dep.driver_name = VENDOR_MAPS.get(gpu["vendor_id"], '').upper() - # driver_dep.num_accelerators for PGPU is 1, for VGPU should be the - # available_instances of the vGPU device. - # TODO(yumeng) support VGPU num report soon - driver_dep.num_accelerators = 1 - driver_dep.attach_handle_list = \ - [_generate_attach_handle(gpu)] - dep_list.append(driver_dep) - return dep_list - - -def _generate_attach_handle(gpu): - driver_ah = driver_attach_handle.DriverAttachHandle() - if gpu["rc"] == "PGPU": - driver_ah.attach_type = constants.AH_TYPE_PCI - else: - driver_ah.attach_type = constants.AH_TYPE_MDEV - driver_ah.in_use = False - driver_ah.attach_info = utils.pci_str_to_json(gpu["devices"]) - return driver_ah - - -def _generate_attribute_list(gpu): - attr_list = [] - for k, v in gpu.items(): - if k == "rc": - driver_attr = driver_attribute.DriverAttribute() - driver_attr.key, driver_attr.value = k, v - attr_list.append(driver_attr) - if k == "traits": - values = gpu.get(k, []) - for index, val in enumerate(values): - driver_attr = driver_attribute.DriverAttribute( - key="trait" + str(index), value=val) - attr_list.append(driver_attr) - return attr_list diff --git a/cyborg/common/exception.py b/cyborg/common/exception.py index 6acaafeb..c5f0edf5 100644 --- a/cyborg/common/exception.py +++ b/cyborg/common/exception.py @@ -389,6 +389,18 @@ class InvalidDriver(Invalid): _msg_fmt = 
_("Found an invalid driver: %(name)s") +class InvalidVGPUType(Invalid): + _msg_fmt = _("Invalid requested vGPU type: %(name)s") + + +class InvalidvGPUConfig(Invalid): + _msg_fmt = _("Invalid vGPU config: %(reason)s") + + +class PciDeviceWrongAddressFormat(Invalid): + _msg_fmt = _("The PCI address %(address)s has an incorrect format.") + + class InvalidType(Invalid): _msg_fmt = _("Invalid type for %(obj)s: %(type)s." "Expected: %(expected)s") diff --git a/cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py b/cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py index 38dd5347..0d6fb0cb 100644 --- a/cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py +++ b/cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py @@ -12,21 +12,42 @@ # License for the specific language governing permissions and limitations # under the License. +import sys from unittest import mock from oslo_serialization import jsonutils +import cyborg +from cyborg.accelerator.drivers.gpu.nvidia.driver import NVIDIAGPUDriver from cyborg.accelerator.drivers.gpu import utils from cyborg.tests import base + +CONF = cyborg.conf.CONF + NVIDIA_GPU_INFO = "0000:00:06.0 3D controller [0302]: NVIDIA Corporation " \ "GP100GL [Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)" +NVIDIA_T4_GPU_INFO = "0000:af:00.0 3D controller [0302]: NVIDIA Corporation "\ + "TU104GL [Tesla T4] [10de:1eb8] (rev a1)" + +NVIDIA_T4_SUPPORTED_MDEV_TYPES = ['nvidia-222', 'nvidia-223', 'nvidia-224', + 'nvidia-225', 'nvidia-226', 'nvidia-227', + 'nvidia-228', 'nvidia-229', 'nvidia-230', + 'nvidia-231', 'nvidia-232', 'nvidia-233', + 'nvidia-234', 'nvidia-252', 'nvidia-319', + 'nvidia-320', 'nvidia-321'] + +BUILTIN = '__builtin__' if (sys.version_info[0] < 3) else '__builtins__' + class stdout(object): def readlines(self): return [NVIDIA_GPU_INFO] + def readlines_T4(self): + return [NVIDIA_T4_GPU_INFO] + class p(object): def __init__(self): @@ -49,11 +70,14 @@ class TestGPUDriverUtils(base.TestCase): self.assertEqual(1, len(gpu_vendors)) 
@mock.patch('cyborg.accelerator.drivers.gpu.utils.lspci_privileged') - def test_discover_gpus(self, mock_devices_for_vendor): + def test_discover_gpus_report_pGPU(self, mock_devices_for_vendor): + """test nvidia pGPU discover""" mock_devices_for_vendor.return_value = self.p.stdout.readlines() self.set_defaults(host='host-192-168-32-195', debug=True) - vendor_id = '10de' - gpu_list = utils.discover_gpus(vendor_id) + + nvidia = NVIDIAGPUDriver() + gpu_list = nvidia.discover() + self.assertEqual(1, len(gpu_list)) attach_handle_list = [ {'attach_type': 'PCI', @@ -65,8 +89,8 @@ class TestGPUDriverUtils(base.TestCase): ] attribute_list = [ {'key': 'rc', 'value': 'PGPU'}, - {'key': 'trait0', 'value': 'CUSTOM_GPU_NVIDIA'}, - {'key': 'trait1', 'value': 'CUSTOM_GPU_PRODUCT_ID_15F7'}, + {'key': 'trait0', 'value': 'OWNER_CYBORG'}, + {'key': 'trait1', 'value': 'CUSTOM_NVIDIA_15F7'}, ] expected = { 'vendor': '10de', @@ -118,3 +142,107 @@ class TestGPUDriverUtils(base.TestCase): self.assertEqual(attach_handle_list[0], gpu_attach_handle_list[0].as_dict()) self.assertEqual(attribute_list, attribute_actual_data) + + @mock.patch('builtins.open') + @mock.patch('os.listdir') + @mock.patch('os.path.exists') + @mock.patch('cyborg.accelerator.drivers.gpu.utils.lspci_privileged') + def test_discover_gpus_report_vGPU(self, mock_devices_for_vendor, + mock_path_exists, + mock_supported_mdev_types, + mock_open): + """test nvidia vGPU discover""" + mock_devices_for_vendor.return_value = self.p.stdout.readlines_T4() + mock_path_exists.return_value = True + mock_supported_mdev_types.return_value = NVIDIA_T4_SUPPORTED_MDEV_TYPES + file_content_list = ['GRID T4-1B', '1'] + mock_open.side_effect = multi_mock_open(*file_content_list) + self.set_defaults(host='host-192-168-32-195', debug=True) + self.set_defaults(enabled_vgpu_types='nvidia-223', group='gpu_devices') + cyborg.conf.devices.register_dynamic_opts(CONF) + self.set_defaults( + device_addresses=['0000:af:00.0'], group='vgpu_nvidia-223') + 
nvidia = NVIDIAGPUDriver() + gpu_list = nvidia.discover() + + self.assertEqual(1, len(gpu_list)) + attach_handle_list = [ + {'attach_type': 'MDEV', + 'attach_info': '{"asked_type": "nvidia-223", ' + '"bus": "af", ' + '"device": "00", ' + '"domain": "0000", ' + '"function": "0", ' + '"vgpu_mark": "nvidia-223_0"}', + 'in_use': False} + ] * 8 + attribute_list = [ + {'key': 'rc', 'value': 'VGPU'}, + {'key': 'trait0', 'value': 'OWNER_CYBORG'}, + {'key': 'trait1', 'value': 'CUSTOM_NVIDIA_1EB8_T4_1B'}, + ] + expected = { + 'vendor': '10de', + 'type': 'GPU', + 'std_board_info': + {"controller": "3D controller", "product_id": "1eb8"}, + 'vendor_board_info': {"vendor_info": "gpu_vb_info"}, + 'deployable_list': + [ + { + 'num_accelerators': 18, + 'driver_name': 'NVIDIA', + 'name': 'host-192-168-32-195_0000:af:00.0', + 'attach_handle_list': attach_handle_list, + 'attribute_list': attribute_list + }, + ], + 'controlpath_id': {'cpid_info': '{"bus": "af", ' + '"device": "00", ' + '"domain": "0000", ' + '"function": "0"}', + 'cpid_type': 'PCI'} + } + gpu_obj = gpu_list[0] + gpu_dict = gpu_obj.as_dict() + gpu_dep_list = gpu_dict['deployable_list'] + gpu_attach_handle_list = gpu_dep_list[0].as_dict()[ + 'attach_handle_list'] + gpu_attribute_list = gpu_dep_list[0].as_dict()['attribute_list'] + attri_obj_data = [] + [attri_obj_data.append(attr.as_dict()) for attr in gpu_attribute_list] + attribute_actual_data = sorted(attri_obj_data, key=lambda i: i['key']) + self.assertEqual(expected['vendor'], gpu_dict['vendor']) + self.assertEqual(expected['controlpath_id'], + gpu_dict['controlpath_id']) + self.assertEqual(expected['std_board_info'], + jsonutils.loads(gpu_dict['std_board_info'])) + self.assertEqual(expected['vendor_board_info'], + jsonutils.loads(gpu_dict['vendor_board_info'])) + self.assertEqual(expected['deployable_list'][0]['num_accelerators'], + gpu_dep_list[0].as_dict()['num_accelerators']) + self.assertEqual(expected['deployable_list'][0]['name'], + 
gpu_dep_list[0].as_dict()['name']) + self.assertEqual(expected['deployable_list'][0]['driver_name'], + gpu_dep_list[0].as_dict()['driver_name']) + self.assertEqual(attach_handle_list[0], + gpu_attach_handle_list[0].as_dict()) + self.assertEqual(attribute_list, attribute_actual_data) + + +def multi_mock_open(*file_contents): + """Create a mock "open" that will mock open multiple files in sequence. + + : params file_contents: a list of file contents to be returned by open + + : returns: (MagicMock) a mock opener that will return the contents of the + first file when opened the first time, the second file when + opened the second time, etc. + """ + + mock_files = [ + mock.mock_open(read_data=content).return_value for content in + file_contents] + mock_opener = mock.mock_open() + mock_opener.side_effect = mock_files + return mock_opener