Cyborg NVIDIA GPU Driver: support vGPU management

The Cyborg NVIDIA GPU driver has supported pGPU management since the
Train release. This patch adds vGPU management support to the same
driver. For the spec of this feature, please refer to [0].

[0]:https://specs.openstack.org/openstack/cyborg-specs/specs/wallaby/approved/vgpu-driver-proposal.html

Co-Authored-By: Wenping Song <songwenping@inspur.com>

implement blueprint enable-vgpu-in-cyborg

Change-Id: I715fdad3e8601186b5c6e8c087f27fb91d679490
This commit is contained in:
Yumeng Bao 2021-01-06 17:50:04 +08:00 committed by songwenping
parent eafcc2fc64
commit 79e1928554
6 changed files with 444 additions and 125 deletions

View File

@ -14,12 +14,19 @@
import collections import collections
import os import os
import re
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
from cyborg.common import exception from cyborg.common import exception
# Canonical PCI address: <domain>:<bus>:<slot>.<function>, e.g. "0000:af:00.0".
# The separator in front of the function digit is a literal dot, so it is
# escaped here; left unescaped it would match any character and let
# malformed addresses such as "0000:af:00x0" through validation.
_PCI_ADDRESS_PATTERN = (r"^(hex{4}):(hex{2}):(hex{2})\.(oct{1})$".
                        replace("hex", r"[\da-fA-F]").
                        replace("oct", "[0-7]"))
_PCI_ADDRESS_REGEX = re.compile(_PCI_ADDRESS_PATTERN)
def pci_str_to_json(pci_address, physnet=None): def pci_str_to_json(pci_address, physnet=None):
dbs, func = pci_address.split('.') dbs, func = pci_address.split('.')
domain, bus, slot = dbs.split(':') domain, bus, slot = dbs.split(':')
@ -106,3 +113,24 @@ def get_vendor_maps():
"1099": "samsung", "1099": "samsung",
"1cf2": "zte" "1cf2": "zte"
} }
def mdev_str_to_json(pci_address, asked_type, vgpu_mark):
    """Serialize a mediated-device attach description to a JSON string.

    :param pci_address: PCI address in ``<domain>:<bus>:<slot>.<function>``
        form, e.g. "0000:af:00.0".
    :param asked_type: the requested vGPU type, e.g. "nvidia-223".
    :param vgpu_mark: marker string stored alongside the type,
        e.g. "nvidia-223_0".
    :returns: a JSON object string whose keys are emitted in sorted order.
    """
    domain_bus_slot, function = pci_address.split('.')
    domain, bus, device = domain_bus_slot.split(':')
    fields = {
        "domain": domain,
        "bus": bus,
        "device": device,
        "function": function,
        "asked_type": asked_type,
        "vgpu_mark": vgpu_mark,
    }
    # Emit keys in sorted order so the serialized string is deterministic.
    ordered = collections.OrderedDict(
        (key, fields[key]) for key in sorted(fields))
    return jsonutils.dumps(ordered)
def parse_address(address):
    """Split a configured PCI address into its components.

    :param address: PCI address string, e.g. "0000:af:00.0".
    :returns: tuple ``(domain, bus, slot, function)`` of matched strings.
    :raises PciDeviceWrongAddressFormat: if ``address`` does not match
        the module's PCI address pattern.
    """
    matched = _PCI_ADDRESS_REGEX.match(address)
    if matched:
        return matched.groups()
    raise exception.PciDeviceWrongAddressFormat(address=address)

View File

@ -1,3 +1,4 @@
# Modifications Copyright (C) 2020 ZTE Corporation
# Copyright 2018 Beijing Lenovo Software Ltd. # Copyright 2018 Beijing Lenovo Software Ltd.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -26,6 +27,7 @@ class NVIDIAGPUDriver(GPUDriver):
Vendor should implement their specific drivers in this class. Vendor should implement their specific drivers in this class.
""" """
VENDOR = "nvidia" VENDOR = "nvidia"
VENDOR_ID = "10de"
def discover(self): def discover(self):
return sysinfo.gpu_tree() return sysinfo.discover(self.VENDOR_ID)

View File

@ -1,3 +1,4 @@
# Modifications Copyright (C) 2021 ZTE Corporation
# Copyright 2018 Beijing Lenovo Software Ltd. # Copyright 2018 Beijing Lenovo Software Ltd.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -16,11 +17,271 @@
""" """
Cyborg NVIDIA GPU driver implementation. Cyborg NVIDIA GPU driver implementation.
""" """
from oslo_log import log as logging
from oslo_serialization import jsonutils
from cyborg.accelerator.drivers.gpu import utils import collections
VENDOR_ID = "10de" import os
import cyborg.conf
from cyborg.accelerator.common import utils
from cyborg.accelerator.drivers.gpu import utils as gpu_utils
from cyborg.common import constants
from cyborg.common import exception
from cyborg.conf import CONF
from cyborg.objects.driver_objects import driver_attach_handle
from cyborg.objects.driver_objects import driver_attribute
from cyborg.objects.driver_objects import driver_controlpath_id
from cyborg.objects.driver_objects import driver_deployable
from cyborg.objects.driver_objects import driver_device
LOG = logging.getLogger(__name__)
def _get_traits(vendor_id, product_id, vgpu_type_name=None):
    """Build the traits reported for a pGPU or a vGPU.

    :param vendor_id: vendor_id of PGPU/VGPU, eg."10de"
    :param product_id: product_id of PGPU/VGPU, eg."1eb8".
    :param vgpu_type_name: vgpu type name, eg."T4_1B". When omitted,
        the pGPU form of the trait is produced.
    :returns: dict with a single "traits" key.

    Example VGPU traits:
    {traits:["OWNER_CYBORG", "CUSTOM_NVIDIA_1EB8_T4_2B"]}
    Example PGPU traits:
    {traits:["OWNER_CYBORG", "CUSTOM_NVIDIA_1EB8"]}
    """
    # CUSTOM_<VENDOR>_<PRODUCT_ID>, optionally followed by _<VGPU_TYPE_NAME>.
    trait_parts = [
        'CUSTOM',
        gpu_utils.VENDOR_MAPS.get(vendor_id, "").upper(),
        product_id.upper(),
    ]
    if vgpu_type_name:
        trait_parts.append(vgpu_type_name.upper())
    return {"traits": ["OWNER_CYBORG", "_".join(trait_parts)]}
def _generate_attribute_list(gpu):
    """Turn the "rc" and "traits" entries of ``gpu`` into DriverAttributes.

    :param gpu: dict describing one GPU; only the "rc" and "traits" keys
        are consumed here.
    :returns: list of DriverAttribute objects. The resource class is stored
        under key "rc"; each trait is stored under "trait<N>", numbered
        from 0.
    """
    attributes = []
    for key, value in gpu.items():
        if key == "rc":
            rc_attr = driver_attribute.DriverAttribute()
            rc_attr.key = key
            rc_attr.value = value
            attributes.append(rc_attr)
        elif key == "traits":
            for position, trait in enumerate(value):
                attributes.append(driver_attribute.DriverAttribute(
                    key="trait" + str(position), value=trait))
    return attributes
def _generate_attach_handle(gpu, num=None):
    """Create one attach handle for a pGPU or for a single vGPU slot.

    :param gpu: dict describing the GPU; reads "rc", "devices" and, for
        vGPUs, "vGPU_type".
    :param num: index of the vGPU slot, appended to the vGPU type to form
        the ``vgpu_mark``. Not used for pGPUs.
    :returns: a DriverAttachHandle of type PCI (pGPU) or MDEV (vGPU).
    """
    handle = driver_attach_handle.DriverAttachHandle()
    handle.in_use = False
    if gpu["rc"] != "PGPU":
        # vGPU: the attach info carries the requested type and a per-slot
        # marker in addition to the parent PCI address.
        slot_mark = gpu["vGPU_type"] + '_' + str(num)
        handle.attach_type = constants.AH_TYPE_MDEV
        handle.attach_info = utils.mdev_str_to_json(
            gpu["devices"], gpu["vGPU_type"], slot_mark)
        return handle
    handle.attach_type = constants.AH_TYPE_PCI
    handle.attach_info = utils.pci_str_to_json(gpu["devices"])
    return handle
def _generate_dep_list(gpu):
    """Build the single-element deployable list for one GPU.

    :param gpu: dict describing the GPU; reads "devices", "vendor_id", "rc",
        optionally "hostname", and "vGPU_type" for vGPUs.
    :returns: list containing one DriverDeployable. A pGPU gets exactly one
        accelerator and one attach handle; a vGPU-enabled GPU gets one
        attach handle per vGPU slot of the configured type.
    """
    deployable = driver_deployable.DriverDeployable()
    deployable.attribute_list = _generate_attribute_list(gpu)
    deployable.attach_handle_list = []
    # NOTE(wangzhh): The name of a deployable should be unique; its format
    # was under discussion and might have looked like
    # <ComputeNodeName>_<NumaNodeName>_<CyborgName>_<NumInHost>
    # NOTE(yumeng): Since the Wallaby release, the deployable name is
    # <Compute_hostname>_<Device_address>
    deployable.name = gpu.get('hostname', '') + '_' + gpu["devices"]
    deployable.driver_name = \
        gpu_utils.VENDOR_MAPS.get(gpu["vendor_id"], '').upper()

    if gpu["rc"] == "PGPU":
        # A pGPU is always exactly one accelerator.
        deployable.num_accelerators = 1
        deployable.attach_handle_list = [_generate_attach_handle(gpu)]
        return [deployable]

    # vGPU: capacity of the asked type = slots still available + mdevs
    # already created, both read from sysfs.
    type_dir = os.path.expandvars(
        '/sys/bus/pci/devices/{0}/mdev_supported_types/{1}/'
        .format(gpu["devices"], gpu["vGPU_type"]))
    with open(type_dir + 'available_instances', 'r') as f:
        free_slots = int(f.read().strip())
    created_slots = len(os.listdir(type_dir + 'devices'))
    deployable.num_accelerators = free_slots + created_slots
    # Example: 1 pGPU with 16 vGPUs is represented as 16 attach_handles,
    # 1 deployable, 1 resource_provider.
    # NOTE(yumeng): cyborg uses the attach_handle_uuid to create each vGPU
    # without the need to generate a new uuid, for example:
    # echo "attach_handle_uuid" > nvidia-223/create
    for slot in range(deployable.num_accelerators):
        deployable.attach_handle_list.append(
            _generate_attach_handle(gpu, slot))
    return [deployable]
def _generate_controlpath_id(gpu):
    """Create the control-path ID for a GPU.

    :param gpu: dict describing the GPU; reads "devices" (PCI address).
    :returns: a DriverControlPathID of type "PCI" whose info is the
        JSON-serialized PCI address.
    """
    cpid = driver_controlpath_id.DriverControlPathID()
    cpid.cpid_type = "PCI"
    cpid.cpid_info = utils.pci_str_to_json(gpu["devices"])
    return cpid
def _generate_driver_device(gpu):
    """Assemble the DriverDevice object for one discovered GPU.

    :param gpu: dict describing the GPU; "vendor_id" is required, the
        remaining keys fall back to defaults when missing.
    :returns: a DriverDevice of type GPU with its control-path ID and
        deployable list populated.
    """
    device = driver_device.DriverDevice()
    device.vendor = gpu['vendor_id']
    device.model = gpu.get('model', 'miss model info')
    # Board info is stored as JSON strings.
    device.std_board_info = jsonutils.dumps({
        'product_id': gpu.get('product_id'),
        'controller': gpu.get('controller'),
    })
    device.vendor_board_info = jsonutils.dumps({
        'vendor_info': gpu.get('vendor_info', 'gpu_vb_info'),
    })
    device.type = constants.DEVICE_GPU
    device.stub = gpu.get('stub', False)
    device.controlpath_id = _generate_controlpath_id(gpu)
    device.deployable_list = _generate_dep_list(gpu)
    return device
def _get_supported_vgpu_types():
    """Gets supported vgpu_types from cyborg.conf.

    Retrieves the vgpu_types enabled by the operator and records, for every
    configured PCI address, which vgpu_type it is assigned to.

    Returns:
        A tuple ``(enabled_vgpu_types, pgpu_type_mapping)``:
        ``enabled_vgpu_types`` is CONF.gpu_devices.enabled_vgpu_types (or an
        empty list when nothing is enabled) and ``pgpu_type_mapping`` maps
        each configured PCI address to its vgpu_type.

    Raises:
        InvalidvGPUConfig: if an enabled vgpu type has no device addresses
            configured, the same PCI address appears twice, or a PCI
            address is not valid.
    """
    pgpu_type_mapping = collections.defaultdict(str)
    enabled_vgpu_types = CONF.gpu_devices.enabled_vgpu_types
    if not enabled_vgpu_types:
        return [], pgpu_type_mapping

    for vgpu_type in enabled_vgpu_types:
        group = getattr(CONF, 'vgpu_%s' % vgpu_type, None)
        if group is None or not group.device_addresses:
            # Device addresses must be configured explicitly now for every
            # enabled vgpu type. Will improve after the disable and enable
            # devices interfaces are implemented.
            raise exception.InvalidvGPUConfig(
                reason="Missing device addresses config for vgpu type %s"
                       % vgpu_type
            )
        for device_address in group.device_addresses:
            # A pGPU can only be assigned to one vgpu type.
            if device_address in pgpu_type_mapping:
                raise exception.InvalidvGPUConfig(
                    reason="Duplicate types for PCI address %s"
                           % device_address
                )
            # Just checking whether the operator fat-fingered the address.
            try:
                utils.parse_address(device_address)
            except exception.PciDeviceWrongAddressFormat as exc:
                # Keep the original error as the cause for debugging.
                raise exception.InvalidvGPUConfig(
                    reason="Incorrect PCI address: %s" % device_address
                ) from exc
            pgpu_type_mapping[device_address] = vgpu_type
    return enabled_vgpu_types, pgpu_type_mapping
def _get_vgpu_type_per_pgpu(device_address, supported_vgpu_types,
                            pgpu_type_mapping):
    """Provides the vGPU type the pGPU supports.

    :param device_address: the PCI device address in config,
        eg.'0000:af:00.0'
    :param supported_vgpu_types: the enabled vGPU types, as returned by
        _get_supported_vgpu_types().
    :param pgpu_type_mapping: mapping of PCI address to vGPU type, as
        returned by _get_supported_vgpu_types().
    :returns: the vGPU type configured for ``device_address``, or None when
        no vGPU types are enabled, the address is invalid, or the address
        has no type configured.
    """
    # NOTE: the caller-supplied values are used as-is; they are not
    # re-read from the configuration for every GPU.
    # Bail out quickly if we don't support vGPUs
    if not supported_vgpu_types:
        LOG.error('Unable to load vGPU_type from [gpu_devices]. '
                  'Ensure "enabled_vgpu_types" is set.')
        return
    try:
        # Validates whether it's a PCI ID...
        utils.parse_address(device_address)
    except (exception.PciDeviceWrongAddressFormat, IndexError):
        # this is not a valid PCI address
        LOG.warning("The PCI address %s was invalid for getting the "
                    "related vGPU type", device_address)
        return
    return pgpu_type_mapping.get(device_address)
def _discover_gpus(vendor_id):
    """Discover the GPUs of one vendor and build their driver objects.

    :param vendor_id: PCI vendor id to filter on, e.g. "10de" to discover
        only NVIDIA GPUs on the host.
    :returns: list of DriverDevice objects, one per matched PCI device line.
    :raises InvalidVGPUType: if the vGPU type configured for a device is
        not among that device's mdev_supported_types.
    """
    # Register the dynamic vGPU options before reading them.
    cyborg.conf.devices.register_dynamic_opts(CONF)
    supported_vgpu_types, pgpu_type_mapping = _get_supported_vgpu_types()

    discovered = []
    for pci_line in gpu_utils.get_pci_devices(gpu_utils.GPU_FLAGS, vendor_id):
        matched = gpu_utils.GPU_INFO_PATTERN.match(pci_line)
        if not matched:
            continue
        info = matched.groupdict()
        # The hostname is used to build the deployable name.
        info['hostname'] = CONF.host
        # vGPU type from cyborg.conf; None means report the device as pGPU.
        vgpu_type = _get_vgpu_type_per_pgpu(
            info["devices"], supported_vgpu_types, pgpu_type_mapping)

        if vgpu_type:
            info["rc"] = constants.RESOURCES["VGPU"]
            types_dir = os.path.expandvars(
                '/sys/bus/pci/devices/{0}/mdev_supported_types'.
                format(info["devices"]))
            if vgpu_type not in os.listdir(types_dir):
                raise exception.InvalidVGPUType(name=vgpu_type)
            info["vGPU_type"] = vgpu_type
            type_dir = os.path.expandvars(
                '/sys/bus/pci/devices/{0}/mdev_supported_types/{1}/'
                .format(info["devices"], info["vGPU_type"]))
            # Translate the vgpu_type into its type name for the trait,
            # eg. 'nvidia-223' becomes 'T4_1B'.
            with open(type_dir + 'name', 'r') as f:
                display_name = f.read().strip()
            vgpu_type_name = display_name.split(' ')[1].replace('-', '_')
            traits = _get_traits(info["vendor_id"],
                                 info["product_id"],
                                 vgpu_type_name)
        else:
            info["rc"] = constants.RESOURCES["PGPU"]
            traits = _get_traits(info["vendor_id"], info["product_id"])

        info.update(traits)
        discovered.append(_generate_driver_device(info))
    return discovered
def discover(vendor_id):
    """Return the driver device objects for all GPUs of ``vendor_id``."""
    return _discover_gpus(vendor_id)

View File

@ -1,5 +1,4 @@
# Copyright 2018 Beijing Lenovo Software Ltd. # Modifications Copyright (C) 2021 ZTE Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain # not use this file except in compliance with the License. You may obtain
# a copy of the License at # a copy of the License at
@ -12,24 +11,15 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
""" """
Utils for GPU driver. Utils for GPU driver.
""" """
from oslo_concurrency import processutils from oslo_concurrency import processutils
from oslo_log import log as logging from oslo_log import log as logging
from oslo_serialization import jsonutils
import re import re
from cyborg.accelerator.common import utils import cyborg.conf
from cyborg.common import constants
from cyborg.conf import CONF
from cyborg.objects.driver_objects import driver_attach_handle
from cyborg.objects.driver_objects import driver_attribute
from cyborg.objects.driver_objects import driver_controlpath_id
from cyborg.objects.driver_objects import driver_deployable
from cyborg.objects.driver_objects import driver_device
import cyborg.privsep import cyborg.privsep
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -42,6 +32,7 @@ GPU_INFO_PATTERN = re.compile(r"(?P<devices>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:"
r"{4}):(?P<product_id>[0-9a-fA-F]{4})].*") r"{4}):(?P<product_id>[0-9a-fA-F]{4})].*")
VENDOR_MAPS = {"10de": "nvidia", "102b": "matrox"} VENDOR_MAPS = {"10de": "nvidia", "102b": "matrox"}
PRODUCT_ID_MAPS = {"1eb8": "T4", "15f7": "P100_PCIE_12GB"}
@cyborg.privsep.sys_admin_pctxt.entrypoint @cyborg.privsep.sys_admin_pctxt.entrypoint
@ -62,19 +53,6 @@ def get_pci_devices(pci_flags, vendor_id=None):
return device_for_vendor_out if vendor_id else all_device_out return device_for_vendor_out if vendor_id else all_device_out
def get_traits(vendor_id, product_id):
    """Build the vendor and product traits for a GPU.

    :param vendor_id: vendor_id of PGPU/VGPU, eg."10de"
    :param product_id: product_id of PGPU/VGPU, eg."1eb8".
    :returns: dict with a single "traits" key.

    Example VGPU traits:
    {traits:["CUSTOM_GPU_NVIDIA", "CUSTOM_GPU_PRODUCT_ID_1EB8"]}
    """
    vendor_trait = "CUSTOM_GPU_" + VENDOR_MAPS.get(vendor_id, "").upper()
    product_trait = "CUSTOM_GPU_PRODUCT_ID_" + product_id.upper()
    return {"traits": [vendor_trait, product_trait]}
def discover_vendors(): def discover_vendors():
vendors = set() vendors = set()
gpus = get_pci_devices(GPU_FLAGS) gpus = get_pci_devices(GPU_FLAGS)
@ -84,93 +62,3 @@ def discover_vendors():
vendor_id = m.groupdict().get("vendor_id") vendor_id = m.groupdict().get("vendor_id")
vendors.add(vendor_id) vendors.add(vendor_id)
return vendors return vendors
def discover_gpus(vendor_id=None):
    """Discover GPUs and report each of them as a pGPU driver device.

    :param vendor_id: optional PCI vendor id used to filter the devices.
    :returns: list of DriverDevice objects, one per matched PCI device line.
    """
    found = []
    for pci_line in get_pci_devices(GPU_FLAGS, vendor_id):
        matched = GPU_INFO_PATTERN.match(pci_line)
        if not matched:
            continue
        info = matched.groupdict()
        # The hostname is used to build the deployable name.
        info['hostname'] = CONF.host
        # TODO(yumeng) support and test VGPU rc generation soon.
        traits = get_traits(info["vendor_id"], info["product_id"])
        info["rc"] = constants.RESOURCES["PGPU"]
        info.update(traits)
        found.append(_generate_driver_device(info))
    return found
def _generate_driver_device(gpu):
    """Assemble the DriverDevice object for one discovered GPU.

    :param gpu: dict describing the GPU; "vendor_id" is required, the
        remaining keys fall back to defaults when missing.
    :returns: a DriverDevice of type GPU with its control-path ID and
        deployable list populated.
    """
    device = driver_device.DriverDevice()
    device.vendor = gpu["vendor_id"]
    device.model = gpu.get('model', 'miss model info')
    # Board info is stored as JSON strings.
    device.std_board_info = jsonutils.dumps({
        'product_id': gpu.get('product_id'),
        'controller': gpu.get('controller'),
    })
    device.vendor_board_info = jsonutils.dumps({
        'vendor_info': gpu.get('vendor_info', 'gpu_vb_info'),
    })
    device.type = constants.DEVICE_GPU
    device.stub = gpu.get('stub', False)
    device.controlpath_id = _generate_controlpath_id(gpu)
    device.deployable_list = _generate_dep_list(gpu)
    return device
def _generate_controlpath_id(gpu):
    """Create the control-path ID for a GPU.

    :param gpu: dict describing the GPU; reads "devices" (PCI address).
    :returns: a DriverControlPathID of type "PCI".
    """
    cpid = driver_controlpath_id.DriverControlPathID()
    # NOTE: GPUs (either pGPU or vGPU) all report "PCI" as their
    # cpid_type, while their attach_handle types differ.
    cpid.cpid_type = "PCI"
    cpid.cpid_info = utils.pci_str_to_json(gpu["devices"])
    return cpid
def _generate_dep_list(gpu):
    """Build the single-element deployable list for one GPU.

    :param gpu: dict describing the GPU; reads "devices", "vendor_id" and
        optionally "hostname".
    :returns: list containing one DriverDeployable with one accelerator and
        one attach handle.
    """
    deployable = driver_deployable.DriverDeployable()
    deployable.attribute_list = _generate_attribute_list(gpu)
    deployable.attach_handle_list = []
    # NOTE(yumeng) Now simply named as <Compute_hostname>_<Device_address>;
    # once cyborg needs to support GPU devices discovered from a baremetal
    # node, we might need to support more formats.
    deployable.name = gpu.get('hostname', '') + '_' + gpu["devices"]
    deployable.driver_name = VENDOR_MAPS.get(gpu["vendor_id"], '').upper()
    # num_accelerators for PGPU is 1; for VGPU it should be the
    # available_instances of the vGPU device.
    # TODO(yumeng) support VGPU num report soon
    deployable.num_accelerators = 1
    deployable.attach_handle_list = [_generate_attach_handle(gpu)]
    return [deployable]
def _generate_attach_handle(gpu):
    """Create the attach handle for a GPU.

    :param gpu: dict describing the GPU; reads "rc" and "devices".
    :returns: a DriverAttachHandle of type PCI when the resource class is
        "PGPU", of type MDEV otherwise.
    """
    handle = driver_attach_handle.DriverAttachHandle()
    handle.attach_type = (
        constants.AH_TYPE_PCI if gpu["rc"] == "PGPU"
        else constants.AH_TYPE_MDEV)
    handle.in_use = False
    handle.attach_info = utils.pci_str_to_json(gpu["devices"])
    return handle
def _generate_attribute_list(gpu):
    """Turn the "rc" and "traits" entries of ``gpu`` into DriverAttributes.

    :param gpu: dict describing one GPU; only the "rc" and "traits" keys
        are consumed here.
    :returns: list of DriverAttribute objects. The resource class is stored
        under key "rc"; each trait is stored under "trait<N>", numbered
        from 0.
    """
    attributes = []
    for key, value in gpu.items():
        if key == "rc":
            rc_attr = driver_attribute.DriverAttribute()
            rc_attr.key = key
            rc_attr.value = value
            attributes.append(rc_attr)
        elif key == "traits":
            position = 0
            for trait in value:
                attributes.append(driver_attribute.DriverAttribute(
                    key="trait" + str(position), value=trait))
                position += 1
    return attributes

View File

@ -389,6 +389,18 @@ class InvalidDriver(Invalid):
_msg_fmt = _("Found an invalid driver: %(name)s") _msg_fmt = _("Found an invalid driver: %(name)s")
class InvalidVGPUType(Invalid):
    """Error for a requested vGPU type that is not valid; takes ``name``."""

    _msg_fmt = _("Invalid requested vGPU type: %(name)s")
class InvalidvGPUConfig(Invalid):
    """Error for an invalid vGPU configuration; takes ``reason``."""

    _msg_fmt = _("Invalid vGPU config: %(reason)s")
class PciDeviceWrongAddressFormat(Invalid):
    """Error for a badly formatted PCI address; takes ``address``."""

    _msg_fmt = _("The PCI address %(address)s has an incorrect format.")
class InvalidType(Invalid): class InvalidType(Invalid):
_msg_fmt = _("Invalid type for %(obj)s: %(type)s." _msg_fmt = _("Invalid type for %(obj)s: %(type)s."
"Expected: %(expected)s") "Expected: %(expected)s")

View File

@ -12,21 +12,42 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import sys
from unittest import mock from unittest import mock
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
import cyborg
from cyborg.accelerator.drivers.gpu.nvidia.driver import NVIDIAGPUDriver
from cyborg.accelerator.drivers.gpu import utils from cyborg.accelerator.drivers.gpu import utils
from cyborg.tests import base from cyborg.tests import base
CONF = cyborg.conf.CONF
NVIDIA_GPU_INFO = "0000:00:06.0 3D controller [0302]: NVIDIA Corporation " \ NVIDIA_GPU_INFO = "0000:00:06.0 3D controller [0302]: NVIDIA Corporation " \
"GP100GL [Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)" "GP100GL [Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)"
NVIDIA_T4_GPU_INFO = "0000:af:00.0 3D controller [0302]: NVIDIA Corporation "\
"TU104GL [Tesla T4] [10de:1eb8] (rev a1)"
NVIDIA_T4_SUPPORTED_MDEV_TYPES = ['nvidia-222', 'nvidia-223', 'nvidia-224',
'nvidia-225', 'nvidia-226', 'nvidia-227',
'nvidia-228', 'nvidia-229', 'nvidia-230',
'nvidia-231', 'nvidia-232', 'nvidia-233',
'nvidia-234', 'nvidia-252', 'nvidia-319',
'nvidia-320', 'nvidia-321']
BUILTIN = '__builtin__' if (sys.version_info[0] < 3) else '__builtins__'
class stdout(object): class stdout(object):
def readlines(self): def readlines(self):
return [NVIDIA_GPU_INFO] return [NVIDIA_GPU_INFO]
def readlines_T4(self):
    """Return a one-line device listing describing a Tesla T4 GPU."""
    return [NVIDIA_T4_GPU_INFO]
class p(object): class p(object):
def __init__(self): def __init__(self):
@ -49,11 +70,14 @@ class TestGPUDriverUtils(base.TestCase):
self.assertEqual(1, len(gpu_vendors)) self.assertEqual(1, len(gpu_vendors))
@mock.patch('cyborg.accelerator.drivers.gpu.utils.lspci_privileged') @mock.patch('cyborg.accelerator.drivers.gpu.utils.lspci_privileged')
def test_discover_gpus(self, mock_devices_for_vendor): def test_discover_gpus_report_pGPU(self, mock_devices_for_vendor):
"""test nvidia pGPU discover"""
mock_devices_for_vendor.return_value = self.p.stdout.readlines() mock_devices_for_vendor.return_value = self.p.stdout.readlines()
self.set_defaults(host='host-192-168-32-195', debug=True) self.set_defaults(host='host-192-168-32-195', debug=True)
vendor_id = '10de'
gpu_list = utils.discover_gpus(vendor_id) nvidia = NVIDIAGPUDriver()
gpu_list = nvidia.discover()
self.assertEqual(1, len(gpu_list)) self.assertEqual(1, len(gpu_list))
attach_handle_list = [ attach_handle_list = [
{'attach_type': 'PCI', {'attach_type': 'PCI',
@ -65,8 +89,8 @@ class TestGPUDriverUtils(base.TestCase):
] ]
attribute_list = [ attribute_list = [
{'key': 'rc', 'value': 'PGPU'}, {'key': 'rc', 'value': 'PGPU'},
{'key': 'trait0', 'value': 'CUSTOM_GPU_NVIDIA'}, {'key': 'trait0', 'value': 'OWNER_CYBORG'},
{'key': 'trait1', 'value': 'CUSTOM_GPU_PRODUCT_ID_15F7'}, {'key': 'trait1', 'value': 'CUSTOM_NVIDIA_15F7'},
] ]
expected = { expected = {
'vendor': '10de', 'vendor': '10de',
@ -118,3 +142,107 @@ class TestGPUDriverUtils(base.TestCase):
self.assertEqual(attach_handle_list[0], self.assertEqual(attach_handle_list[0],
gpu_attach_handle_list[0].as_dict()) gpu_attach_handle_list[0].as_dict())
self.assertEqual(attribute_list, attribute_actual_data) self.assertEqual(attribute_list, attribute_actual_data)
@mock.patch('builtins.open')
@mock.patch('os.listdir')
@mock.patch('os.path.exists')
@mock.patch('cyborg.accelerator.drivers.gpu.utils.lspci_privileged')
def test_discover_gpus_report_vGPU(self, mock_devices_for_vendor,
                                   mock_path_exists,
                                   mock_supported_mdev_types,
                                   mock_open):
    """test nvidia vGPU discover"""
    mock_devices_for_vendor.return_value = self.p.stdout.readlines_T4()
    mock_path_exists.return_value = True
    # os.listdir is mocked for every call, so the same 17-entry list is
    # returned both for mdev_supported_types and for the created-devices
    # directory.
    mock_supported_mdev_types.return_value = NVIDIA_T4_SUPPORTED_MDEV_TYPES
    # Files are opened in this order: the type's 'name' file first, then
    # its 'available_instances' file.
    file_content_list = ['GRID T4-1B', '1']
    mock_open.side_effect = multi_mock_open(*file_content_list)
    self.set_defaults(host='host-192-168-32-195', debug=True)
    self.set_defaults(enabled_vgpu_types='nvidia-223', group='gpu_devices')
    cyborg.conf.devices.register_dynamic_opts(CONF)
    self.set_defaults(
        device_addresses=['0000:af:00.0'], group='vgpu_nvidia-223')

    nvidia = NVIDIAGPUDriver()
    gpu_list = nvidia.discover()
    self.assertEqual(1, len(gpu_list))
    # Only the first attach handle is spelled out; every handle carries
    # its own vgpu_mark suffix, so the list is not padded with copies.
    attach_handle_list = [
        {'attach_type': 'MDEV',
         'attach_info': '{"asked_type": "nvidia-223", '
                        '"bus": "af", '
                        '"device": "00", '
                        '"domain": "0000", '
                        '"function": "0", '
                        '"vgpu_mark": "nvidia-223_0"}',
         'in_use': False}
    ]
    attribute_list = [
        {'key': 'rc', 'value': 'VGPU'},
        {'key': 'trait0', 'value': 'OWNER_CYBORG'},
        {'key': 'trait1', 'value': 'CUSTOM_NVIDIA_1EB8_T4_1B'},
    ]
    expected = {
        'vendor': '10de',
        'type': 'GPU',
        'std_board_info':
            {"controller": "3D controller", "product_id": "1eb8"},
        'vendor_board_info': {"vendor_info": "gpu_vb_info"},
        'deployable_list':
            [
                {
                    # 1 available instance + 17 mocked "created" entries
                    'num_accelerators': 18,
                    'driver_name': 'NVIDIA',
                    'name': 'host-192-168-32-195_0000:af:00.0',
                    'attach_handle_list': attach_handle_list,
                    'attribute_list': attribute_list
                },
            ],
        'controlpath_id': {'cpid_info': '{"bus": "af", '
                                        '"device": "00", '
                                        '"domain": "0000", '
                                        '"function": "0"}',
                           'cpid_type': 'PCI'}
    }
    gpu_obj = gpu_list[0]
    gpu_dict = gpu_obj.as_dict()
    gpu_dep_list = gpu_dict['deployable_list']
    gpu_attach_handle_list = gpu_dep_list[0].as_dict()[
        'attach_handle_list']
    gpu_attribute_list = gpu_dep_list[0].as_dict()['attribute_list']
    attribute_actual_data = sorted(
        (attr.as_dict() for attr in gpu_attribute_list),
        key=lambda i: i['key'])
    self.assertEqual(expected['vendor'], gpu_dict['vendor'])
    self.assertEqual(expected['controlpath_id'],
                     gpu_dict['controlpath_id'])
    self.assertEqual(expected['std_board_info'],
                     jsonutils.loads(gpu_dict['std_board_info']))
    self.assertEqual(expected['vendor_board_info'],
                     jsonutils.loads(gpu_dict['vendor_board_info']))
    self.assertEqual(expected['deployable_list'][0]['num_accelerators'],
                     gpu_dep_list[0].as_dict()['num_accelerators'])
    self.assertEqual(expected['deployable_list'][0]['name'],
                     gpu_dep_list[0].as_dict()['name'])
    self.assertEqual(expected['deployable_list'][0]['driver_name'],
                     gpu_dep_list[0].as_dict()['driver_name'])
    # One attach handle is generated per accelerator.
    self.assertEqual(expected['deployable_list'][0]['num_accelerators'],
                     len(gpu_attach_handle_list))
    self.assertEqual(attach_handle_list[0],
                     gpu_attach_handle_list[0].as_dict())
    self.assertEqual(attribute_list, attribute_actual_data)
def multi_mock_open(*file_contents):
    """Create a mock "open" that serves several files in sequence.

    : params file_contents: the contents each successive open should yield
    : returns: (MagicMock) a mock opener whose first call returns a handle
               reading the first content, whose second call returns a
               handle reading the second content, etc.
    """
    handles = []
    for content in file_contents:
        # The return_value of mock_open() is the file-handle mock.
        handles.append(mock.mock_open(read_data=content).return_value)
    opener = mock.mock_open()
    opener.side_effect = handles
    return opener