Add gpu driver
1. Implement the GPU driver for passthrough mode. 2. Pre-configuration for GPU passthrough (we should move this guide to the admin guide doc later): a) Edit /etc/default/grub: change GRUB_CMDLINE_LINUX='… quiet' to GRUB_CMDLINE_LINUX='… quiet intel_iommu=on' b) Run grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg c) Reboot the compute node. d) Check whether it was successful: dmesg | grep -e DMAR -e IOMMU should echo some output. References: https://pve.proxmox.com/wiki/Pci_passthrough 3. Remove unused imports. 4. Fix spelling mistakes. 5. Fix _match_nova_addr, because an addr in os-acc looks like 0000:00:06.0 Story: 2002954 Change-Id: I555b656f80b32eafdc24f9ba81ddc9af416d95e9
This commit is contained in:
parent
526f3a1429
commit
5c4e648159
@ -11,7 +11,3 @@ submitted for review via the Gerrit tool:
|
||||
https://docs.openstack.org/infra/manual/developers.html#development-workflow
|
||||
|
||||
Pull requests submitted through GitHub will be ignored.
|
||||
|
||||
Bugs should be filed on Launchpad, not GitHub:
|
||||
|
||||
https://bugs.launchpad.net/openstack-cyborg
|
||||
|
@ -27,7 +27,7 @@ class FPGADriver(object):
|
||||
"""Base class for FPGA drivers.
|
||||
|
||||
This is just a virtual FPGA drivers interface.
|
||||
Vedor should implement their specific drivers.
|
||||
Vendor should implement their specific drivers.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
|
42
cyborg/accelerator/drivers/gpu/__init__.py
Normal file
42
cyborg/accelerator/drivers/gpu/__init__.py
Normal file
@ -0,0 +1,42 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from cyborg.accelerator.drivers.gpu.nvidia.driver import NVIDIAGPUDriver
|
||||
import os
|
||||
import glob
|
||||
|
||||
from oslo_log import log as logging
|
||||
|
||||
|
||||
__import__('pkg_resources').declare_namespace(__name__)
|
||||
__import__(".".join([__package__, 'base']))
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_gpu_vendor_driver():
    """Import every ``<vendor>/driver*`` module that sits beside this file.

    Each match of ``*/driver*`` under this package's directory is turned
    into a ``<vendor>.<module>`` name and imported relative to
    ``__package__``.  An ``ImportError`` for one vendor is logged and does
    not stop the remaining vendors from being loaded.
    """
    files = glob.glob(os.path.join(os.path.dirname(__file__), "*/driver*"))
    modules = set()
    for path in files:
        vendor = os.path.basename(os.path.dirname(path))
        # Byte-code caches are not vendor packages.
        if vendor == "__pycache__":
            continue
        # Only the file name is split on ".", never the whole path: the
        # install prefix may itself contain dots (e.g. ".../python2.7/...").
        module = os.path.basename(path).split(".", 1)[0]
        modules.add(".".join([vendor, module]))
    # Sorted so that the import order (and the log output) is deterministic.
    for m in sorted(modules):
        try:
            __import__(".".join([__package__, m]))
            LOG.debug("Successfully loaded GPU vendor driver: %s.", m)
        except ImportError as e:
            LOG.error("Failed to load GPU vendor driver: %s. Details: %s",
                      m, e)
|
||||
|
||||
|
||||
load_gpu_vendor_driver()
|
59
cyborg/accelerator/drivers/gpu/base.py
Normal file
59
cyborg/accelerator/drivers/gpu/base.py
Normal file
@ -0,0 +1,59 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
"""
|
||||
Cyborg GPU driver implementation.
|
||||
"""
|
||||
from oslo_log import log as logging
|
||||
|
||||
from cyborg.accelerator.drivers.gpu import utils
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
VENDOR_MAPS = {"10de": "nvidia", "102b": "matrox"}
|
||||
|
||||
|
||||
class GPUDriver(object):
    """Common interface for GPU vendor drivers.

    Vendor drivers subclass this class, expose a ``VENDOR`` class attribute
    and override :meth:`discover`.
    """

    @classmethod
    def create(cls, vendor, *args, **kwargs):
        """Build the driver of the direct subclass that serves *vendor*.

        :param vendor: a vendor name, or a key of ``VENDOR_MAPS`` which is
                       translated to a vendor name before matching.
        :param args: positional arguments forwarded to the driver class.
        :param kwargs: keyword arguments forwarded to the driver class.
        :return: an instance of the first matching direct subclass.
        :raises LookupError: when no direct subclass matches.
        """
        candidates = (
            driver_cls for driver_cls in cls.__subclasses__()
            if VENDOR_MAPS.get(vendor, vendor) == driver_cls.VENDOR
        )
        chosen = next(candidates, None)
        if chosen is None:
            raise LookupError("Not find the GPU driver for vendor %s" % vendor)
        return chosen(*args, **kwargs)

    def discover(self):
        """Report the GPUs handled by this driver's vendor.

        Must be overridden by vendor drivers; the base class always raises.

        :return: List of GPU information dict.
        :raises NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    @classmethod
    def discover_vendors(cls):
        """Return the GPU vendor IDs reported by ``utils.discover_vendors``.

        :return: GPU vendor ID list.
        """
        vendor_ids = utils.discover_vendors()
        return vendor_ids
|
0
cyborg/accelerator/drivers/gpu/nvidia/__init__.py
Normal file
0
cyborg/accelerator/drivers/gpu/nvidia/__init__.py
Normal file
33
cyborg/accelerator/drivers/gpu/nvidia/driver.py
Normal file
33
cyborg/accelerator/drivers/gpu/nvidia/driver.py
Normal file
@ -0,0 +1,33 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
"""
|
||||
Cyborg NVIDIA GPU driver implementation.
|
||||
"""
|
||||
|
||||
from cyborg.accelerator.drivers.gpu.base import GPUDriver
|
||||
from cyborg.accelerator.drivers.gpu.nvidia import sysinfo
|
||||
|
||||
|
||||
class NVIDIAGPUDriver(GPUDriver):
    """GPU driver for NVIDIA devices.

    Matched by vendor name ``"nvidia"``; discovery is delegated to the
    NVIDIA ``sysinfo`` module.
    """

    VENDOR = "nvidia"

    def discover(self):
        """Return what ``sysinfo.gpu_tree()`` reports."""
        gpus = sysinfo.gpu_tree()
        return gpus
|
26
cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py
Normal file
26
cyborg/accelerator/drivers/gpu/nvidia/sysinfo.py
Normal file
@ -0,0 +1,26 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
"""
|
||||
Cyborg NVIDIA GPU driver implementation.
|
||||
"""
|
||||
|
||||
from cyborg.accelerator.drivers.gpu import utils
|
||||
VENDOR_ID = "10de"
|
||||
|
||||
|
||||
def gpu_tree():
    """Return the result of ``utils.discover_gpus`` for ``VENDOR_ID``."""
    return utils.discover_gpus(VENDOR_ID)
|
118
cyborg/accelerator/drivers/gpu/utils.py
Normal file
118
cyborg/accelerator/drivers/gpu/utils.py
Normal file
@ -0,0 +1,118 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
"""
|
||||
Utils for GPU driver.
|
||||
"""
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from cyborg.objects.driver_objects import driver_deployable, driver_device, \
|
||||
driver_attach_handle, driver_controlpath_id
|
||||
from cyborg.common import constants
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
# Device-class descriptions used to pick GPU lines out of the lspci output.
GPU_FLAGS = ["VGA compatible controller", "3D controller"]
# Parses one line of `lspci -nnn -D` output, e.g.
#   0000:3b:00.0 3D controller [0302]: NVIDIA ... [10de:15f7] (rev a1)
# The PCI domain, bus and device numbers are hexadecimal, so they are matched
# with hex digits; a decimal-only class would miss addresses such as
# "0000:3b:00.0".  Raw strings keep "\." from being an invalid str escape.
GPU_INFO_PATTERN = re.compile(
    r"(?P<devices>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9]) "
    r"(?P<controller>.*) [\[].*]: (?P<name>.*) .*"
    r"[\[](?P<vendor_id>[0-9a-fA-F]"
    r"{4}):(?P<product_id>[0-9a-fA-F]{4})].*")
|
||||
|
||||
# NOTE(wangzhh): The implementation of current release doesn't support virtual
|
||||
# GPU.
|
||||
|
||||
|
||||
def discover_vendors():
    """Return the PCI vendor IDs of the GPUs that lspci reports.

    Runs ``sudo lspci -nnn -D``, keeps the lines that mention one of
    ``GPU_FLAGS`` and parses them with ``GPU_INFO_PATTERN``.

    :return: set of vendor ID strings (empty when nothing matches).
    """
    # lspci is invoked with an argument list and the GPU_FLAGS filtering is
    # done here, so no shell is needed.
    # FIXME(wangzhh): Use oslo.privsep instead of calling sudo directly.
    cmd = ["sudo", "lspci", "-nnn", "-D"]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Drain the pipe before waiting: waiting first can block forever once
    # the child has filled the pipe buffer.
    lines = p.stdout.readlines()
    p.wait()
    vendors = set()
    for line in lines:
        # The pipe yields bytes on Python 3; the pattern is a text pattern.
        if isinstance(line, bytes):
            line = line.decode("utf-8", "replace")
        if not any(flag in line for flag in GPU_FLAGS):
            continue
        m = GPU_INFO_PATTERN.match(line)
        if m:
            vendors.add(m.group("vendor_id"))
    return vendors
|
||||
|
||||
|
||||
def discover_gpus(vender_id=None):
    """Return driver device objects for the GPUs that lspci reports.

    Runs ``sudo lspci -nnn -D``, keeps the lines that mention one of
    ``GPU_FLAGS``, parses them with ``GPU_INFO_PATTERN`` and converts every
    match with ``_generate_driver_device``.

    :param vender_id: optional PCI vendor ID (e.g. ``"10de"``); when given,
                      only devices whose vendor ID field equals it
                      (case-insensitively) are returned.
    :return: list of objects built by ``_generate_driver_device``.
    """
    # lspci is invoked with an argument list and all filtering is done here,
    # so the caller-supplied vendor ID never reaches a shell command line.
    # FIXME(wangzhh): Use oslo.privsep instead of calling sudo directly.
    cmd = ["sudo", "lspci", "-nnn", "-D"]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Drain the pipe before waiting: waiting first can block forever once
    # the child has filled the pipe buffer.
    lines = p.stdout.readlines()
    p.wait()
    wanted = vender_id.lower() if vender_id else None
    gpu_list = []
    for line in lines:
        # The pipe yields bytes on Python 3; the pattern is a text pattern.
        if isinstance(line, bytes):
            line = line.decode("utf-8", "replace")
        if not any(flag in line for flag in GPU_FLAGS):
            continue
        m = GPU_INFO_PATTERN.match(line)
        if not m:
            continue
        gpu_dict = m.groupdict()
        # Compare against the parsed vendor field rather than searching the
        # whole line, which could also hit a product ID or the device name.
        if wanted and gpu_dict["vendor_id"].lower() != wanted:
            continue
        gpu_list.append(_generate_driver_device(gpu_dict))
    return gpu_list
|
||||
|
||||
|
||||
def _generate_driver_device(gpu):
    """Build a ``DriverDevice`` from one parsed GPU record.

    :param gpu: dict describing the GPU; ``vendor_id`` is required here and
                ``devices`` is required by the helpers called below.
    :return: the populated ``DriverDevice`` object.
    """
    device = driver_device.DriverDevice()
    device.vendor = gpu["vendor_id"]
    # NOTE(review): GPU_INFO_PATTERN defines no "model" group, so records
    # coming from discover_gpus presumably always get the fallback text --
    # confirm whether "name" was intended here.
    device.model = gpu.get('model', 'miss model info')
    device.std_board_info = jsonutils.dumps({
        'product_id': gpu.get('product_id'),
        'controller': gpu.get('controller'),
    })
    device.type = constants.DEVICE_GPU
    device.controlpath_id = _generate_controlpath_id(gpu)
    device.deployable_list = _generate_dep_list(gpu)
    return device
|
||||
|
||||
|
||||
def _generate_controlpath_id(gpu):
    """Build the PCI control path ID object for a parsed GPU record.

    :param gpu: dict whose ``devices`` entry is stored as ``cpid_info``.
    :return: the populated ``DriverControlPathID`` object.
    """
    cpid = driver_controlpath_id.DriverControlPathID()
    cpid.cpid_info = gpu["devices"]
    cpid.cpid_type = "PCI"
    return cpid
|
||||
|
||||
|
||||
def _generate_dep_list(gpu):
    """Build the single-element deployable list for a parsed GPU record.

    :param gpu: dict with a required ``devices`` entry and an optional
                ``name`` entry.
    :return: list holding one ``DriverDeployable`` with one attach handle.
    """
    deployable = driver_deployable.DriverDeployable()
    # NOTE(wangzhh): The name of deployable should be unique, its format is
    # under discussion, may look like
    # <ComputeNodeName>_<NumaNodeName>_<CyborgName>_<NumInHost>, now simply
    # named <Device_name>_<Device_address>
    deployable.name = '_'.join([gpu.get('name', ''), gpu["devices"]])
    deployable.num_accelerators = 1
    deployable.attach_handle_list = [_generate_attach_handle(gpu)]
    return [deployable]
|
||||
|
||||
|
||||
def _generate_attach_handle(gpu):
    """Build the PCI attach handle object for a parsed GPU record.

    :param gpu: dict whose ``devices`` entry is stored as ``attach_info``.
    :return: the populated ``DriverAttachHandle``, marked as not in use.
    """
    handle = driver_attach_handle.DriverAttachHandle()
    handle.attach_info = gpu["devices"]
    handle.attach_type = "PCI"
    handle.in_use = False
    return handle
|
@ -0,0 +1,17 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from cyborg.tests import base
|
||||
|
||||
|
||||
class TestNvidiaGPUDriver(base.TestCase):
    """Placeholder test case for the NVIDIA GPU driver (no tests yet)."""
|
27
cyborg/tests/unit/accelerator/drivers/gpu/test_base.py
Normal file
27
cyborg/tests/unit/accelerator/drivers/gpu/test_base.py
Normal file
@ -0,0 +1,27 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
from cyborg.accelerator.drivers.gpu.base import GPUDriver
|
||||
from cyborg.tests import base
|
||||
|
||||
|
||||
class TestGPUDriver(base.TestCase):
    """Unit tests for the vendor-independent ``GPUDriver`` base class."""

    def test_create(self):
        # The created object must actually be checked: it has to be a
        # GPUDriver whose VENDOR is the one that was asked for.
        driver = GPUDriver.create("nvidia")
        self.assertIsInstance(driver, GPUDriver)
        self.assertEqual("nvidia", driver.VENDOR)
        # No driver is expected to be registered for this vendor.
        self.assertRaises(LookupError, GPUDriver.create, "matrox")

    def test_discover(self):
        # The base class only declares discover(); calling it must raise.
        d = GPUDriver()
        self.assertRaises(NotImplementedError, d.discover)
|
96
cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py
Normal file
96
cyborg/tests/unit/accelerator/drivers/gpu/test_utils.py
Normal file
@ -0,0 +1,96 @@
|
||||
# Copyright 2018 Beijing Lenovo Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
import mock
|
||||
import copy
|
||||
import subprocess
|
||||
|
||||
from cyborg.accelerator.drivers.gpu import utils
|
||||
from cyborg import objects
|
||||
from cyborg.tests import base
|
||||
|
||||
# One canned lspci output line describing an NVIDIA GPU.
NVIDIA_GPU_INFO = (
    "0000:00:06.0 3D controller [0302]: NVIDIA Corporation GP100GL "
    "[Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)"
)


class stdout(object):
    """Fake stdout pipe whose readlines() yields the canned lspci line."""

    def readlines(self):
        canned = [NVIDIA_GPU_INFO]
        return canned


class p(object):
    """Fake process object offering only ``stdout`` and ``wait()``."""

    def __init__(self):
        self.stdout = stdout()

    def wait(self):
        # Nothing to wait for; the fake process has already "finished".
        return None
|
||||
|
||||
|
||||
class TestGPUDriverUtils(base.TestCase):
    """Tests for the GPU ``utils`` helpers with ``subprocess.Popen`` mocked."""

    def setUp(self):
        super(TestGPUDriverUtils, self).setUp()
        # Fake process object returned by the mocked Popen in every test.
        self.p = p()

    @mock.patch.object(subprocess, 'Popen', autospec=True)
    def test_discover_vendors(self, mock_popen):
        mock_popen.return_value = self.p
        # The canned output holds a single GPU, hence a single vendor.
        self.assertEqual(1, len(utils.discover_vendors()))

    @mock.patch.object(subprocess, 'Popen', autospec=True)
    def test_discover_gpus(self, mock_popen):
        mock_popen.return_value = self.p
        devices = utils.discover_gpus('10de')
        self.assertEqual(1, len(devices))

        want_handle = {
            'attach_type': 'PCI',
            'attach_info': '0000:00:06.0',
            'in_use': False,
        }
        want_deployable = {
            'num_accelerators': 1,
            'name': 'NVIDIA Corporation GP100GL '
                    '[Tesla P100 PCIe 12GB]_0000:00:06.0',
            'attach_handle_list': [want_handle],
        }
        want_device = {
            'vendor': '10de',
            'type': 'GPU',
            'std_board_info': {"controller": "3D controller",
                               "product_id": "15f7"},
            'deployable_list': [want_deployable],
            'controlpath_id': {'cpid_info': '0000:00:06.0',
                               'cpid_type': 'PCI'},
        }

        # Unwrap the nested objects once, then compare field by field.
        device = devices[0].as_dict()
        deployable = device['deployable_list'][0].as_dict()
        handle = deployable['attach_handle_list'][0].as_dict()

        self.assertEqual(want_device['vendor'], device['vendor'])
        self.assertEqual(want_device['controlpath_id'],
                         device['controlpath_id'].as_dict())
        self.assertEqual(want_device['std_board_info'],
                         jsonutils.loads(device['std_board_info']))
        self.assertEqual(want_deployable['num_accelerators'],
                         deployable['num_accelerators'])
        self.assertEqual(want_deployable['name'], deployable['name'])
        self.assertEqual(want_handle, handle)
|
@ -48,6 +48,7 @@ cyborg.database.migration_backend =
|
||||
cyborg.accelerator.driver =
|
||||
intel_fpga_driver = cyborg.accelerator.drivers.fpga.intel.driver:IntelFPGADriver
|
||||
nvmf_spdk_driver = cyborg.accelerator.drivers.spdk.nvmf.nvmf:NVMFDRIVER
|
||||
nvidia_gpu_driver = cyborg.accelerator.drivers.gpu.nvidia.driver:NVIDIAGPUDriver
|
||||
|
||||
oslo.config.opts =
|
||||
cyborg = cyborg.conf.opts:list_opts
|
||||
|
Loading…
x
Reference in New Issue
Block a user