Add gpu driver

1. Implement the GPU driver for passthrough mode.
2. Pre-configure the host for GPU passthrough (we should move this guide to
the admin guide doc later):
a) Edit /etc/default/grub
   Modify GRUB_CMDLINE_LINUX='... quiet' to GRUB_CMDLINE_LINUX='... quiet
   intel_iommu=on'
b) grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
c) Reboot compute node.
d) Check whether it was successful:
   dmesg | grep -e DMAR -e IOMMU
   You should see some matching lines printed.
References:
https://pve.proxmox.com/wiki/Pci_passthrough

3.Remove unused import.
4.Fix spelling mistakes.
5.Fix _match_nova_addr, because addr in os-acc is like 0000:00:06.0

Story: 2002954
Change-Id: I555b656f80b32eafdc24f9ba81ddc9af416d95e9
This commit is contained in:
wangzh21 2018-06-10 16:44:45 +08:00
parent 526f3a1429
commit 5c4e648159
14 changed files with 420 additions and 5 deletions

View File

@ -11,7 +11,3 @@ submitted for review via the Gerrit tool:
https://docs.openstack.org/infra/manual/developers.html#development-workflow
Pull requests submitted through GitHub will be ignored.
Bugs should be filed on Launchpad, not GitHub:
https://bugs.launchpad.net/openstack-cyborg

View File

@ -27,7 +27,7 @@ class FPGADriver(object):
"""Base class for FPGA drivers.
This is just a virtual FPGA drivers interface.
Vedor should implement their specific drivers.
Vendor should implement their specific drivers.
"""
@classmethod

View File

@ -0,0 +1,42 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from cyborg.accelerator.drivers.gpu.nvidia.driver import NVIDIAGPUDriver
import os
import glob
from oslo_log import log as logging
# Declare this package as a pkg_resources-style namespace package.
__import__('pkg_resources').declare_namespace(__name__)
# Import this package's ``base`` submodule as soon as the package is imported.
__import__(".".join([__package__, 'base']))
# Module-level logger named after this package.
LOG = logging.getLogger(__name__)
def load_gpu_vendor_driver():
    """Import every GPU vendor driver module found under this package.

    Searches for entries matching ``<vendor>/driver*`` next to this file,
    turns each one into a ``<vendor>.<module>`` name and imports it relative
    to this package. A failed import is logged and does not prevent the
    remaining drivers from being loaded.
    """
    pattern = os.path.join(os.path.dirname(__file__), "*/driver*")
    # A set collapses duplicates such as driver.py / driver.pyc.
    modules = set()
    for path in glob.glob(pattern):
        vendor = os.path.basename(os.path.dirname(path))
        # Only look at the file name when stripping the extension: dots that
        # appear elsewhere in the installation path (for example
        # ".../python2.7/site-packages/...") must not affect the module name.
        module = os.path.basename(path).split(".", 1)[0]
        modules.add(".".join([vendor, module]))
    for m in modules:
        try:
            __import__(".".join([__package__, m]))
            LOG.debug("Successfully loaded GPU vendor driver: %s.", m)
        except ImportError as e:
            LOG.error("Failed to load GPU vendor driver: %s. Details: %s",
                      m, e)
# Run the vendor driver discovery/import once, when this package is imported.
load_gpu_vendor_driver()

View File

@ -0,0 +1,59 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Cyborg GPU driver implementation.
"""
from oslo_log import log as logging
from cyborg.accelerator.drivers.gpu import utils
LOG = logging.getLogger(__name__)
# Maps a PCI vendor ID (as printed by lspci) to the vendor name that a driver
# class advertises through its ``VENDOR`` attribute.
VENDOR_MAPS = {"10de": "nvidia", "102b": "matrox"}


class GPUDriver(object):
    """Base class for GPU drivers.

    This is just a virtual GPU drivers interface.
    Vendor should implement their specific drivers.
    """

    @classmethod
    def create(cls, vendor, *args, **kwargs):
        """Instantiate the driver registered for ``vendor``.

        :param vendor: PCI vendor ID (a key of ``VENDOR_MAPS``) or a vendor
                       name; unknown values are compared as given.
        :param args: positional arguments passed to the driver constructor.
        :param kwargs: keyword arguments passed to the driver constructor.
        :return: an instance of the first direct subclass whose ``VENDOR``
                 equals the resolved vendor name.
        :raises LookupError: if no direct subclass matches.
        """
        # The resolved name does not depend on the subclass, so compute it
        # once instead of on every iteration.
        vendor_name = VENDOR_MAPS.get(vendor, vendor)
        for sclass in cls.__subclasses__():
            # Skip subclasses that do not declare VENDOR (for example an
            # intermediate helper class) instead of raising AttributeError.
            if hasattr(sclass, "VENDOR") and sclass.VENDOR == vendor_name:
                return sclass(*args, **kwargs)
        raise LookupError("Not find the GPU driver for vendor %s" % vendor)

    def discover(self):
        """
        Discover GPU information of current vendor(Identified by class).
        :return: List of GPU information dict.
        """
        raise NotImplementedError()

    @classmethod
    def discover_vendors(cls):
        """
        Discover GPU vendors of current node.
        :return: GPU vendor ID list.
        """
        return utils.discover_vendors()

View File

@ -0,0 +1,33 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Cyborg NVIDIA GPU driver implementation.
"""
from cyborg.accelerator.drivers.gpu.base import GPUDriver
from cyborg.accelerator.drivers.gpu.nvidia import sysinfo
class NVIDIAGPUDriver(GPUDriver):
    """NVIDIA implementation of the GPU driver interface.

    Discovery is delegated to the NVIDIA ``sysinfo`` module.
    """
    # Vendor name advertised by this driver class.
    VENDOR = "nvidia"

    def discover(self):
        """Return the result of ``sysinfo.gpu_tree()``."""
        return sysinfo.gpu_tree()

View File

@ -0,0 +1,26 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Cyborg NVIDIA GPU driver implementation.
"""
from cyborg.accelerator.drivers.gpu import utils
VENDOR_ID = "10de"
def gpu_tree():
    """Return the GPUs that ``utils.discover_gpus`` finds for VENDOR_ID."""
    return utils.discover_gpus(VENDOR_ID)

View File

@ -0,0 +1,118 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Utils for GPU driver.
"""
from oslo_log import log as logging
from oslo_serialization import jsonutils
import re
import subprocess
from cyborg.objects.driver_objects import driver_deployable, driver_device, \
driver_attach_handle, driver_controlpath_id
from cyborg.common import constants
LOG = logging.getLogger(__name__)
# lspci class descriptions that identify a GPU.
GPU_FLAGS = ["VGA compatible controller", "3D controller"]
# Parses one line of ``lspci -nnn -D`` output, for example:
#   0000:00:06.0 3D controller [0302]: NVIDIA Corporation GP100GL
#   [Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)
# The PCI address fields are hexadecimal, so buses such as "af" must match
# too. Raw strings keep "\." and "\[" from being treated as string escapes.
GPU_INFO_PATTERN = re.compile(
    r"(?P<devices>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:"
    r"[0-9a-fA-F]{2}\.[0-9a-fA-F]) "
    r"(?P<controller>.*) [\[].*]: (?P<name>.*) .*"
    r"[\[](?P<vendor_id>[0-9a-fA-F]"
    r"{4}):(?P<product_id>[0-9a-fA-F]{4})].*")
# NOTE(wangzhh): The implementation of current release doesn't support virtual
# GPU.
def _lspci_gpu_lines(vendor_id=None):
    """Return the ``lspci -nnn -D`` output lines that describe GPUs.

    :param vendor_id: optional string; when given, only lines containing it
                      are kept.
    :return: list of text lines that contain one of ``GPU_FLAGS``.
    """
    # The command is passed as an argument list and the filtering that used
    # to be done with "| grep" is done below in Python, so no shell is
    # involved and ``vendor_id`` can never be interpreted as shell syntax.
    # FIXME(wangzhh): Use oslo.privsep instead of running sudo through
    # subprocess.
    try:
        proc = subprocess.Popen(["sudo", "lspci", "-nnn", "-D"],
                                stdout=subprocess.PIPE)
    except OSError as e:
        # The shell pipeline used previously produced no output when the
        # command could not be started; keep returning "no GPUs" here.
        LOG.error("Failed to run lspci: %s", e)
        return []
    # Read the output before wait(): waiting first can block forever once
    # the child has filled the pipe buffer.
    raw_lines = proc.stdout.readlines()
    proc.wait()
    lines = []
    for line in raw_lines:
        # On Python 3 the pipe yields bytes, which the str-based
        # GPU_INFO_PATTERN cannot match.
        if isinstance(line, bytes):
            line = line.decode("utf-8", "replace")
        if not any(flag in line for flag in GPU_FLAGS):
            continue
        if vendor_id and vendor_id not in line:
            continue
        lines.append(line)
    return lines


def discover_vendors():
    """Discover the GPU vendors present on this node.

    :return: set of PCI vendor ID strings parsed from the lspci output.
    """
    vendors = set()
    for line in _lspci_gpu_lines():
        m = GPU_INFO_PATTERN.match(line)
        if m:
            vendors.add(m.groupdict().get("vendor_id"))
    return vendors


def discover_gpus(vender_id=None):
    """Discover the GPUs present on this node.

    :param vender_id: optional PCI vendor ID string used to restrict the
                      result to lspci lines containing it.
    :return: list with one ``_generate_driver_device`` result per lspci line
             that matches ``GPU_INFO_PATTERN``.
    """
    gpu_list = []
    for line in _lspci_gpu_lines(vender_id):
        m = GPU_INFO_PATTERN.match(line)
        if m:
            gpu_list.append(_generate_driver_device(m.groupdict()))
    return gpu_list
def _generate_driver_device(gpu):
    """Build a DriverDevice from one parsed GPU entry.

    :param gpu: dict of the named groups parsed from an lspci line.
    :return: the populated ``driver_device.DriverDevice`` object.
    """
    device = driver_device.DriverDevice()
    device.vendor = gpu["vendor_id"]
    device.model = gpu.get('model', 'miss model info')
    board_info = {
        'product_id': gpu.get('product_id'),
        'controller': gpu.get('controller'),
    }
    # The board info is stored as a JSON string.
    device.std_board_info = jsonutils.dumps(board_info)
    device.type = constants.DEVICE_GPU
    device.controlpath_id = _generate_controlpath_id(gpu)
    device.deployable_list = _generate_dep_list(gpu)
    return device
def _generate_controlpath_id(gpu):
    """Build the DriverControlPathID for a GPU.

    :param gpu: dict of parsed GPU fields; ``gpu["devices"]`` is used as the
                control path info.
    :return: a ``DriverControlPathID`` of type "PCI".
    """
    cpid = driver_controlpath_id.DriverControlPathID()
    cpid.cpid_type = "PCI"
    cpid.cpid_info = gpu["devices"]
    return cpid
def _generate_dep_list(gpu):
    """Build the deployable list for a GPU.

    :param gpu: dict of parsed GPU fields; reads ``name`` (optional) and
                ``devices``.
    :return: list holding a single ``DriverDeployable`` that carries one
             attach handle.
    """
    driver_dep = driver_deployable.DriverDeployable()
    # NOTE(wangzhh): The name of deployable should be unique, its format is
    # under discussion, may look like
    # <ComputeNodeName>_<NumaNodeName>_<CyborgName>_<NumInHost>, now simply
    # named <Device_name>_<Device_address>
    driver_dep.name = gpu.get('name', '') + '_' + gpu["devices"]
    driver_dep.num_accelerators = 1
    # Assigned once; the previous empty-list initialisation was immediately
    # overwritten and has been dropped.
    driver_dep.attach_handle_list = [_generate_attach_handle(gpu)]
    return [driver_dep]
def _generate_attach_handle(gpu):
    """Build the DriverAttachHandle for a GPU.

    :param gpu: dict of parsed GPU fields; ``gpu["devices"]`` is used as the
                attach info.
    :return: a "PCI" ``DriverAttachHandle`` marked as not in use.
    """
    handle = driver_attach_handle.DriverAttachHandle()
    handle.attach_type = "PCI"
    handle.in_use = False
    handle.attach_info = gpu["devices"]
    return handle

View File

@ -0,0 +1,17 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from cyborg.tests import base
class TestNvidiaGPUDriver(base.TestCase):
    """Placeholder test case for the NVIDIA GPU driver; no tests yet."""

View File

@ -0,0 +1,27 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from cyborg.accelerator.drivers.gpu.base import GPUDriver
from cyborg.tests import base
class TestGPUDriver(base.TestCase):
    """Unit tests for the GPUDriver base class."""

    def test_create(self):
        """create() works for "nvidia" and raises LookupError for "matrox"."""
        GPUDriver.create("nvidia")
        self.assertRaises(LookupError, GPUDriver.create, "matrox")

    def test_discover(self):
        """discover() on the base class raises NotImplementedError."""
        base_driver = GPUDriver()
        self.assertRaises(NotImplementedError, base_driver.discover)

View File

@ -0,0 +1,96 @@
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_serialization import jsonutils
import mock
import copy
import subprocess
from cyborg.accelerator.drivers.gpu import utils
from cyborg import objects
from cyborg.tests import base
NVIDIA_GPU_INFO = "0000:00:06.0 3D controller [0302]: NVIDIA Corporation GP100GL " \
"[Tesla P100 PCIe 12GB] [10de:15f7] (rev a1)"
class stdout(object):
    """Test fake for a process stdout that yields one canned lspci line."""

    def readlines(self):
        """Return a one-element list holding NVIDIA_GPU_INFO."""
        return [NVIDIA_GPU_INFO]
class p(object):
    """Test fake for the process object returned by subprocess.Popen."""

    def __init__(self):
        # Canned output stream read by the code under test.
        self.stdout = stdout()

    def wait(self):
        """Do nothing; there is no real process to wait for."""
        pass
class TestGPUDriverUtils(base.TestCase):
    """Unit tests for the GPU driver utils, run against a fake Popen."""

    def setUp(self):
        super(TestGPUDriverUtils, self).setUp()
        # Fake process whose stdout yields a single NVIDIA lspci line.
        self.p = p()

    @mock.patch.object(subprocess, 'Popen', autospec=True)
    def test_discover_vendors(self, mock_popen):
        """One canned GPU line yields exactly one vendor."""
        mock_popen.return_value = self.p
        found_vendors = utils.discover_vendors()
        self.assertEqual(1, len(found_vendors))

    @mock.patch.object(subprocess, 'Popen', autospec=True)
    def test_discover_gpus(self, mock_popen):
        """The canned GPU line is converted into the expected objects."""
        mock_popen.return_value = self.p
        gpu_list = utils.discover_gpus('10de')
        self.assertEqual(1, len(gpu_list))

        # Expected values for the device built from NVIDIA_GPU_INFO.
        expected_handle = {
            'attach_type': 'PCI',
            'attach_info': '0000:00:06.0',
            'in_use': False,
        }
        expected_board_info = {"controller": "3D controller",
                               "product_id": "15f7"}
        expected_cpid = {'cpid_info': '0000:00:06.0', 'cpid_type': 'PCI'}
        expected_dep_name = ('NVIDIA Corporation GP100GL '
                             '[Tesla P100 PCIe 12GB]_0000:00:06.0')

        gpu_dict = gpu_list[0].as_dict()
        dep_dict = gpu_dict['deployable_list'][0].as_dict()
        handle_dict = dep_dict['attach_handle_list'][0].as_dict()

        self.assertEqual('10de', gpu_dict['vendor'])
        self.assertEqual(expected_cpid,
                         gpu_dict['controlpath_id'].as_dict())
        self.assertEqual(expected_board_info,
                         jsonutils.loads(gpu_dict['std_board_info']))
        self.assertEqual(1, dep_dict['num_accelerators'])
        self.assertEqual(expected_dep_name, dep_dict['name'])
        self.assertEqual(expected_handle, handle_dict)

View File

@ -48,6 +48,7 @@ cyborg.database.migration_backend =
cyborg.accelerator.driver =
intel_fpga_driver = cyborg.accelerator.drivers.fpga.intel.driver:IntelFPGADriver
nvmf_spdk_driver = cyborg.accelerator.drivers.spdk.nvmf.nvmf:NVMFDRIVER
nvidia_gpu_driver = cyborg.accelerator.drivers.gpu.nvidia.driver:NVIDIAGPUDriver
oslo.config.opts =
cyborg = cyborg.conf.opts:list_opts