Identify accelerator devices during introspection
Identify accelerator devices by processing PCI devices and update the ironic node when any are found. Currently only Tesla T4 from NVIDIA is supported. Change-Id: Id702cb04cb2445d544965821680cd0cc5cfd37e5 Story: 2007971 Task: 40473
This commit is contained in:
parent
7ff52c732b
commit
de2a27ad8b
|
@ -258,6 +258,19 @@ Here are some plugins that can be additionally enabled:
|
|||
[port_physnet]
|
||||
cidr_map = 10.10.10.0/24:physnet_a, 2001:db8::/64:physnet_b
|
||||
|
||||
``accelerators``
|
||||
Processes PCI data returned from inspection and compares with the
|
||||
accelerator inventory. It updates accelerator device information in
|
||||
the properties field of the ironic node if any accelerator device is
|
||||
found, for example::
|
||||
|
||||
{'local_gb': '1115', 'cpus': '40', 'cpu_arch': 'x86_64', 'memory_mb': '32768',
|
||||
'capabilities': 'boot_mode:bios,cpu_vt:true,cpu_aes:true,cpu_hugepages:true,cpu_hugepages_1g:true,cpu_txt:true',
|
||||
'accelerators': [{'vendor_id': '10de', 'device_id': '1eb8', 'type': 'GPU',
|
||||
'pci_address': '0000:82:00.0',
|
||||
'device_info': 'NVIDIA Corporation Tesla T4'}]
|
||||
}
|
||||
|
||||
Refer to :ref:`contributing_link` for information on how to write your
|
||||
own plugin.
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
from oslo_config import cfg
|
||||
|
||||
from ironic_inspector.conf import accelerators
|
||||
from ironic_inspector.conf import capabilities
|
||||
from ironic_inspector.conf import coordination
|
||||
from ironic_inspector.conf import default
|
||||
|
@ -31,6 +32,7 @@ from ironic_inspector.conf import swift
|
|||
CONF = cfg.CONF
|
||||
|
||||
|
||||
accelerators.register_opts(CONF)
|
||||
capabilities.register_opts(CONF)
|
||||
coordination.register_opts(CONF)
|
||||
discovery.register_opts(CONF)
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
|
||||
from oslo_config import cfg
|
||||
|
||||
from ironic_inspector.common.i18n import _
|
||||
|
||||
|
||||
# By default, use the known_accelerators.yaml file located one directory
# above this module.
_DEFAULT_KNOWN_DEVICES = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../known_accelerators.yaml'))

# Options registered under the [accelerators] group.
_OPTS = [
    cfg.StrOpt(
        'known_devices',
        default=_DEFAULT_KNOWN_DEVICES,
        help=_('The predefined accelerator devices which contains '
               'information used for identifying accelerators.'),
    ),
]
|
||||
|
||||
|
||||
def register_opts(conf):
    """Register the accelerator options on *conf*.

    :param conf: configuration object exposing ``register_opts``; the
                 options are added to its ``accelerators`` group.
    """
    conf.register_opts(_OPTS, group='accelerators')
|
||||
|
||||
|
||||
def list_opts():
    """Return the list of options defined by this module.

    :returns: the module-level ``_OPTS`` list itself (not a copy).
    """
    return _OPTS
|
|
@ -0,0 +1,5 @@
|
|||
pci_devices:
|
||||
- vendor_id: "10de"
|
||||
device_id: "1eb8"
|
||||
type: GPU
|
||||
device_info: NVIDIA Corporation Tesla T4
|
|
@ -0,0 +1,78 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Gather and distinguish Accelerator PCI devices from inventory."""
|
||||
|
||||
from oslo_config import cfg
|
||||
import yaml
|
||||
|
||||
from ironic_inspector.plugins import base
|
||||
from ironic_inspector import utils
|
||||
|
||||
|
||||
# Global oslo.config object; ``CONF.accelerators.known_devices`` is read by
# the hook below.
CONF = cfg.CONF
# Module-level logger obtained through the project's getProcessingLogger
# helper.
LOG = utils.getProcessingLogger(__name__)
|
||||
|
||||
|
||||
class AccelDevicesHook(base.ProcessingHook):
    """Processing hook for distinguishing accelerator devices.

    The known accelerators are loaded once, at construction time, from the
    YAML file named by ``[accelerators]known_devices``. While processing
    introspection data, each reported PCI device is compared against that
    list and the matches are passed to ``node_info.update_properties``
    under the ``accelerators`` key.
    """

    def __init__(self):
        """Load and validate the known accelerator devices.

        :raises: RuntimeError if the loaded data does not have the expected
                 structure (see :meth:`_validate_datasource`).
        """
        super().__init__()
        self._known_devices = {}
        # Pin the encoding so that parsing does not depend on the locale of
        # the process running the service.
        with open(CONF.accelerators.known_devices, encoding='utf-8') as f:
            self._known_devices = yaml.safe_load(f)
        self._validate_datasource()

    def _validate_datasource(self):
        """Do a simple structural check against the data source.

        The data must be a mapping with a ``pci_devices`` key holding a
        list of mappings, each with non-empty ``vendor_id`` and
        ``device_id`` values.

        :raises: RuntimeError if any of these expectations is not met.
        """
        known = self._known_devices
        # An empty file loads as None and a malformed one may load as a
        # list or a scalar; report all of these uniformly.
        if not isinstance(known, dict) or 'pci_devices' not in known:
            raise RuntimeError('Could not find pci_devices in the '
                               'configuration data')

        devices = known['pci_devices']
        if not isinstance(devices, list):
            raise RuntimeError('pci_devices should contain a list of devices')

        for device in devices:
            # Non-mapping entries have no .get(); treat them like entries
            # with missing identifiers instead of failing with an
            # AttributeError.
            if (not isinstance(device, dict) or
                    not device.get('vendor_id') or
                    not device.get('device_id')):
                raise RuntimeError('one of devices is missing vendor_id or '
                                   'device_id')

    def _find_accelerator(self, vendor_id, device_id):
        """Look up a known accelerator by its PCI identifiers.

        :param vendor_id: PCI vendor ID to look for.
        :param device_id: PCI device (product) ID to look for.
        :returns: the first matching entry of the known devices list, or
                  None if there is no match.
        """
        for dev in self._known_devices['pci_devices']:
            if (dev['vendor_id'] == vendor_id and
                    dev['device_id'] == device_id):
                return dev
        return None

    def before_update(self, introspection_data, node_info, **kwargs):
        """Record known accelerator devices found in the PCI inventory.

        :param introspection_data: introspection data; its ``pci_devices``
                                   entry is expected to be a list of dicts
                                   with ``vendor_id``, ``product_id`` and
                                   ``bus`` keys.
        :param node_info: node object whose ``update_properties`` method is
                          called with the list of matched accelerators.
        :param kwargs: ignored.
        """
        pci_devices = introspection_data.get('pci_devices', [])
        if not pci_devices:
            LOG.warning('Unable to distinguish accelerator devices due to no '
                        'PCI devices information was received from the '
                        'ramdisk.')
            return

        accelerators = []
        for pci_dev in pci_devices:
            vendor_id = pci_dev.get('vendor_id')
            product_id = pci_dev.get('product_id')
            if not vendor_id or not product_id:
                # An incomplete record cannot match anything; skip it
                # rather than abort processing of the remaining devices.
                LOG.warning('Skipping PCI device without vendor_id or '
                            'product_id: %s', pci_dev)
                continue

            dev = self._find_accelerator(vendor_id, product_id)
            if dev:
                # Copy the entry so the shared known-devices data is never
                # modified; the address is None if the ramdisk omitted it.
                accel = dict(dev)
                accel['pci_address'] = pci_dev.get('bus')
                accelerators.append(accel)

        if accelerators:
            node_info.update_properties(accelerators=accelerators)
            LOG.info('Found the following accelerator devices: %s',
                     accelerators)
        else:
            LOG.info('No known accelerator devices found')
|
|
@ -0,0 +1,52 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from unittest import mock
|
||||
|
||||
from ironic_inspector import node_cache
|
||||
from ironic_inspector.plugins import accel_device
|
||||
from ironic_inspector.test import base as test_base
|
||||
|
||||
|
||||
class TestAccelDevicesHook(test_base.NodeTest):
    """Unit tests for the accelerators processing hook."""

    def setUp(self):
        super().setUp()
        # Build the hook per test instead of in the class body: creating it
        # at import time reads the known-devices file before the test
        # fixtures are set up and shares one instance between all tests.
        self.hook = accel_device.AccelDevicesHook()

    @mock.patch.object(node_cache.NodeInfo, 'update_properties',
                       autospec=True)
    def test_before_update(self, mock_update_props):
        # A PCI device matching the known Tesla T4 entry is reported along
        # with its bus address.
        self.data['pci_devices'] = [
            {"vendor_id": "10de", "product_id": "1eb8", "class": "1234",
             "bus": "0000:01:1f.0", "revision": "1"},
        ]
        expected_accels = [{'vendor_id': '10de', 'device_id': '1eb8',
                            'type': 'GPU', 'pci_address': '0000:01:1f.0',
                            'device_info': 'NVIDIA Corporation Tesla T4'}]
        self.hook.before_update(self.data, self.node_info)
        mock_update_props.assert_called_once_with(
            self.node_info, accelerators=expected_accels)

    @mock.patch.object(node_cache.NodeInfo, 'update_properties',
                       autospec=True)
    def test_before_update_no_pci_info_from_ipa(self, mock_update_props):
        # No 'pci_devices' is added to the data here: nothing to update.
        self.hook.before_update(self.data, self.node_info)
        mock_update_props.assert_not_called()

    @mock.patch.object(node_cache.NodeInfo, 'update_properties',
                       autospec=True)
    def test_before_update_no_match(self, mock_update_props):
        # PCI data is present but none of it matches a known accelerator.
        self.data['pci_devices'] = [
            {"vendor_id": "1234", "product_id": "1234", "class": "1234",
             "bus": "0000:01:1f.0", "revision": "1"},
        ]
        self.hook.before_update(self.data, self.node_info)
        mock_update_props.assert_not_called()
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Adds an ``accelerators`` plugin to identify accelerator devices and update
|
||||
the bare metal node for future scheduling. The accelerator devices will be
|
||||
saved to node properties under the key ``accelerators``. Introduces a
|
||||
configuration option ``[accelerators]known_devices`` to specify a
|
||||
configuration file which contains required information to identify
|
||||
accelerator devices. By default it uses the in-tree configuration file
|
||||
named ``known_accelerators.yaml``.
|
|
@ -15,6 +15,7 @@ keystonemiddleware>=4.18.0 # Apache-2.0
|
|||
netaddr>=0.7.18 # BSD
|
||||
pbr!=2.1.0,>=2.0.0 # Apache-2.0
|
||||
pytz>=2013.6 # MIT
|
||||
PyYAML>=5.3.1
|
||||
openstacksdk>=0.40.0 # Apache-2.0
|
||||
oslo.concurrency>=3.26.0 # Apache-2.0
|
||||
oslo.config>=5.2.0 # Apache-2.0
|
||||
|
|
|
@ -36,6 +36,7 @@ console_scripts =
|
|||
wsgi_scripts =
|
||||
ironic-inspector-api-wsgi = ironic_inspector.cmd.wsgi:initialize_wsgi_app
|
||||
ironic_inspector.hooks.processing =
|
||||
accelerators = ironic_inspector.plugins.accel_device:AccelDevicesHook
|
||||
scheduler = ironic_inspector.plugins.standard:SchedulerHook
|
||||
validate_interfaces = ironic_inspector.plugins.standard:ValidateInterfacesHook
|
||||
ramdisk_error = ironic_inspector.plugins.standard:RamdiskErrorHook
|
||||
|
|
Loading…
Reference in New Issue