diff --git a/doc/source/user/usage.rst b/doc/source/user/usage.rst index 4728efda0..64b569c4b 100644 --- a/doc/source/user/usage.rst +++ b/doc/source/user/usage.rst @@ -258,6 +258,19 @@ Here are some plugins that can be additionally enabled: [port_physnet] cidr_map = 10.10.10.0/24:physnet_a, 2001:db8::/64:physnet_b +``accelerators`` + Processes PCI data returned from inspection and compares it with the + accelerator inventory. If any accelerator device is found, the device + information is saved to the properties field of the ironic node under + the ``accelerators`` key, for example:: + + {'local_gb': '1115', 'cpus': '40', 'cpu_arch': 'x86_64', 'memory_mb': '32768', + 'capabilities': 'boot_mode:bios,cpu_vt:true,cpu_aes:true,cpu_hugepages:true,cpu_hugepages_1g:true,cpu_txt:true', + 'accelerators': [{'vendor_id': '10de', 'device_id': '1eb8', 'type': 'GPU', + 'pci_address': '0000:82:00.0', + 'device_info': 'NVIDIA Corporation Tesla T4'}] + } + Refer to :ref:`contributing_link` for information on how to write your own plugin. diff --git a/ironic_inspector/conf/__init__.py b/ironic_inspector/conf/__init__.py index f12431181..4592fa011 100644 --- a/ironic_inspector/conf/__init__.py +++ b/ironic_inspector/conf/__init__.py @@ -12,6 +12,7 @@ from oslo_config import cfg +from ironic_inspector.conf import accelerators from ironic_inspector.conf import capabilities from ironic_inspector.conf import coordination from ironic_inspector.conf import default @@ -31,6 +32,7 @@ from ironic_inspector.conf import swift CONF = cfg.CONF +accelerators.register_opts(CONF) capabilities.register_opts(CONF) coordination.register_opts(CONF) discovery.register_opts(CONF) diff --git a/ironic_inspector/conf/accelerators.py b/ironic_inspector/conf/accelerators.py new file mode 100644 index 000000000..b90e0a4ac --- /dev/null +++ b/ironic_inspector/conf/accelerators.py @@ -0,0 +1,35 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from oslo_config import cfg
+
+from ironic_inspector.common.i18n import _
+
+
+_OPTS = [
+    cfg.StrOpt('known_devices',
+               default=os.path.abspath(os.path.join(
+                   os.path.dirname(__file__), '../known_accelerators.yaml')),
+               help=_('The predefined accelerator devices which contains '
+                      'information used for identifying accelerators.')),
+]
+
+
+def register_opts(conf):
+    conf.register_opts(_OPTS, 'accelerators')
+
+
+def list_opts():
+    return _OPTS
diff --git a/ironic_inspector/known_accelerators.yaml b/ironic_inspector/known_accelerators.yaml
new file mode 100644
index 000000000..950c47c03
--- /dev/null
+++ b/ironic_inspector/known_accelerators.yaml
@@ -0,0 +1,5 @@
+pci_devices:
+  - vendor_id: "10de"
+    device_id: "1eb8"
+    type: GPU
+    device_info: NVIDIA Corporation Tesla T4
diff --git a/ironic_inspector/plugins/accel_device.py b/ironic_inspector/plugins/accel_device.py
new file mode 100644
index 000000000..9b5503d47
--- /dev/null
+++ b/ironic_inspector/plugins/accel_device.py
@@ -0,0 +1,114 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Gather and distinguish Accelerator PCI devices from inventory."""
+
+from oslo_config import cfg
+import yaml
+
+from ironic_inspector.plugins import base
+from ironic_inspector import utils
+
+
+CONF = cfg.CONF
+LOG = utils.getProcessingLogger(__name__)
+
+
+class AccelDevicesHook(base.ProcessingHook):
+    """Processing hook for distinguishing accelerator devices.
+
+    PCI devices reported by the ramdisk are matched against the known
+    accelerators loaded from the file set in
+    ``[accelerators]known_devices``; matches are saved to the node
+    properties under the ``accelerators`` key.
+    """
+
+    def __init__(self):
+        """Load and validate the known accelerator devices.
+
+        :raises: RuntimeError if the loaded data fails validation.
+        """
+        super(AccelDevicesHook, self).__init__()
+        self._known_devices = {}
+        with open(CONF.accelerators.known_devices) as f:
+            self._known_devices = yaml.safe_load(f)
+        self._validate_datasource()
+
+    def _validate_datasource(self):
+        """Do a simple sanity check against the loaded data source.
+
+        :raises: RuntimeError if ``pci_devices`` is missing or is not a
+                 list, or if an entry is not a mapping with non-empty
+                 ``vendor_id`` and ``device_id`` values.
+        """
+        if (not isinstance(self._known_devices, dict) or
+                'pci_devices' not in self._known_devices):
+            raise RuntimeError('Could not find pci_devices in the '
+                               'configuration data')
+        if not isinstance(self._known_devices['pci_devices'], list):
+            raise RuntimeError('pci_devices should contain a list of devices')
+        for device in self._known_devices['pci_devices']:
+            # A non-mapping entry (e.g. a bare string) has no .get(); report
+            # it as a configuration error instead of an AttributeError.
+            if (not isinstance(device, dict) or
+                    not device.get('vendor_id') or
+                    not device.get('device_id')):
+                raise RuntimeError('one of devices is missing vendor_id or '
+                                   'device_id')
+
+    def _find_accelerator(self, vendor_id, device_id):
+        """Look up a known accelerator by its PCI IDs.
+
+        :param vendor_id: PCI vendor ID to match.
+        :param device_id: PCI device ID to match.
+        :returns: the first matching known device entry, or None.
+        """
+        for dev in self._known_devices['pci_devices']:
+            if (dev['vendor_id'] == vendor_id and
+                    dev['device_id'] == device_id):
+                return dev
+        return None
+
+    def before_update(self, introspection_data, node_info, **kwargs):
+        """Save the known accelerators found on the node to its properties.
+
+        :param introspection_data: data received from the ramdisk; its
+                                   ``pci_devices`` list is inspected.
+        :param node_info: node whose properties are updated with the
+                          ``accelerators`` list when a match is found.
+        """
+        pci_devices = introspection_data.get('pci_devices', [])
+        if not pci_devices:
+            LOG.warning('Unable to distinguish accelerator devices due to no '
+                        'PCI devices information was received from the '
+                        'ramdisk.')
+            return
+
+        accelerators = []
+        for pci_dev in pci_devices:
+            # The ramdisk data is external input: use .get() so that an
+            # entry lacking a field cannot abort processing with KeyError.
+            # An entry without IDs simply matches no known device.
+            dev = self._find_accelerator(pci_dev.get('vendor_id'),
+                                         pci_dev.get('product_id'))
+            if dev:
+                # Copy, so the shared known device entry is not modified.
+                accel = dict(dev, pci_address=pci_dev.get('bus'))
+                accelerators.append(accel)
+
+        if accelerators:
+            node_info.update_properties(accelerators=accelerators)
+            LOG.info('Found the following accelerator devices: %s',
+                     accelerators)
+        else:
+            LOG.info('No known accelerator devices found')
diff --git a/ironic_inspector/test/unit/test_plugins_accel_device.py b/ironic_inspector/test/unit/test_plugins_accel_device.py
new file mode 100644
index 000000000..db8660bb1
--- /dev/null
+++ b/ironic_inspector/test/unit/test_plugins_accel_device.py
@@ -0,0 +1,52 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+from ironic_inspector import node_cache
+from ironic_inspector.plugins import accel_device
+from ironic_inspector.test import base as test_base
+
+
+class TestAccelDevicesHook(test_base.NodeTest):
+    hook = accel_device.AccelDevicesHook()
+
+    @mock.patch.object(node_cache.NodeInfo, 'update_properties',
+                       autospec=True)
+    def test_before_update(self, mock_update_props):
+        self.data['pci_devices'] = [
+            {"vendor_id": "10de", "product_id": "1eb8", "class": "1234",
+             "bus": "0000:01:1f.0", "revision": "1"},
+        ]
+        expected_accels = [{'vendor_id': '10de', 'device_id': '1eb8',
+                            'type': 'GPU', 'pci_address': '0000:01:1f.0',
+                            'device_info': 'NVIDIA Corporation Tesla T4'}]
+        self.hook.before_update(self.data, self.node_info)
+        mock_update_props.assert_called_once_with(self.node_info,
+                                                  accelerators=expected_accels)
+
+    @mock.patch.object(node_cache.NodeInfo, 'update_properties',
+                       autospec=True)
+    def test_before_update_no_pci_info_from_ipa(self, mock_update_props):
+        self.hook.before_update(self.data, self.node_info)
+
self.assertFalse(mock_update_props.called) + + @mock.patch.object(node_cache.NodeInfo, 'update_properties', + autospec=True) + def test_before_update_no_match(self, mock_update_props): + self.data['pci_devices'] = [ + {"vendor_id": "1234", "product_id": "1234", "class": "1234", + "bus": "0000:01:1f.0", "revision": "1"}, + ] + self.hook.before_update(self.data, self.node_info) + self.assertFalse(mock_update_props.called) diff --git a/releasenotes/notes/accelerators-2aa4f0cedf359810.yaml b/releasenotes/notes/accelerators-2aa4f0cedf359810.yaml new file mode 100644 index 000000000..2b7a368ab --- /dev/null +++ b/releasenotes/notes/accelerators-2aa4f0cedf359810.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds an ``accelerators`` plugin to identify accelerator devices and update + the bare metal node for future scheduling. The accelerator devices will be + saved to node properties under the key ``accelerators``. Introduces a + configuration option ``[accelerators]known_devices`` to specify a + configuration file which contains required information to identify + accelerator devices. By default it uses the in-tree configuration file + named ``known_accelerators.yaml``. 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 862e81790..c1999aa65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,7 @@ keystonemiddleware>=4.18.0 # Apache-2.0 netaddr>=0.7.18 # BSD pbr!=2.1.0,>=2.0.0 # Apache-2.0 pytz>=2013.6 # MIT +PyYAML>=5.3.1 # MIT openstacksdk>=0.40.0 # Apache-2.0 oslo.concurrency>=3.26.0 # Apache-2.0 oslo.config>=5.2.0 # Apache-2.0 diff --git a/setup.cfg b/setup.cfg index 4f034699f..18399455c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,7 @@ console_scripts = wsgi_scripts = ironic-inspector-api-wsgi = ironic_inspector.cmd.wsgi:initialize_wsgi_app ironic_inspector.hooks.processing = + accelerators = ironic_inspector.plugins.accel_device:AccelDevicesHook scheduler = ironic_inspector.plugins.standard:SchedulerHook validate_interfaces = ironic_inspector.plugins.standard:ValidateInterfacesHook ramdisk_error = ironic_inspector.plugins.standard:RamdiskErrorHook