From 7c4fcad4a4b1afbe367d0b1980658fd882087547 Mon Sep 17 00:00:00 2001 From: Aurelien Lourot Date: Wed, 2 Mar 2022 11:18:25 +0100 Subject: [PATCH] Add list-vgpu-types action Change-Id: Ibd927191006152c82216974ac6ba0122aed8fd8d --- actions.yaml | 2 ++ src/charm.py | 11 +++++++++ src/nvidia_utils.py | 39 ++++++++++++++++++++++++++++++ unit_tests/test_nvidia_utils.py | 43 ++++++++++++++++++++++++++++++++- 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 actions.yaml diff --git a/actions.yaml b/actions.yaml new file mode 100644 index 0000000..872c78f --- /dev/null +++ b/actions.yaml @@ -0,0 +1,2 @@ +list-vgpu-types: + description: List all vGPU types registered by the NVIDIA driver. diff --git a/src/charm.py b/src/charm.py index 3108ca2..b3fde2a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -25,6 +25,7 @@ from charm_utils import ( is_nvidia_software_to_be_installed, set_principal_unit_relation_data, ) +from nvidia_utils import list_vgpu_types class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm): @@ -46,6 +47,9 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm): self.framework.observe(self.on.nova_vgpu_relation_changed, self._on_nova_vgpu_relation_joined_or_changed) + self.framework.observe(self.on.list_vgpu_types_action, + self._list_vgpu_types_action) + # hash of the last successfully installed NVIDIA vGPU software passed # as resource to the charm: self._stored.set_default(last_installed_resource_hash=None) @@ -101,6 +105,13 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm): """ return check_status(self.config, self.services()) + def _list_vgpu_types_action(self, event): + """List all vGPU types registered by the NVIDIA driver. 
def _read_vgpu_type_attribute(vgpu_type_dir, attribute):
    """Read one sysfs attribute file of a vGPU type, tolerating its absence.

    :param vgpu_type_dir: path to a directory below ``mdev_supported_types``
    :type vgpu_type_dir: str
    :param attribute: file name of the attribute, e.g. ``name``
    :type attribute: str
    :returns: content of the file without trailing whitespace, or ``'n/a'``
        if the file can't be read
    :rtype: str
    """
    attribute_path = os.path.join(vgpu_type_dir, attribute)
    try:
        return Path(attribute_path).read_text().rstrip()
    except OSError as e:
        # The kernel's mdev documentation lists `name` and `description` as
        # optional attributes, so one unreadable file must not abort the
        # whole listing.
        logging.warning('Unable to read %s: %s', attribute_path, e)
        return 'n/a'


def list_vgpu_types(sysfs_devices_root='/sys/devices'):
    """Human-readable list of all vGPU types registered by the NVIDIA driver.

    Every directory below ``sysfs_devices_root`` that contains a
    ``mdev_supported_types`` sub-directory is considered a device and each
    entry of that sub-directory a vGPU type. Devices are listed sorted by
    path and types sorted by name, so that the output is stable between
    calls.

    :param sysfs_devices_root: directory tree to search for devices
    :type sysfs_devices_root: str
    :returns: one line per vGPU type in the form
        ``<type>, <PCI address>, <name>, <description>``; an empty string if
        no vGPU type was found
    :rtype: str
    """
    # NOTE(lourot): we are reinventing `mdevctl types` here. Unfortunately
    # `mdevctl` is not available on Bionic.

    vgpu_types_dirname = 'mdev_supported_types'

    # At this point each matching root looks like
    # /sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0
    # os.walk() yields directories in arbitrary order, hence the sort.
    found_pci_addr_dirs = sorted(
        root for root, dirs, _ in os.walk(sysfs_devices_root)
        if vgpu_types_dirname in dirs)

    output_lines = []
    for pci_addr_dir in found_pci_addr_dirs:
        vgpu_types_dir = os.path.join(pci_addr_dir, vgpu_types_dirname)
        pci_addr = os.path.basename(pci_addr_dir)
        for vgpu_type in sorted(os.listdir(vgpu_types_dir)):
            vgpu_type_dir = os.path.join(vgpu_types_dir, vgpu_type)

            # At this point the joined line looks like
            # nvidia-256, 0000:41:00.0, GRID RTX6000-1Q, num_heads=4,
            # frl_config=60, framebuffer=1024M, max_resolution=5120x2880,
            # max_instance=24
            output_lines.append(', '.join((
                vgpu_type,
                pci_addr,
                _read_vgpu_type_attribute(vgpu_type_dir, 'name'),
                _read_vgpu_type_attribute(vgpu_type_dir, 'description'),
            )))

    return '\n'.join(output_lines)
@patch('nvidia_utils.Path')
@patch('nvidia_utils.os.listdir')
@patch('nvidia_utils.os.walk')
def test_list_vgpu_types(self, os_walk_mock, os_listdir_mock, path_mock):
    """list_vgpu_types() renders one line per type found on each device.

    The sysfs tree is simulated: os.walk() reports two devices exposing
    'mdev_supported_types', os.listdir() reports their vGPU types and every
    Path(...).read_text() call returns the next attribute value.
    """
    rtx6000_dir = '/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0'
    v100_dir = '/sys/devices/pci0000:c0/0000:c0:03.1/0000:c1:00.0'

    rtx6000_1q_description = (
        'num_heads=4, frl_config=60, framebuffer=1024M, '
        'max_resolution=5120x2880, max_instance=24')
    rtx6000_2q_description = (
        'num_heads=4, frl_config=60, framebuffer=2048M, '
        'max_resolution=7680x4320, max_instance=12')
    v100_16c_description = (
        'num_heads=1, frl_config=60, framebuffer=16384M, '
        'max_resolution=4096x2160, max_instance=1')

    os_walk_mock.return_value = [
        (device_dir, ['mdev_supported_types'], [])
        for device_dir in (rtx6000_dir, v100_dir)
    ]
    # One os.listdir() call is expected per device, in walk order.
    os_listdir_mock.side_effect = [
        ['nvidia-256', 'nvidia-257'],
        ['nvidia-301'],
    ]
    # Attribute files are read as name then description for each type.
    path_mock.return_value = MagicMock()
    path_mock.return_value.read_text.side_effect = [
        'GRID RTX6000-1Q', rtx6000_1q_description,
        'GRID RTX6000-2Q', rtx6000_2q_description,
        'GRID V100-16C', v100_16c_description,
    ]

    expected_lines = [
        'nvidia-256, 0000:41:00.0, GRID RTX6000-1Q, '
        + rtx6000_1q_description,
        'nvidia-257, 0000:41:00.0, GRID RTX6000-2Q, '
        + rtx6000_2q_description,
        'nvidia-301, 0000:c1:00.0, GRID V100-16C, '
        + v100_16c_description,
    ]

    actual_output = nvidia_utils.list_vgpu_types()
    self.assertEqual(actual_output, '\n'.join(expected_lines))