Add list-vgpu-types action

Change-Id: Ibd927191006152c82216974ac6ba0122aed8fd8d
This commit is contained in:
Aurelien Lourot 2022-03-02 11:18:25 +01:00
parent ffef5fe4bc
commit 7c4fcad4a4
4 changed files with 94 additions and 1 deletions

2
actions.yaml Normal file
View File

@ -0,0 +1,2 @@
list-vgpu-types:
description: List all vGPU types registered by the NVIDIA driver.

View File

@ -25,6 +25,7 @@ from charm_utils import (
is_nvidia_software_to_be_installed,
set_principal_unit_relation_data,
)
from nvidia_utils import list_vgpu_types
class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
@ -46,6 +47,9 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
self.framework.observe(self.on.nova_vgpu_relation_changed,
self._on_nova_vgpu_relation_joined_or_changed)
self.framework.observe(self.on.list_vgpu_types_action,
self._list_vgpu_types_action)
# hash of the last successfully installed NVIDIA vGPU software passed
# as resource to the charm:
self._stored.set_default(last_installed_resource_hash=None)
@ -101,6 +105,13 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
"""
return check_status(self.config, self.services())
def _list_vgpu_types_action(self, event):
    """Handle the list-vgpu-types action.

    Collects the vGPU types registered by the NVIDIA driver and attaches the
    resulting listing to the action results under the 'output' key.

    :param event: action event on which the results are set
    :type event: ops.charm.ActionEvent
    """
    vgpu_types_listing = list_vgpu_types()
    event.set_results({'output': vgpu_types_listing})
# Script entry point: only when this file is executed directly (and not merely
# imported, e.g. by unit tests), hand the charm class over to main().
if __name__ == '__main__':
    main(NovaComputeNvidiaVgpuCharm)

View File

@ -17,6 +17,7 @@
import logging
import os
from pathlib import Path
from charmhelpers.core.hookenv import cached
from charmhelpers.core.kernel import update_initramfs
@ -59,6 +60,44 @@ def disable_nouveau_driver():
update_initramfs()
def _read_sysfs_attribute(path):
    """Return the content of a sysfs attribute file, or None if unreadable.

    :param path: path of the attribute file
    :type path: str
    :returns: file content without trailing whitespace, or None if the file
        doesn't exist or can't be read
    :rtype: Optional[str]
    """
    try:
        return Path(path).read_text().rstrip()
    except OSError:
        return None


def list_vgpu_types(sysfs_devices_root='/sys/devices'):
    """Human-readable list of all vGPU types registered by the NVIDIA driver.

    Every directory below `sysfs_devices_root` containing a
    `mdev_supported_types` sub-directory is considered a device, and each
    entry of that sub-directory a vGPU type. One line is produced per
    (device, type) pair, made of the comma-separated type, PCI address, name
    and description. Devices and types are sorted so that the output is
    deterministic. The `name` and `description` attributes are optional in
    sysfs: an attribute that can't be read is left out of the line instead of
    making the whole listing fail.

    :param sysfs_devices_root: directory to search for devices in
    :type sysfs_devices_root: str
    :returns: one line per vGPU type, or an empty string if none was found
    :rtype: str
    """
    # NOTE(lourot): we are reinventing `mdevctl types` here. Unfortunately
    # `mdevctl` is not available on Bionic.
    vgpu_types_dirname = 'mdev_supported_types'

    # At this point each found directory looks like
    # /sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0
    pci_addr_dirs = sorted(
        root for root, dirs, _files in os.walk(sysfs_devices_root)
        if vgpu_types_dirname in dirs)

    output_lines = []
    for pci_addr_dir in pci_addr_dirs:
        types_dir = os.path.join(pci_addr_dir, vgpu_types_dirname)
        pci_addr = os.path.basename(pci_addr_dir)
        for vgpu_type in sorted(os.listdir(types_dir)):
            fields = [vgpu_type, pci_addr]
            for attribute in ('name', 'description'):
                value = _read_sysfs_attribute(
                    os.path.join(types_dir, vgpu_type, attribute))
                if value is not None:
                    fields.append(value)

            # At this point the joined fields look like
            # nvidia-256, 0000:41:00.0, GRID RTX6000-1Q, num_heads=4,
            # frl_config=60, framebuffer=1024M, max_resolution=5120x2880,
            # max_instance=24
            output_lines.append(', '.join(fields))

    return '\n'.join(output_lines)
@cached
def has_nvidia_gpu_hardware():
"""Search for NVIDIA GPU hardware.

View File

@ -15,7 +15,7 @@
import sys
import unittest
from mock import patch
from mock import MagicMock, patch
sys.path.append('src') # noqa
@ -61,3 +61,44 @@ class TestNvidiaUtils(unittest.TestCase):
lspci_parser_mock.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITHOUT_GPU)
self.assertFalse(nvidia_utils._has_nvidia_gpu_hardware_notcached())
@patch('nvidia_utils.Path')
@patch('nvidia_utils.os.listdir')
@patch('nvidia_utils.os.walk')
def test_list_vgpu_types(self, os_walk_mock, os_listdir_mock, path_mock):
    """list_vgpu_types() renders one line per vGPU type of each device."""
    # Two PCI devices, each exposing an 'mdev_supported_types' directory.
    first_gpu_dir = '/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0'
    second_gpu_dir = '/sys/devices/pci0000:c0/0000:c0:03.1/0000:c1:00.0'
    os_walk_mock.return_value = [
        (gpu_dir, ['mdev_supported_types'], [])
        for gpu_dir in (first_gpu_dir, second_gpu_dir)
    ]

    # One directory listing per device, in the order the devices are walked.
    os_listdir_mock.side_effect = [
        ['nvidia-256', 'nvidia-257'],
        ['nvidia-301'],
    ]

    # Content of the (name, description) files of each vGPU type, in the
    # order in which the types are visited.
    name_and_description_pairs = [
        ('GRID RTX6000-1Q',
         ('num_heads=4, frl_config=60, framebuffer=1024M, '
          'max_resolution=5120x2880, max_instance=24')),
        ('GRID RTX6000-2Q',
         ('num_heads=4, frl_config=60, framebuffer=2048M, '
          'max_resolution=7680x4320, max_instance=12')),
        ('GRID V100-16C',
         ('num_heads=1, frl_config=60, framebuffer=16384M, '
          'max_resolution=4096x2160, max_instance=1')),
    ]
    file_mock = MagicMock()
    file_mock.read_text.side_effect = [
        content
        for pair in name_and_description_pairs
        for content in pair
    ]
    path_mock.return_value = file_mock

    expected_lines = [
        ('nvidia-256, 0000:41:00.0, GRID RTX6000-1Q, num_heads=4, '
         'frl_config=60, framebuffer=1024M, max_resolution=5120x2880, '
         'max_instance=24'),
        ('nvidia-257, 0000:41:00.0, GRID RTX6000-2Q, num_heads=4, '
         'frl_config=60, framebuffer=2048M, max_resolution=7680x4320, '
         'max_instance=12'),
        ('nvidia-301, 0000:c1:00.0, GRID V100-16C, num_heads=1, '
         'frl_config=60, framebuffer=16384M, max_resolution=4096x2160, '
         'max_instance=1'),
    ]
    actual_output = nvidia_utils.list_vgpu_types()
    self.assertEqual(actual_output, '\n'.join(expected_lines))