From ff4d0d002a35022df1cb71029ad82ad8f3b327df Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Wed, 21 Jul 2021 11:03:27 +0200 Subject: [PATCH] Rename vgpu options to mdev As a prerequisite for blueprint generic-mdevs we need to rename the existing enabled_vgpu_types options and dynamically generated groups into enabled_mdev_types. There is no upgrade impact for existing users, as the original options are still accepted. NOTE(sbauza): As we have a lot of methods and objects named gpu-ish let's just change what we need here and provide followups for fixing internal tech debt later. Change-Id: Idba094f6366a24965804b88da0bc1b9754549c99 Partially-Implements: blueprint generic-mdevs --- doc/source/admin/virtual-gpu.rst | 10 +-- nova/conf/devices.py | 48 ++++++----- nova/tests/functional/libvirt/test_reshape.py | 4 +- nova/tests/functional/libvirt/test_vgpu.py | 8 +- nova/tests/unit/conf/test_devices.py | 14 ++-- nova/tests/unit/virt/libvirt/test_driver.py | 82 +++++++++---------- nova/virt/libvirt/driver.py | 48 +++++------ .../notes/generic_mdevs-0e1b3ef8385f7fae.yaml | 13 +++ 8 files changed, 122 insertions(+), 105 deletions(-) create mode 100644 releasenotes/notes/generic_mdevs-0e1b3ef8385f7fae.yaml diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index 3c7d62ec9664..5fbdd55adcf6 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -33,12 +33,12 @@ Enable GPU types (Compute) #. Specify which specific GPU type(s) the instances would get. - Edit :oslo.config:option:`devices.enabled_vgpu_types`: + Edit :oslo.config:option:`devices.enabled_mdev_types`: .. code-block:: ini [devices] - enabled_vgpu_types = nvidia-35 + enabled_mdev_types = nvidia-35 If you want to support more than a single GPU type, you need to provide a separate configuration section for each device. For example: @@ -46,12 +46,12 @@ Enable GPU types (Compute) .. 
code-block:: ini [devices] - enabled_vgpu_types = nvidia-35, nvidia-36 + enabled_mdev_types = nvidia-35, nvidia-36 - [vgpu_nvidia-35] + [mdev_nvidia-35] device_addresses = 0000:84:00.0,0000:85:00.0 - [vgpu_nvidia-36] + [mdev_nvidia-36] device_addresses = 0000:86:00.0 where you have to define which physical GPUs are supported per GPU type. diff --git a/nova/conf/devices.py b/nova/conf/devices.py index cae434541cfb..81c7872a2b6b 100644 --- a/nova/conf/devices.py +++ b/nova/conf/devices.py @@ -16,36 +16,39 @@ devices_group = cfg.OptGroup( name='devices', title='physical or virtual device options') -vgpu_opts = [ - cfg.ListOpt('enabled_vgpu_types', +mdev_opts = [ + cfg.ListOpt('enabled_mdev_types', default=[], + deprecated_name='enabled_vgpu_types', help=""" -The vGPU types enabled in the compute node. +The mdev types enabled in the compute node. -Some pGPUs (e.g. NVIDIA GRID K1) support different vGPU types. User can use -this option to specify a list of enabled vGPU types that may be assigned to a +Some hardware (e.g. NVIDIA GRID K1) support different mdev types. User can use +this option to specify a list of enabled mdev types that may be assigned to a guest instance. -If more than one single vGPU type is provided, then for each *vGPU type* an -additional section, ``[vgpu_$(VGPU_TYPE)]``, must be added to the configuration +If more than one single mdev type is provided, then for each *mdev type* an +additional section, ``[mdev_$(MDEV_TYPE)]``, must be added to the configuration file. Each section then **must** be configured with a single configuration option, ``device_addresses``, which should be a list of PCI addresses -corresponding to the physical GPU(s) to assign to this type. +corresponding to the physical GPU(s) or mdev-capable hardware to assign to this +type. 
If one or more sections are missing (meaning that a specific type is not wanted -to use for at least one physical GPU) or if no device addresses are provided, -then Nova will only use the first type that was provided by -``[devices]/enabled_vgpu_types``. +to use for at least one physical device) or if no device addresses are provided +, then Nova will only use the first type that was provided by +``[devices]/enabled_mdev_types``. If the same PCI address is provided for two different types, nova-compute will return an InvalidLibvirtGPUConfig exception at restart. -An example is as the following:: +For an interim period, old configuration groups named ``[vgpu_$(MDEV_TYPE)]`` +will be accepted. A valid configuration could then be:: [devices] - enabled_vgpu_types = nvidia-35, nvidia-36 + enabled_mdev_types = nvidia-35, nvidia-36 - [vgpu_nvidia-35] + [mdev_nvidia-35] device_addresses = 0000:84:00.0,0000:85:00.0 [vgpu_nvidia-36] @@ -57,7 +60,7 @@ An example is as the following:: def register_opts(conf): conf.register_group(devices_group) - conf.register_opts(vgpu_opts, group=devices_group) + conf.register_opts(mdev_opts, group=devices_group) def register_dynamic_opts(conf): @@ -66,14 +69,15 @@ def register_dynamic_opts(conf): This must be called by the service that wishes to use the options **after** the initial configuration has been loaded.
""" - opt = cfg.ListOpt('device_addresses', default=[], - item_type=cfg.types.String()) - # Register the '[vgpu_$(VGPU_TYPE)]/device_addresses' opts, implicitly - # registering the '[vgpu_$(VGPU_TYPE)]' groups in the process - for vgpu_type in conf.devices.enabled_vgpu_types: - conf.register_opt(opt, group='vgpu_%s' % vgpu_type) + # Register the '[mdev_$(MDEV_TYPE)]/device_addresses' opts, implicitly + # registering the '[mdev_$(MDEV_TYPE)]' groups in the process + for mdev_type in conf.devices.enabled_mdev_types: + opt = cfg.ListOpt('device_addresses', default=[], + item_type=cfg.types.String(), + deprecated_group='vgpu_%s' % mdev_type) + conf.register_opt(opt, group='mdev_%s' % mdev_type) def list_opts(): - return {devices_group: vgpu_opts} + return {devices_group: mdev_opts} diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index 0f9ae2b67067..7a7a3d953dce 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -79,7 +79,7 @@ class VGPUReshapeTests(base.ServersTestBase): # start a compute with vgpu support disabled so the driver will # ignore the content of the above HostMdevDeviceInfo - self.flags(enabled_vgpu_types='', group='devices') + self.flags(enabled_mdev_types='', group='devices') hostname = self.start_compute( hostname='compute1', @@ -106,7 +106,7 @@ class VGPUReshapeTests(base.ServersTestBase): # enabled vgpu support self.flags( - enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, group='devices') # We don't want to restart the compute service or it would call for # a reshape but we still want to accept some vGPU types so we call diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index bbe1502ec10b..d8aeda3b074f 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -132,7 +132,7 @@ 
class VGPUTests(VGPUTestBase): # Start compute1 supporting only nvidia-11 self.flags( - enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, group='devices') # for the sake of resizing, we need to patch the two methods below @@ -293,7 +293,7 @@ class VGPUMultipleTypesTests(VGPUTestBase): self.flavor = self._create_flavor(extra_spec=extra_spec) self.flags( - enabled_vgpu_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE, + enabled_mdev_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE, fakelibvirt.NVIDIA_12_VGPU_TYPE], group='devices') # we need to call the below again to ensure the updated @@ -304,8 +304,8 @@ class VGPUMultipleTypesTests(VGPUTestBase): # - 0000:81:01.0 will only support nvidia-12 pgpu1_pci_addr = self.libvirt2pci_address(fakelibvirt.PGPU1_PCI_ADDR) pgpu2_pci_addr = self.libvirt2pci_address(fakelibvirt.PGPU2_PCI_ADDR) - self.flags(device_addresses=[pgpu1_pci_addr], group='vgpu_nvidia-11') - self.flags(device_addresses=[pgpu2_pci_addr], group='vgpu_nvidia-12') + self.flags(device_addresses=[pgpu1_pci_addr], group='mdev_nvidia-11') + self.flags(device_addresses=[pgpu2_pci_addr], group='mdev_nvidia-12') # Prepare traits for later on self._create_trait('CUSTOM_NVIDIA_11') diff --git a/nova/tests/unit/conf/test_devices.py b/nova/tests/unit/conf/test_devices.py index 6de8a97a94b3..24536d9bada5 100644 --- a/nova/tests/unit/conf/test_devices.py +++ b/nova/tests/unit/conf/test_devices.py @@ -20,15 +20,15 @@ CONF = nova.conf.CONF class DevicesConfTestCase(test.NoDBTestCase): def test_register_dynamic_opts(self): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') - self.assertNotIn('vgpu_nvidia-11', CONF) - self.assertNotIn('vgpu_nvidia-12', CONF) + self.assertNotIn('mdev_nvidia-11', CONF) + self.assertNotIn('mdev_nvidia-12', CONF) nova.conf.devices.register_dynamic_opts(CONF) - self.assertIn('vgpu_nvidia-11', CONF) - 
self.assertIn('vgpu_nvidia-12', CONF) - self.assertEqual([], getattr(CONF, 'vgpu_nvidia-11').device_addresses) - self.assertEqual([], getattr(CONF, 'vgpu_nvidia-12').device_addresses) + self.assertIn('mdev_nvidia-11', CONF) + self.assertIn('mdev_nvidia-12', CONF) + self.assertEqual([], getattr(CONF, 'mdev_nvidia-11').device_addresses) + self.assertEqual([], getattr(CONF, 'mdev_nvidia-12').device_addresses) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 257e66e89b3b..299a1b6b1d2c 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -20601,7 +20601,7 @@ class TestUpdateProviderTree(test.NoDBTestCase): def _test_update_provider_tree( self, mock_gpu_invs, gpu_invs=None, vpmems=None): if gpu_invs: - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') mock_gpu_invs.return_value = gpu_invs if vpmems: self.driver._vpmems_by_rc = vpmems @@ -20855,7 +20855,7 @@ class TestUpdateProviderTree(test.NoDBTestCase): def test_update_provider_tree_for_vgpu_reshape( self, mock_gpus, mock_get_devs, mock_get_mdev_info): """Tests the VGPU reshape scenario.""" - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') # Let's assume we have two PCI devices each having 4 pGPUs for this # type pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0'] @@ -20987,7 +20987,7 @@ class TestUpdateProviderTree(test.NoDBTestCase): """Tests the VGPU reshape failure scenario where VGPU allocations are not on the root compute node provider as expected. 
""" - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') # Let's assume we have two PCI devices each having 4 pGPUs for this # type pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0'] @@ -25181,7 +25181,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertEqual({}, drvr._get_gpu_inventories()) # Now, set a specific GPU type and restart the driver - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) expected = { # the first GPU also has one mdev allocated against it @@ -25206,13 +25206,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): @mock.patch('nova.virt.libvirt.driver.LibvirtDriver' '._get_mdev_capable_devices') def test_get_gpu_inventories_with_two_types(self, get_mdev_capable_devs): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:06:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='mdev_nvidia-12') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) expected = { # the first GPU supports nvidia-11 and has one mdev with this type @@ -25244,7 +25244,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertEqual([], drvr._get_supported_vgpu_types()) # Now, provide only one supported vGPU type - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + 
self.flags(enabled_mdev_types=['nvidia-11'], group='devices') self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) # Given we only support one vGPU type, we don't have any map for PCI # devices *yet* @@ -25256,18 +25256,18 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): mock_warning.reset_mock() # Now two types without forgetting to provide the pGPU addresses - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) self.assertEqual({}, drvr.pgpu_type_mapping) - msg = ("The vGPU type '%(type)s' was listed in '[devices] " - "enabled_vgpu_types' but no corresponding " - "'[vgpu_%(type)s]' group or " - "'[vgpu_%(type)s] device_addresses' " + msg = ("The mdev type '%(type)s' was listed in '[devices] " + "enabled_mdev_types' but no corresponding " + "'[mdev_%(type)s]' group or " + "'[mdev_%(type)s] device_addresses' " "option was defined. Only the first type '%(ftype)s' " "will be used." % {'type': 'nvidia-12', 'ftype': 'nvidia-11'}) @@ -25276,8 +25276,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): mock_warning.reset_mock() # And now do it correctly ! 
- self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12') self.assertEqual(['nvidia-11', 'nvidia-12'], drvr._get_supported_vgpu_types()) self.assertEqual({'0000:84:00.0': 'nvidia-11', @@ -25285,32 +25285,32 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): mock_warning.assert_not_called() def test_get_supported_vgpu_types_with_duplicate_types(self): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) # Provide the same pGPU PCI ID for two different types - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-12') self.assertRaises(exception.InvalidLibvirtGPUConfig, libvirt_driver.LibvirtDriver, fake.FakeVirtAPI(), False) def test_get_supported_vgpu_types_with_invalid_pci_address(self): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) # Fat-finger the PCI address - self.flags(device_addresses=['whoops'], group='vgpu_nvidia-11') + self.flags(device_addresses=['whoops'], group='mdev_nvidia-11') self.assertRaises(exception.InvalidLibvirtGPUConfig, libvirt_driver.LibvirtDriver, fake.FakeVirtAPI(), False) 
@mock.patch.object(nova.conf.devices, 'register_dynamic_opts') def test_get_supported_vgpu_types_registering_dynamic_opts(self, rdo): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) @@ -25328,42 +25328,42 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertIsNone(drvr._get_vgpu_type_per_pgpu(device)) # BY default, we return the first type if we only support one. - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) # Now, make sure we provide the right vGPU type for the device - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) # the libvirt name pci_0000_84_00_0 matches 0000:84:00.0 self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) def test_get_vgpu_type_per_pgpu_with_incorrect_pci_addr(self): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created 
nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) # 'whoops' is not a correct libvirt name corresponding to a PCI address self.assertIsNone(drvr._get_vgpu_type_per_pgpu('whoops')) def test_get_vgpu_type_per_pgpu_with_unconfigured_pgpu(self): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) # 0000:86:00.0 wasn't configured self.assertIsNone(drvr._get_vgpu_type_per_pgpu('pci_0000_86_00_0')) @@ -25560,7 +25560,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_existing_mdevs_not_assigned') def test_allocate_mdevs_with_available_mdevs(self, get_unassigned_mdevs): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') allocations = { uuids.rp1: { 'resources': { @@ -25586,13 +25586,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): unallocated_mdevs, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + 
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:06:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='mdev_nvidia-12') allocations = { uuids.rp1: { 'resources': { @@ -25633,7 +25633,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): unallocated_mdevs, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') allocations = { uuids.rp1: { 'resources': { @@ -25743,13 +25743,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): def test_recreate_mediated_device_on_init_host( self, get_all_assigned_mdevs, exists, mock_get_mdev_info, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') # we need to call the below again to ensure the updated # 'device_addresses' value is read and the new groups created nova.conf.devices.register_dynamic_opts(CONF) - self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') - self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') + self.flags(device_addresses=['0000:06:00.0'], group='mdev_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='mdev_nvidia-12') get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1, uuids.mdev2: uuids.inst2} @@ -25793,7 +25793,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): '_get_all_assigned_mediated_devices') def 
test_recreate_mediated_device_on_init_host_with_wrong_config( self, get_all_assigned_mdevs, exists, mock_get_mdev_info): - self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'], group='devices') get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1} # We pretend this mdev doesn't exist hence it needs recreation diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 65f4e44f2731..fc4d2350af51 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7382,26 +7382,26 @@ class LibvirtDriver(driver.ComputeDriver): return total def _get_supported_vgpu_types(self): - if not CONF.devices.enabled_vgpu_types: + if not CONF.devices.enabled_mdev_types: return [] # Make sure we register all the types as the compute service could # be calling this method before init_host() - if len(CONF.devices.enabled_vgpu_types) > 1: + if len(CONF.devices.enabled_mdev_types) > 1: nova.conf.devices.register_dynamic_opts(CONF) - for vgpu_type in CONF.devices.enabled_vgpu_types: - group = getattr(CONF, 'vgpu_%s' % vgpu_type, None) + for vgpu_type in CONF.devices.enabled_mdev_types: + group = getattr(CONF, 'mdev_%s' % vgpu_type, None) if group is None or not group.device_addresses: - first_type = CONF.devices.enabled_vgpu_types[0] - if len(CONF.devices.enabled_vgpu_types) > 1: + first_type = CONF.devices.enabled_mdev_types[0] + if len(CONF.devices.enabled_mdev_types) > 1: # Only provide the warning if the operator provided more # than one type as it's not needed to provide groups # if you only use one vGPU type. 
- msg = ("The vGPU type '%(type)s' was listed in '[devices] " - "enabled_vgpu_types' but no corresponding " - "'[vgpu_%(type)s]' group or " - "'[vgpu_%(type)s] device_addresses' " + msg = ("The mdev type '%(type)s' was listed in '[devices] " + "enabled_mdev_types' but no corresponding " + "'[mdev_%(type)s]' group or " + "'[mdev_%(type)s] device_addresses' " "option was defined. Only the first type " "'%(ftype)s' will be used." % {'type': vgpu_type, 'ftype': first_type}) @@ -7426,7 +7426,7 @@ class LibvirtDriver(driver.ComputeDriver): reason="incorrect PCI address: %s" % device_address ) self.pgpu_type_mapping[device_address] = vgpu_type - return CONF.devices.enabled_vgpu_types + return CONF.devices.enabled_mdev_types def _get_vgpu_type_per_pgpu(self, device_address): """Provides the vGPU type the pGPU supports. @@ -7464,19 +7464,19 @@ class LibvirtDriver(driver.ComputeDriver): # in case we can't find a specific pGPU return - def _count_mediated_devices(self, enabled_vgpu_types): + def _count_mediated_devices(self, enabled_mdev_types): """Counts the sysfs objects (handles) that represent a mediated device - and filtered by $enabled_vgpu_types. + and filtered by $enabled_mdev_types. Those handles can be in use by a libvirt guest or not. 
- :param enabled_vgpu_types: list of enabled VGPU types on this host + :param enabled_mdev_types: list of enabled VGPU types on this host :returns: dict, keyed by parent GPU libvirt PCI device ID, of number of mdev device handles for that GPU """ counts_per_parent: ty.Dict[str, int] = collections.defaultdict(int) - mediated_devices = self._get_mediated_devices(types=enabled_vgpu_types) + mediated_devices = self._get_mediated_devices(types=enabled_mdev_types) for mdev in mediated_devices: parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent']) if mdev['type'] != parent_vgpu_type: @@ -7487,16 +7487,16 @@ class LibvirtDriver(driver.ComputeDriver): counts_per_parent[mdev['parent']] += 1 return counts_per_parent - def _count_mdev_capable_devices(self, enabled_vgpu_types): + def _count_mdev_capable_devices(self, enabled_mdev_types): """Counts the mdev-capable devices on this host filtered by - $enabled_vgpu_types. + $enabled_mdev_types. - :param enabled_vgpu_types: list of enabled VGPU types on this host + :param enabled_mdev_types: list of enabled VGPU types on this host :returns: dict, keyed by device name, to an integer count of available instances of each type per device """ mdev_capable_devices = self._get_mdev_capable_devices( - types=enabled_vgpu_types) + types=enabled_mdev_types) counts_per_dev: ty.Dict[str, int] = collections.defaultdict(int) for dev in mdev_capable_devices: # dev_id is the libvirt name for the PCI device, @@ -7516,7 +7516,7 @@ class LibvirtDriver(driver.ComputeDriver): def _get_gpu_inventories(self): """Returns the inventories for each physical GPU for a specific type - supported by the enabled_vgpu_types CONF option. + supported by the enabled_mdev_types CONF option. 
:returns: dict, keyed by libvirt PCI name, of dicts like: {'pci_0000_84_00_0': @@ -7531,16 +7531,16 @@ class LibvirtDriver(driver.ComputeDriver): """ # Bail out early if operator doesn't care about providing vGPUs - enabled_vgpu_types = self.supported_vgpu_types - if not enabled_vgpu_types: + enabled_mdev_types = self.supported_vgpu_types + if not enabled_mdev_types: return {} inventories = {} - count_per_parent = self._count_mediated_devices(enabled_vgpu_types) + count_per_parent = self._count_mediated_devices(enabled_mdev_types) for dev_name, count in count_per_parent.items(): inventories[dev_name] = {'total': count} # Filter how many available mdevs we can create for all the supported # types. - count_per_dev = self._count_mdev_capable_devices(enabled_vgpu_types) + count_per_dev = self._count_mdev_capable_devices(enabled_mdev_types) # Combine the counts into the dict that we return to the caller. for dev_name, count in count_per_dev.items(): inv_per_parent = inventories.setdefault( diff --git a/releasenotes/notes/generic_mdevs-0e1b3ef8385f7fae.yaml b/releasenotes/notes/generic_mdevs-0e1b3ef8385f7fae.yaml new file mode 100644 index 000000000000..ecd8bdbcf0ea --- /dev/null +++ b/releasenotes/notes/generic_mdevs-0e1b3ef8385f7fae.yaml @@ -0,0 +1,13 @@ +--- +deprecations: + - | + The existing config options in the ``[devices]`` group for managing virtual + GPUs are now renamed in order to be more generic since the mediated devices + framework from the Linux kernel can support other devices: + + - ``enabled_vgpu_types`` is now deprecated in favour of + ``enabled_mdev_types`` + - Dynamic configuration groups called ``[vgpu_*]`` are now deprecated in + favour of ``[mdev_*]`` + + Support for the deprecated options will be removed in a future release.