From 5b5cbc64f9b4b024ec6686ed05e8b9b887d64ca1 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Mon, 23 Mar 2020 18:19:22 +0100 Subject: [PATCH] Support different vGPU types per pGPU It's now possible to have a different vGPU type for each pGPU. By modifying the config, you can say which PCI device (ie. a pGPU) should use a specific vGPU type. For upgrades, the behaviour from Train won't be changed since we only use the first type if we don't have the dynamic options (so operators don't need to change nova.conf before upgrading). Implements: blueprint vgpu-multiple-types Change-Id: I46f0a76811142888db2bbc66cc3fde04ff890c01 --- doc/source/admin/virtual-gpu.rst | 81 +++++- nova/compute/manager.py | 2 + nova/conf/devices.py | 44 +++- nova/exception.py | 4 + nova/tests/functional/libvirt/test_reshape.py | 14 +- nova/tests/unit/conf/test_devices.py | 34 +++ nova/tests/unit/conf_fixture.py | 2 + nova/tests/unit/virt/libvirt/test_driver.py | 242 +++++++++++++++++- nova/virt/libvirt/driver.py | 161 +++++++++--- .../vgpu-multiple-types-2b1ded7d1cc28880.yaml | 8 + 10 files changed, 528 insertions(+), 64 deletions(-) create mode 100644 nova/tests/unit/conf/test_devices.py create mode 100644 releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index 680f48b5a66e..b4de23da8a47 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -35,15 +35,33 @@ Enable GPU types (Compute) [devices] enabled_vgpu_types = nvidia-35 - .. note:: + If you want to support more than a single GPU type, you need to provide a + separate configuration section for each device. For example: - As of the Queens release, Nova only supports a single type. If more - than one vGPU type is specified (as a comma-separated list), only the - first one will be used. + .. code-block:: ini + + [devices] + enabled_vgpu_types = nvidia-35, nvidia-36 + + [vgpu_nvidia-35] + device_addresses = 0000:84:00.0,0000:85:00.0 + + [vgpu_nvidia-36] + device_addresses = 0000:86:00.0 + + where you have to define which physical GPUs are supported per GPU type. + + If the same PCI address is provided for two different types, nova-compute + will refuse to start and issue a specific error in the logs. To know which specific type(s) to mention, please refer to `How to discover a GPU type`_. + .. versionchanged:: 21.0.0 + + Supporting multiple GPU types is only supported by the Ussuri release and + later versions. + #. Restart the ``nova-compute`` service. @@ -269,6 +287,59 @@ OpenStackClient. For details on specific commands, see its documentation. physical GPU having the PCI ID ``0000:85:00.0``. +(Optional) Provide custom traits for multiple GPU types +------------------------------------------------------- + +Since operators want to support different GPU types per compute, it would be +nice to have flavors asking for a specific GPU type. This is now possible +using custom traits by decorating child Resource Providers that correspond +to physical GPUs. + +.. note:: + + Possible improvements in a future release could consist of providing + automatic tagging of Resource Providers with standard traits corresponding + to versioned mapping of public GPU types. For the moment, this has to be + done manually. + +#. Get the list of resource providers + + See `Checking allocations and inventories for virtual GPUs`_ first for getting + the list of Resource Providers that support a ``VGPU`` resource class. + +#. Define custom traits that will correspond for each to a GPU type + + .. code-block:: console + + $ openstack --os-placement-api-version 1.6 trait create CUSTOM_NVIDIA_11 + + In this example, we ask to create a custom trait named ``CUSTOM_NVIDIA_11``. + +#. Add the corresponding trait to the Resource Provider matching the GPU + + .. code-block:: console + + $ openstack --os-placement-api-version 1.6 resource provider trait set \ + --trait CUSTOM_NVIDIA_11 e2f8607b-0683-4141-a8af-f5e20682e28c + + In this case, the trait ``CUSTOM_NVIDIA_11`` will be added to the Resource + Provider with the UUID ``e2f8607b-0683-4141-a8af-f5e20682e28c`` that + corresponds to the PCI address ``0000:85:00:0`` as shown above. + +#. Amend the flavor to add a requested trait + + .. code-block:: console + + $ openstack flavor set --property trait:CUSTOM_NVIDIA_11=required vgpu_1 + + In this example, we add the ``CUSTOM_NVIDIA_11`` trait as a required + information for the ``vgpu_1`` flavor we created earlier. + + This will allow the Placement service to only return the Resource Providers + matching this trait so only the GPUs that were decorated with will be checked + for this flavor. + + Caveats ------- @@ -324,6 +395,8 @@ For XenServer: resize. If you want to migrate an instance, make sure to rebuild it after the migration. +* Multiple GPU types per compute is not supported by the XenServer driver. + .. [#] https://bugs.launchpad.net/nova/+bug/1762688 .. Links diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8be0f2db288d..d8380c763c05 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1384,6 +1384,8 @@ class ComputeManager(manager.Manager): whitelist.Whitelist(CONF.pci.passthrough_whitelist) nova.conf.neutron.register_dynamic_opts(CONF) + # Even if only libvirt uses them, make it available for all drivers + nova.conf.devices.register_dynamic_opts(CONF) # Override the number of concurrent disk operations allowed if the # user has specified a limit. diff --git a/nova/conf/devices.py b/nova/conf/devices.py index 9dea1fdf3a8c..cae434541cfb 100644 --- a/nova/conf/devices.py +++ b/nova/conf/devices.py @@ -24,12 +24,33 @@ The vGPU types enabled in the compute node. Some pGPUs (e.g. NVIDIA GRID K1) support different vGPU types. User can use this option to specify a list of enabled vGPU types that may be assigned to a -guest instance. But please note that Nova only supports a single type in the -Queens release. If more than one vGPU type is specified (as a comma-separated -list), only the first one will be used. An example is as the following:: +guest instance. + +If more than one single vGPU type is provided, then for each *vGPU type* an +additional section, ``[vgpu_$(VGPU_TYPE)]``, must be added to the configuration +file. Each section then **must** be configured with a single configuration +option, ``device_addresses``, which should be a list of PCI addresses +corresponding to the physical GPU(s) to assign to this type. + +If one or more sections are missing (meaning that a specific type is not wanted +to use for at least one physical GPU) or if no device addresses are provided, +then Nova will only use the first type that was provided by +``[devices]/enabled_vgpu_types``. + +If the same PCI address is provided for two different types, nova-compute will +return an InvalidLibvirtGPUConfig exception at restart. + +An example is as the following:: [devices] - enabled_vgpu_types = GRID K100,Intel GVT-g,MxGPU.2,nvidia-11 + enabled_vgpu_types = nvidia-35, nvidia-36 + + [vgpu_nvidia-35] + device_addresses = 0000:84:00.0,0000:85:00.0 + + [vgpu_nvidia-36] + device_addresses = 0000:86:00.0 + """) ] @@ -39,5 +60,20 @@ def register_opts(conf): conf.register_opts(vgpu_opts, group=devices_group) +def register_dynamic_opts(conf): + """Register dynamically-generated options and groups. + + This must be called by the service that wishes to use the options **after** + the initial configuration has been loaded. + """ + opt = cfg.ListOpt('device_addresses', default=[], + item_type=cfg.types.String()) + + # Register the '[vgpu_$(VGPU_TYPE)]/device_addresses' opts, implicitly + # registering the '[vgpu_$(VGPU_TYPE)]' groups in the process + for vgpu_type in conf.devices.enabled_vgpu_types: + conf.register_opt(opt, group='vgpu_%s' % vgpu_type) + + def list_opts(): return {devices_group: vgpu_opts} diff --git a/nova/exception.py b/nova/exception.py index a5f16b85fc85..5e8669bae18b 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2297,3 +2297,7 @@ class DeviceProfileError(NovaException): class AcceleratorRequestOpFailed(NovaException): msg_fmt = _("Failed to %(op)s accelerator requests: %(msg)s") + + +class InvalidLibvirtGPUConfig(NovaException): + msg_fmt = _('Invalid configuration for GPU devices: %(reason)s') diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index c27d9a8460f2..f495c1fcd6cb 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -87,6 +87,16 @@ class VGPUReshapeTests(base.ServersTestBase): '/resource_providers/%s/inventories' % compute_rp_uuid, inventories) + # enabled vgpu support + self.flags( + enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + group='devices') + # We don't want to restart the compute service or it would call for + # a reshape but we still want to accept some vGPU types so we call + # directly the needed method + self.compute.driver.supported_vgpu_types = ( + self.compute.driver._get_supported_vgpu_types()) + # now we boot two servers with vgpu extra_spec = {"resources:VGPU": 1} flavor_id = self._create_flavor(extra_spec=extra_spec) @@ -139,10 +149,6 @@ class VGPUReshapeTests(base.ServersTestBase): {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2, 'VGPU': 1}, allocations[compute_rp_uuid]['resources']) - # enabled vgpu support - self.flags( - enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, - group='devices') # restart compute which will trigger a reshape self.compute = self.restart_compute_service(self.compute) diff --git a/nova/tests/unit/conf/test_devices.py b/nova/tests/unit/conf/test_devices.py new file mode 100644 index 000000000000..6de8a97a94b3 --- /dev/null +++ b/nova/tests/unit/conf/test_devices.py @@ -0,0 +1,34 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import nova.conf +from nova import test + + +CONF = nova.conf.CONF + + +class DevicesConfTestCase(test.NoDBTestCase): + + def test_register_dynamic_opts(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + + self.assertNotIn('vgpu_nvidia-11', CONF) + self.assertNotIn('vgpu_nvidia-12', CONF) + + nova.conf.devices.register_dynamic_opts(CONF) + + self.assertIn('vgpu_nvidia-11', CONF) + self.assertIn('vgpu_nvidia-12', CONF) + self.assertEqual([], getattr(CONF, 'vgpu_nvidia-11').device_addresses) + self.assertEqual([], getattr(CONF, 'vgpu_nvidia-12').device_addresses) diff --git a/nova/tests/unit/conf_fixture.py b/nova/tests/unit/conf_fixture.py index b02d73b39e77..ac14c135e657 100644 --- a/nova/tests/unit/conf_fixture.py +++ b/nova/tests/unit/conf_fixture.py @@ -17,6 +17,7 @@ from oslo_config import fixture as config_fixture from oslo_policy import opts as policy_opts +from nova.conf import devices from nova.conf import neutron from nova.conf import paths from nova import config @@ -64,3 +65,4 @@ class ConfFixture(config_fixture.Config): init_rpc=False) policy_opts.set_defaults(self.conf) neutron.register_dynamic_opts(self.conf) + devices.register_dynamic_opts(self.conf) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index eeac3468e96c..e2902fc899a1 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -23264,7 +23264,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): '._get_mediated_devices') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver' '._get_mdev_capable_devices') - def test_get_gpu_inventories(self, get_mdev_capable_devs, + def _test_get_gpu_inventories(self, drvr, expected, expected_types, + get_mdev_capable_devs, get_mediated_devices): get_mdev_capable_devs.return_value = [ {"dev_id": "pci_0000_06_00_0", @@ -23279,6 +23280,9 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): "types": {'nvidia-11': {'availableInstances': 7, 'name': 'GRID M60-0B', 'deviceAPI': 'vfio-pci'}, + 'nvidia-12': {'availableInstances': 10, + 'name': 'GRID M60-8Q', + 'deviceAPI': 'vfio-pci'}, } }, ] @@ -23292,13 +23296,19 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'parent': "pci_0000_07_00_0", 'type': 'nvidia-11', 'iommu_group': 1}] - drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual(expected, drvr._get_gpu_inventories()) + get_mdev_capable_devs.assert_called_once_with(types=expected_types) + get_mediated_devices.assert_called_once_with(types=expected_types) + + def test_get_gpu_inventories_with_a_single_type(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) # If the operator doesn't provide GPU types self.assertEqual({}, drvr._get_gpu_inventories()) - # Now, set a specific GPU type + # Now, set a specific GPU type and restart the driver self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) expected = { # the first GPU also has one mdev allocated against it 'pci_0000_06_00_0': {'total': 15 + 1, @@ -23317,9 +23327,158 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'allocation_ratio': 1.0, }, } - self.assertEqual(expected, drvr._get_gpu_inventories()) - get_mdev_capable_devs.assert_called_once_with(types=['nvidia-11']) - get_mediated_devices.assert_called_once_with(types=['nvidia-11']) + self._test_get_gpu_inventories(drvr, expected, ['nvidia-11']) + + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '._get_mdev_capable_devices') + def test_get_gpu_inventories_with_two_types(self, get_mdev_capable_devs): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + expected = { + # the first GPU supports nvidia-11 and has one mdev with this type + 'pci_0000_06_00_0': {'total': 15 + 1, + 'max_unit': 15 + 1, + 'min_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + }, + # the second GPU supports nvidia-12 but the existing mdev is not + # using this type, so we only count the availableInstances value + # for nvidia-12. + 'pci_0000_07_00_0': {'total': 10, + 'max_unit': 10, + 'min_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + }, + } + self._test_get_gpu_inventories(drvr, expected, ['nvidia-11', + 'nvidia-12']) + + @mock.patch.object(libvirt_driver.LOG, 'warning') + def test_get_supported_vgpu_types(self, mock_warning): + # Verify that by default we don't support vGPU types + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual([], drvr._get_supported_vgpu_types()) + + # Now, provide only one supported vGPU type + self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) + # Given we only support one vGPU type, we don't have any map for PCI + # devices *yet* + self.assertEqual({}, drvr.pgpu_type_mapping) + # Since the operator wanted to only support one type, it's fine to not + # provide config groups + mock_warning.assert_not_called() + # For further checking + mock_warning.reset_mock() + + # Now two types without forgetting to provide the pGPU addresses + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) + self.assertEqual({}, drvr.pgpu_type_mapping) + msg = ("The vGPU type '%(type)s' was listed in '[devices] " + "enabled_vgpu_types' but no corresponding " + "'[vgpu_%(type)s]' group or " + "'[vgpu_%(type)s] device_addresses' " + "option was defined. Only the first type '%(ftype)s' " + "will be used." % {'type': 'nvidia-12', + 'ftype': 'nvidia-11'}) + mock_warning.assert_called_once_with(msg) + # For further checking + mock_warning.reset_mock() + + # And now do it correctly ! + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.assertEqual(['nvidia-11', 'nvidia-12'], + drvr._get_supported_vgpu_types()) + self.assertEqual({'0000:84:00.0': 'nvidia-11', + '0000:85:00.0': 'nvidia-12'}, drvr.pgpu_type_mapping) + mock_warning.assert_not_called() + + def test_get_supported_vgpu_types_with_duplicate_types(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + # Provide the same pGPU PCI ID for two different types + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-12') + self.assertRaises(exception.InvalidLibvirtGPUConfig, + libvirt_driver.LibvirtDriver, + fake.FakeVirtAPI(), False) + + def test_get_supported_vgpu_types_with_invalid_pci_address(self): + self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + # Fat-finger the PCI address + self.flags(device_addresses=['whoops'], group='vgpu_nvidia-11') + self.assertRaises(exception.InvalidLibvirtGPUConfig, + libvirt_driver.LibvirtDriver, + fake.FakeVirtAPI(), False) + + def test_get_vgpu_type_per_pgpu(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + device = 'pci_0000_84_00_0' + self.assertIsNone(drvr._get_vgpu_type_per_pgpu(device)) + + # BY default, we return the first type if we only support one. + self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) + + # Now, make sure we provide the right vGPU type for the device + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # the libvirt name pci_0000_84_00_0 matches 0000:84:00.0 + self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) + + def test_get_vgpu_type_per_pgpu_with_incorrect_pci_addr(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # 'whoops' is not a correct libvirt name corresponding to a PCI address + self.assertIsNone(drvr._get_vgpu_type_per_pgpu('whoops')) + + def test_get_vgpu_type_per_pgpu_with_unconfigured_pgpu(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # 0000:86:00.0 wasn't configured + self.assertIsNone(drvr._get_vgpu_type_per_pgpu('pci_0000_86_00_0')) @mock.patch.object(host.Host, 'device_lookup_by_name') @mock.patch.object(host.Host, 'list_mdev_capable_devices') @@ -23517,7 +23676,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): unallocated_mdevs, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') allocations = { uuids.rp1: { 'resources': { @@ -23529,7 +23694,12 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): get_mdev_capable_devs.return_value = [ {"dev_id": "pci_0000_06_00_0", "vendor_id": 0x10de, - "types": {'nvidia-11': {'availableInstances': 16, + # This pGPU can support both types but the operator only wanted + # to use nvidia-11 for it. + "types": {'nvidia-10': {'availableInstances': 16, + 'name': 'GRID M60-8Q', + 'deviceAPI': 'vfio-pci'}, + 'nvidia-11': {'availableInstances': 16, 'name': 'GRID M60-0B', 'deviceAPI': 'vfio-pci'}, } @@ -23610,11 +23780,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): "pGPU device name %(name)s can't be guessed from the ProviderTree " "roots %(roots)s", {'name': 'oops_I_did_it_again', 'roots': 'cn'}) + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_vgpu_type_per_pgpu') @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_mediated_devices') @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_all_assigned_mediated_devices') def test_get_existing_mdevs_not_assigned(self, get_all_assigned_mdevs, - get_mediated_devices): + get_mediated_devices, + get_vgpu_type_per_pgpu): # mdev2 is assigned to instance1 get_all_assigned_mdevs.return_value = {uuids.mdev2: uuids.inst1} # there is a total of 2 mdevs, mdev1 and mdev2 @@ -23627,10 +23799,22 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'uuid': uuids.mdev2, 'parent': "pci_some", 'type': 'nvidia-11', - 'iommu_group': 1}] + 'iommu_group': 1}, + {'dev_id': 'mdev_some_uuid3', + 'uuid': uuids.mdev3, + 'parent': "pci_some", + 'type': 'nvidia-12', + 'iommu_group': 1}, + ] + + def _fake_get_vgpu_type_per_pgpu(parent_addr): + # Always return the same vGPU type so we avoid mdev3 + return 'nvidia-11' + get_vgpu_type_per_pgpu.side_effect = _fake_get_vgpu_type_per_pgpu drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - # Since mdev2 is assigned to inst1, only mdev1 is available + # Since mdev3 type is not supported and mdev2 is assigned to inst1, + # only mdev1 is available self.assertEqual(set([uuids.mdev1]), drvr._get_existing_mdevs_not_assigned(parent=None)) @@ -23647,7 +23831,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): def test_recreate_mediated_device_on_init_host( self, get_all_assigned_mdevs, exists, mock_get_mdev_info, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1, uuids.mdev2: uuids.inst2} @@ -23664,7 +23854,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): exists.side_effect = _exists mock_get_mdev_info.side_effect = [ {"dev_id": "mdev_fake", - "uuid": uuids.mdev1, + "uuid": uuids.mdev2, "parent": "pci_0000_06_00_0", "type": "nvidia-11", "iommu_group": 12 @@ -23680,9 +23870,35 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) drvr.init_host(host='foo') + # Only mdev2 will be recreated as mdev1 already exists. privsep_create_mdev.assert_called_once_with( "0000:06:00.0", 'nvidia-11', uuid=uuids.mdev2) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' + '_get_mediated_device_information') + @mock.patch.object(os.path, 'exists') + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_get_all_assigned_mediated_devices') + def test_recreate_mediated_device_on_init_host_with_wrong_config( + self, get_all_assigned_mdevs, exists, mock_get_mdev_info): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1} + # We pretend this mdev doesn't exist hence it needs recreation + exists.return_value = False + mock_get_mdev_info.side_effect = [ + {"dev_id": "mdev_fake", + "uuid": uuids.mdev1, + "parent": "pci_0000_06_00_0", + "type": "nvidia-99", + "iommu_group": 12 + }] + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + # mdev1 was originally created for nvidia-99 but the operator messed up + # the configuration by removing this type, we want to hardstop. + self.assertRaises(exception.InvalidLibvirtGPUConfig, + drvr.init_host, host='foo') + @mock.patch.object(libvirt_guest.Guest, 'detach_device') def _test_detach_mediated_devices(self, side_effect, detach_device): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 6ded3249f56e..3ff2b2dae5a1 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -429,6 +429,10 @@ class LibvirtDriver(driver.ComputeDriver): self._vpmems_by_name, self._vpmems_by_rc = self._discover_vpmems( vpmem_conf=CONF.libvirt.pmem_namespaces) + # We default to not support vGPUs unless the configuration is set. + self.pgpu_type_mapping = collections.defaultdict(str) + self.supported_vgpu_types = self._get_supported_vgpu_types() + def _discover_vpmems(self, vpmem_conf=None): """Discover vpmems on host and configuration. @@ -795,6 +799,21 @@ class LibvirtDriver(driver.ComputeDriver): dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid) dev_info = self._get_mediated_device_information(dev_name) parent = dev_info['parent'] + parent_type = self._get_vgpu_type_per_pgpu(parent) + if dev_info['type'] != parent_type: + # NOTE(sbauza): The mdev was created by using a different + # vGPU type. We can't recreate the mdev until the operator + # modifies the configuration. + parent = "{}:{}:{}.{}".format(*parent[4:].split('_')) + msg = ("The instance UUID %(inst)s uses a VGPU that " + "its parent pGPU %(parent)s no longer " + "supports as the instance vGPU type %(type)s " + "is not accepted for the pGPU. Please correct " + "the configuration accordingly." % + {'inst': instance_uuid, + 'parent': parent, + 'type': dev_info['type']}) + raise exception.InvalidLibvirtGPUConfig(reason=msg) self._create_new_mediated_device(parent, uuid=mdev_uuid) def _set_multiattach_support(self): @@ -6497,12 +6516,80 @@ class LibvirtDriver(driver.ComputeDriver): def _get_supported_vgpu_types(self): if not CONF.devices.enabled_vgpu_types: return [] - # TODO(sbauza): Move this check up to compute_manager.init_host - if len(CONF.devices.enabled_vgpu_types) > 1: - LOG.warning('libvirt only supports one GPU type per compute node,' - ' only first type will be used.') - requested_types = CONF.devices.enabled_vgpu_types[:1] - return requested_types + + for vgpu_type in CONF.devices.enabled_vgpu_types: + group = getattr(CONF, 'vgpu_%s' % vgpu_type, None) + if group is None or not group.device_addresses: + first_type = CONF.devices.enabled_vgpu_types[0] + if len(CONF.devices.enabled_vgpu_types) > 1: + # Only provide the warning if the operator provided more + # than one type as it's not needed to provide groups + # if you only use one vGPU type. + msg = ("The vGPU type '%(type)s' was listed in '[devices] " + "enabled_vgpu_types' but no corresponding " + "'[vgpu_%(type)s]' group or " + "'[vgpu_%(type)s] device_addresses' " + "option was defined. Only the first type " + "'%(ftype)s' will be used." % {'type': vgpu_type, + 'ftype': first_type}) + LOG.warning(msg) + # We need to reset the mapping table that we started to provide + # keys and values from previously processed vGPUs but since + # there is a problem for this vGPU type, we only want to + # support only the first type. + self.pgpu_type_mapping.clear() + return [first_type] + for device_address in group.device_addresses: + if device_address in self.pgpu_type_mapping: + raise exception.InvalidLibvirtGPUConfig( + reason="duplicate types for PCI ID %s" % device_address + ) + # Just checking whether the operator fat-fingered the address. + # If it's wrong, it will return an exception + try: + pci_utils.parse_address(device_address) + except exception.PciDeviceWrongAddressFormat: + raise exception.InvalidLibvirtGPUConfig( + reason="incorrect PCI address: %s" % device_address + ) + self.pgpu_type_mapping[device_address] = vgpu_type + return CONF.devices.enabled_vgpu_types + + def _get_vgpu_type_per_pgpu(self, device_address): + """Provides the vGPU type the pGPU supports. + + :param device_address: the libvirt PCI device name, + eg.'pci_0000_84_00_0' + """ + # Bail out quickly if we don't support vGPUs + if not self.supported_vgpu_types: + return + + if len(self.supported_vgpu_types) == 1: + # The operator wanted to only support one single type so we can + # blindly return it for every single pGPU + return self.supported_vgpu_types[0] + # The libvirt name is like 'pci_0000_84_00_0' + try: + device_address = "{}:{}:{}.{}".format( + *device_address[4:].split('_')) + # Validates whether it's a PCI ID... + pci_utils.parse_address(device_address) + # .format() can return IndexError + except (exception.PciDeviceWrongAddressFormat, IndexError): + # this is not a valid PCI address + LOG.warning("The PCI address %s was invalid for getting the" + "related vGPU type", device_address) + return + try: + return self.pgpu_type_mapping.get(device_address) + except KeyError: + LOG.warning("No vGPU type was configured for PCI address: %s", + device_address) + # We accept to return None instead of raising an exception + # because we prefer the callers to return the existing exceptions + # in case we can't find a specific pGPU + return def _count_mediated_devices(self, enabled_vgpu_types): """Counts the sysfs objects (handles) that represent a mediated device @@ -6518,6 +6605,12 @@ class LibvirtDriver(driver.ComputeDriver): counts_per_parent = collections.defaultdict(int) mediated_devices = self._get_mediated_devices(types=enabled_vgpu_types) for mdev in mediated_devices: + parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent']) + if mdev['type'] != parent_vgpu_type: + # Even if some mdev was created for another vGPU type, just + # verify all the mdevs related to the type that their pGPU + # has + continue counts_per_parent[mdev['parent']] += 1 return counts_per_parent @@ -6536,10 +6629,13 @@ class LibvirtDriver(driver.ComputeDriver): # dev_id is the libvirt name for the PCI device, # eg. pci_0000_84_00_0 which matches a PCI address of 0000:84:00.0 dev_name = dev['dev_id'] + dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name) for _type in dev['types']: + if _type != dev_supported_type: + # This is not the type the operator wanted to support for + # this physical GPU + continue available = dev['types'][_type]['availableInstances'] - # TODO(sbauza): Once we support multiple types, check which - # PCI devices are set for this type # NOTE(sbauza): Even if we support multiple types, Nova will # only use one per physical GPU. counts_per_dev[dev_name] += available @@ -6562,7 +6658,7 @@ class LibvirtDriver(driver.ComputeDriver): """ # Bail out early if operator doesn't care about providing vGPUs - enabled_vgpu_types = self._get_supported_vgpu_types() + enabled_vgpu_types = self.supported_vgpu_types if not enabled_vgpu_types: return {} inventories = {} @@ -6923,6 +7019,11 @@ class LibvirtDriver(driver.ComputeDriver): mdevs = self._get_mediated_devices(requested_types) available_mdevs = set() for mdev in mdevs: + parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent']) + if mdev['type'] != parent_vgpu_type: + # This mdev is using a vGPU type that is not supported by the + # configuration option for its pGPU parent, so we can't use it. + continue # FIXME(sbauza): No longer accept the parent value to be nullable # once we fix the reshape functional test if parent is None or mdev['parent'] == parent: @@ -6940,7 +7041,7 @@ class LibvirtDriver(driver.ComputeDriver): :returns: the newly created mdev UUID or None if not possible """ - supported_types = self._get_supported_vgpu_types() + supported_types = self.supported_vgpu_types # Try to see if we can still create a new mediated device devices = self._get_mdev_capable_devices(supported_types) for device in devices: @@ -6951,19 +7052,15 @@ class LibvirtDriver(driver.ComputeDriver): # The device is not the one that was called, not creating # the mdev continue - # For the moment, the libvirt driver only supports one - # type per host - # TODO(sbauza): Once we support more than one type, make - # sure we lookup which type the parent pGPU supports. - asked_type = supported_types[0] - if device['types'][asked_type]['availableInstances'] > 0: + dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name) + if dev_supported_type and device['types'][ + dev_supported_type]['availableInstances'] > 0: # That physical GPU has enough room for a new mdev # We need the PCI address, not the libvirt name # The libvirt name is like 'pci_0000_84_00_0' pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_')) - chosen_mdev = nova.privsep.libvirt.create_mdev(pci_addr, - asked_type, - uuid=uuid) + chosen_mdev = nova.privsep.libvirt.create_mdev( + pci_addr, dev_supported_type, uuid=uuid) return chosen_mdev @utils.synchronized(VGPU_RESOURCE_SEMAPHORE) @@ -6984,12 +7081,8 @@ class LibvirtDriver(driver.ComputeDriver): vgpu_allocations = self._vgpu_allocations(allocations) if not vgpu_allocations: return - # TODO(sbauza): Once we have nested resource providers, find which one - # is having the related allocation for the specific VGPU type. - # For the moment, we should only have one allocation for - # ResourceProvider. - # TODO(sbauza): Iterate over all the allocations once we have - # nested Resource Providers. For the moment, just take the first. + # TODO(sbauza): For the moment, we only support allocations for only + # one pGPU. if len(vgpu_allocations) > 1: LOG.warning('More than one allocation was passed over to libvirt ' 'while at the moment libvirt only supports one. Only ' @@ -7038,7 +7131,7 @@ class LibvirtDriver(driver.ComputeDriver): raise exception.ComputeResourcesUnavailable( reason='vGPU resource is not available') - supported_types = self._get_supported_vgpu_types() + supported_types = self.supported_vgpu_types # Which mediated devices are created but not assigned to a guest ? mdevs_available = self._get_existing_mdevs_not_assigned( parent_device, supported_types) @@ -7399,12 +7492,9 @@ class LibvirtDriver(driver.ComputeDriver): 'reserved': self._get_reserved_host_disk_gb_from_config(), } - # NOTE(sbauza): For the moment, the libvirt driver only supports - # providing the total number of virtual GPUs for a single GPU type. If - # you have multiple physical GPUs, each of them providing multiple GPU - # types, only one type will be used for each of the physical GPUs. - # If one of the pGPUs doesn't support this type, it won't be used. - # TODO(sbauza): Use traits to make a better world. + # TODO(sbauza): Use traits to providing vGPU types. For the moment, + # it will be only documentation support by explaining to use + # osc-placement to create custom traits for each of the pGPU RPs. self._update_provider_tree_for_vgpu( provider_tree, nodename, allocations=allocations) @@ -7543,13 +7633,6 @@ class LibvirtDriver(driver.ComputeDriver): representing that resource provider in the tree """ # Create the VGPU child providers if they do not already exist. - # TODO(mriedem): For the moment, _get_supported_vgpu_types() only - # returns one single type but that will be changed once we support - # multiple types. - # Note that we can't support multiple vgpu types until a reshape has - # been performed on the vgpu resources provided by the root provider, - # if any. - # Dict of PGPU RPs keyed by their libvirt PCI name pgpu_rps = {} for pgpu_dev_id, inventory in inventories_dict.items(): diff --git a/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml b/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml new file mode 100644 index 000000000000..421e1b50450b --- /dev/null +++ b/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + The libvirt driver now supports defining different virtual GPU types for + each physical GPU. See the ``[devices]/enabled_vgpu_types`` configuration + option for knowing how to do it. Please refer to + https://docs.openstack.org/nova/latest/admin/virtual-gpu.html for further + documentation.