diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index c724f1520d08..65bb47d0f972 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -172,6 +172,39 @@ provided by compute nodes. $ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu +Ask for more than one vGPU per instance by the flavor +----------------------------------------------------- + +.. versionchanged:: 33.0.0 + +We have an open bug report `bug 1758086`_ explaining that the nvidia driver +doesn't support more than one vGPU per instance (and per GPU resource - which +can be a physical GPU or a virtual function, see nvidia docs for more details). +In order to alleviate this problem, it is mandatory to have the flavor require +that all the vGPUs are spread between multiple GPU resource providers. + +For example, you can request two groups of vGPUs this way: + +.. code-block:: console + + $ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \ + --property "resources2:VGPU=1" + + +With SR-IOV GPUs (you may need to refer to nvidia documentation to know the +distinction), this will work without requiring further attributes as every +single VGPU Resource Provider only provides a single VGPU resource. + +For non-SRIOV GPUs, you may require other properties in order to request +Placement to allocate you some host with two distinct GPUs. +You may need to create distinct custom traits per GPU or custom resource +classes for explicitly telling in your flavor that you would want resources +from distinct entities, or you could use ``group_policy=isolate`` as a property +but you would need to make sure that you don't ask for resources other than +virtual GPUs in your flavor, or Placement would shard all the allocations for +*all* resource groups. + + How to discover a GPU type -------------------------- @@ -490,6 +523,7 @@ For nested vGPUs: .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688 ..
_bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705 .. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html +.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086 .. Links .. _Intel GVT-g: https://01.org/igvt-g diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 366b3afa1d09..16240c29717a 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase): image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', flavor_id=flavor, networks='auto', host=self.compute1.host) - # FIXME(sbauza): Unfortunately, we only accept one allocation per - # instance by the libvirt driver as you can see in _allocate_mdevs(). - # So, eventually, we only have one vGPU for this instance. - self.assert_mdev_usage(self.compute1, expected_amount=1) + # Eventually, we have two allocations and two mdevs + self.assert_mdev_usage(self.compute1, expected_amount=2) + # Let's verify those are spread between both GPU RPs + rp_uuid = self.compute_rp_uuids['host1'] + rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid) + for rp in rp_uuids: + inventory = self._get_provider_inventory(rp) + if orc.VGPU in inventory: + usage = self._get_provider_usages(rp) + self.assertEqual(1, usage[orc.VGPU]) class VGPUMultipleTypesTests(VGPUTestBase): diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 0107d29f7b2c..34c3b19d681e 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -29397,13 +29397,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): } } drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - self.assertIsNone(drvr._allocate_mdevs(allocations=allocations)) + self.assertEqual([], drvr._allocate_mdevs(allocations=allocations)) 
def _get_fake_provider_tree_with_vgpu(self): - """Returns a fake ProviderTree with VGPU inventory on two children RPs - with one with a correct name and the other one wrong. + """Returns a fake ProviderTree with VGPU inventory on 3 children RPs + with the first two with a correct name and the third wrong. - The child provider is named rp1 and its UUID is uuids.rp1. + The child providers are named rp[1-3] and their UUIDs are uuids.rp1, + uuids.rp2 and uuids.rp3 """ cn_rp = dict( uuid=uuids.cn, @@ -29423,10 +29424,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'], uuid=uuids.rp1, generation=0) pt.update_inventory(uuids.rp1, vgpu_rp_inv) - # Create a second child with a bad naming convention - pt.new_child('oops_I_did_it_again', cn_rp['uuid'], + # Create a second child also with a correct naming attribute + pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'], uuid=uuids.rp2, generation=0) pt.update_inventory(uuids.rp2, vgpu_rp_inv) + # Create a third child with a bad naming convention + pt.new_child('oops_I_did_it_again', cn_rp['uuid'], + uuid=uuids.rp3, generation=0) + pt.update_inventory(uuids.rp3, vgpu_rp_inv) return pt @mock.patch.object(libvirt_driver.LibvirtDriver, @@ -29453,6 +29458,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0', ['nvidia-11']) + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_get_existing_mdevs_not_assigned') + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_get_supported_mdev_resource_classes') + def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs, + get_unassigned_mdevs): + self.flags(enabled_mdev_types=['nvidia-11'], group='devices') + allocations = { + uuids.rp1: { + 'resources': { + orc.VGPU: 1, + } + }, + uuids.rp2: { + 'resources': { + orc.VGPU: 1, + } + } + } + get_supported_mdev_rcs.return_value = 
set([orc.VGPU]) + get_unassigned_mdevs.side_effect = (set([uuids.mdev1]), + set([uuids.mdev2])) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # Mock the fact update_provider_tree() should have run + drvr.provider_tree = self._get_fake_provider_tree_with_vgpu() + self.assertEqual([uuids.mdev1, uuids.mdev2], + drvr._allocate_mdevs(allocations=allocations)) + get_unassigned_mdevs.assert_has_calls( + [mock.call('pci_0000_06_00_0', ['nvidia-11']), + mock.call('pci_0000_07_00_0', ['nvidia-11'])]) + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_mdev_capable_devices') @mock.patch.object(libvirt_driver.LibvirtDriver, @@ -29513,7 +29549,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): # Test that we were unable to guess the RP name allocations = { - uuids.rp2: { + uuids.rp3: { 'resources': { orc.VGPU: 1, } diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 24ed46205ea7..e02f37bf3a0b 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -9237,43 +9237,39 @@ class LibvirtDriver(driver.ComputeDriver): That code is supporting Placement API version 1.12 """ vgpu_allocations = self._vgpu_allocations(allocations) - if not vgpu_allocations: - return - # TODO(sbauza): For the moment, we only support allocations for only - # one pGPU. - if len(vgpu_allocations) > 1: - LOG.warning('More than one allocation was passed over to libvirt ' - 'while at the moment libvirt only supports one. 
Only ' - 'the first allocation will be looked up.') - rp_uuid, alloc = next(iter(vgpu_allocations.items())) - # We only have one allocation with a supported resource class - vgpus_asked = list(alloc['resources'].values())[0] - # Find if we allocated against a specific pGPU (and then the allocation - # is made against a child RP) or any pGPU (in case the VGPU inventory - # is still on the root RP) - try: - allocated_rp = self.provider_tree.data(rp_uuid) - except ValueError: - # The provider doesn't exist, return a better understandable - # exception - raise exception.ComputeResourcesUnavailable( - reason='mdev-capable resource is not available') - # FIXME(sbauza): The functional reshape test assumes that we could - # run _allocate_mdevs() against non-nested RPs but this is impossible - # as all inventories have been reshaped *before now* since it's done - # on init_host() (when the compute restarts or whatever else calls it). - # That said, since fixing the functional test isn't easy yet, let's - # assume we still support a non-nested RP for now. - if allocated_rp.parent_uuid is None: - # We are on a root RP - parent_device = None - else: + chosen_mdevs = [] + for rp_uuid, alloc in vgpu_allocations.items(): + # We only have one allocation with a supported resource class + # FIXME(sbauza): If a new vfio-mdev usage supports more than one + # type per PCI device, we would need to modify this. For the + # moment, all of the vfio-mdev drivers that we know only support + # one type per mdev-supported device. 
+ vgpus_asked = list(alloc['resources'].values())[0] + + try: + allocated_rp = self.provider_tree.data(rp_uuid) + except ValueError: + # The provider doesn't exist, return a better understandable + # exception + raise exception.ComputeResourcesUnavailable( + reason='Resource Provider %s is missing' % rp_uuid) rp_name = allocated_rp.name # There can be multiple roots, we need to find the root name # to guess the physical device name roots = list(self.provider_tree.roots) for root in roots: + # FIXME(sbauza): The functional reshape test assumes that we + # could run _allocate_mdevs() against non-nested RPs but this + # is impossible as all inventories have been reshaped *before + # now* since it's done on init_host() (when the compute + # restarts or whatever else calls it). That said, since fixing + # the functional test isn't easy yet, let's assume we still + # support a non-nested RP for now. + if allocated_rp.parent_uuid is None: + # We are on a root RP + parent_device = None + break if rp_name.startswith(root.name + '_'): # The RP name convention is : # root_name + '_' + parent_device @@ -9290,28 +9286,29 @@ class LibvirtDriver(driver.ComputeDriver): raise exception.ComputeResourcesUnavailable( reason='mdev-capable resource is not available') - supported_types = self.supported_vgpu_types - # Which mediated devices are created but not assigned to a guest ? - mdevs_available = self._get_existing_mdevs_not_assigned( - parent_device, supported_types) + supported_types = self.supported_vgpu_types + # Which mediated devices are created but not assigned to a guest ? + mdevs_available = self._get_existing_mdevs_not_assigned( + parent_device, supported_types) - chosen_mdevs = [] - for c in range(vgpus_asked): - chosen_mdev = None - if mdevs_available: - # Take the first available mdev - chosen_mdev = mdevs_available.pop() - else: - LOG.debug('No available mdevs where found. 
' - 'Creating an new one...') - chosen_mdev = self._create_new_mediated_device(parent_device) - if not chosen_mdev: - # If we can't find devices having available VGPUs, just raise - raise exception.ComputeResourcesUnavailable( - reason='mdev-capable resource is not available') - else: - chosen_mdevs.append(chosen_mdev) - LOG.info('Allocated mdev: %s.', chosen_mdev) + for c in range(vgpus_asked): + chosen_mdev = None + if mdevs_available: + # Take the first available mdev + chosen_mdev = mdevs_available.pop() + else: + LOG.debug('No available mdevs where found. ' + 'Creating a new one...') + chosen_mdev = self._create_new_mediated_device( + parent_device) + if not chosen_mdev: + # If we can't find devices having available VGPUs, just + # raise + raise exception.ComputeResourcesUnavailable( + reason='mdev-capable resource is not available') + else: + chosen_mdevs.append(chosen_mdev) + LOG.info('Allocated mdev: %s.', chosen_mdev) return chosen_mdevs def _detach_mediated_devices(self, guest): diff --git a/releasenotes/notes/bug-1758086-e9d147380d149789.yaml b/releasenotes/notes/bug-1758086-e9d147380d149789.yaml new file mode 100644 index 000000000000..22fe5a39d7e1 --- /dev/null +++ b/releasenotes/notes/bug-1758086-e9d147380d149789.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + You can now request different resource groups in your flavor for VGPU or + generic mediated device custom resource classes. Previously, only the + first resource request group was honored. See `bug #1758086`_ for more + details. + + .. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086