Merge "Support multiple allocations for vGPUs"

This commit is contained in:
Zuul
2026-03-11 04:44:04 +00:00
committed by Gerrit Code Review
5 changed files with 145 additions and 63 deletions

View File

@@ -172,6 +172,39 @@ provided by compute nodes.
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
Ask for more than one vGPU per instance by the flavor
-----------------------------------------------------
.. versionchanged:: 33.0.0
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
doesn't support more than one vGPU per instance (and per GPU resource - which
can be a physical GPU or a virtual function, see nvidia docs for more details).
In order to alleviate this problem, it is mandatory to require in the flavor
that all the vGPUs be spread between multiple GPU resource providers.
For example, you can request two groups of vGPUs this way:
.. code-block:: console
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
--property "resources2:VGPU=1" \
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
distinction), this will work without requiring further attributes as every
single VGPU Resource Provider only provides a single VGPU resource.
For non-SRIOV GPUs, you may need to set other properties in order to ask
Placement to allocate you a host with two distinct GPUs.
You may need to create distinct custom traits per GPU, or custom resource
classes, to explicitly state in your flavor that you want resources from
distinct entities. Alternatively, you could use ``group_policy=isolate`` as a
property, but you would then need to make sure that your flavor doesn't ask
for any resources other than virtual GPUs, or Placement would shard the
allocations for *all* resource groups.
How to discover a GPU type
--------------------------
@@ -490,6 +523,7 @@ For nested vGPUs:
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
.. Links
.. _Intel GVT-g: https://01.org/igvt-g

View File

@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=flavor, networks='auto', host=self.compute1.host)
# FIXME(sbauza): Unfortunately, we only accept one allocation per
# instance by the libvirt driver as you can see in _allocate_mdevs().
# So, eventually, we only have one vGPU for this instance.
self.assert_mdev_usage(self.compute1, expected_amount=1)
# Eventually, we have two allocations and two mdevs
self.assert_mdev_usage(self.compute1, expected_amount=2)
# Let's verify those are spread between both GPU RPs
rp_uuid = self.compute_rp_uuids['host1']
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
for rp in rp_uuids:
inventory = self._get_provider_inventory(rp)
if orc.VGPU in inventory:
usage = self._get_provider_usages(rp)
self.assertEqual(1, usage[orc.VGPU])
class VGPUMultipleTypesTests(VGPUTestBase):

View File

@@ -29397,13 +29397,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
}
}
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations))
self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
def _get_fake_provider_tree_with_vgpu(self):
"""Returns a fake ProviderTree with VGPU inventory on two children RPs
with one with a correct name and the other one wrong.
"""Returns a fake ProviderTree with VGPU inventory on 3 children RPs
with the first two with a correct name and the third wrong.
The child provider is named rp1 and its UUID is uuids.rp1.
The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
uuids.rp2 and uuids.rp3
"""
cn_rp = dict(
uuid=uuids.cn,
@@ -29423,10 +29424,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
uuid=uuids.rp1, generation=0)
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
# Create a second child with a bad naming convention
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
# Create a second child also with a correct naming attribute
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
uuid=uuids.rp2, generation=0)
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
# Create a third child with a bad naming convention
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
uuid=uuids.rp3, generation=0)
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
return pt
@mock.patch.object(libvirt_driver.LibvirtDriver,
@@ -29453,6 +29458,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
['nvidia-11'])
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_existing_mdevs_not_assigned')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_supported_mdev_resource_classes')
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
get_unassigned_mdevs):
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
allocations = {
uuids.rp1: {
'resources': {
orc.VGPU: 1,
}
},
uuids.rp2: {
'resources': {
orc.VGPU: 1,
}
}
}
get_supported_mdev_rcs.return_value = set([orc.VGPU])
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
set([uuids.mdev2]))
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
# Mock the fact update_provider_tree() should have run
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
self.assertEqual([uuids.mdev1, uuids.mdev2],
drvr._allocate_mdevs(allocations=allocations))
get_unassigned_mdevs.assert_has_calls(
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_mdev_capable_devices')
@mock.patch.object(libvirt_driver.LibvirtDriver,
@@ -29513,7 +29549,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# Test that we were unable to guess the RP name
allocations = {
uuids.rp2: {
uuids.rp3: {
'resources': {
orc.VGPU: 1,
}

View File

@@ -9237,43 +9237,39 @@ class LibvirtDriver(driver.ComputeDriver):
That code is supporting Placement API version 1.12
"""
vgpu_allocations = self._vgpu_allocations(allocations)
if not vgpu_allocations:
return
# TODO(sbauza): For the moment, we only support allocations for only
# one pGPU.
if len(vgpu_allocations) > 1:
LOG.warning('More than one allocation was passed over to libvirt '
'while at the moment libvirt only supports one. Only '
'the first allocation will be looked up.')
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
# We only have one allocation with a supported resource class
vgpus_asked = list(alloc['resources'].values())[0]
# Find if we allocated against a specific pGPU (and then the allocation
# is made against a child RP) or any pGPU (in case the VGPU inventory
# is still on the root RP)
try:
allocated_rp = self.provider_tree.data(rp_uuid)
except ValueError:
# The provider doesn't exist, return a better understandable
# exception
raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available')
# FIXME(sbauza): The functional reshape test assumes that we could
# run _allocate_mdevs() against non-nested RPs but this is impossible
# as all inventories have been reshaped *before now* since it's done
# on init_host() (when the compute restarts or whatever else calls it).
# That said, since fixing the functional test isn't easy yet, let's
# assume we still support a non-nested RP for now.
if allocated_rp.parent_uuid is None:
# We are on a root RP
parent_device = None
else:
chosen_mdevs = []
for rp_uuid, alloc in vgpu_allocations.items():
# We only have one allocation with a supported resource class
# FIXME(sbauza): If a new vfio-mdev usage supports more than one
# type per PCI device, we would need to modify this. For the
# moment, all of the vfio-mdev drivers that we know only support
# one type per mdev-supported device.
vgpus_asked = list(alloc['resources'].values())[0]
try:
allocated_rp = self.provider_tree.data(rp_uuid)
except ValueError:
# The provider doesn't exist, return a better understandable
# exception
raise exception.ComputeResourcesUnavailable(
reason='Resource Provider %s is missing' % rp_uuid)
rp_name = allocated_rp.name
# There can be multiple roots, we need to find the root name
# to guess the physical device name
roots = list(self.provider_tree.roots)
for root in roots:
# FIXME(sbauza): The functional reshape test assumes that we
# could run _allocate_mdevs() against non-nested RPs but this
# is impossible as all inventories have been reshaped *before
# now* since it's done on init_host() (when the compute
# restarts or whatever else calls it). That said, since fixing
# the functional test isn't easy yet, let's assume we still
# support a non-nested RP for now.
if allocated_rp.parent_uuid is None:
# We are on a root RP
parent_device = None
break
if rp_name.startswith(root.name + '_'):
# The RP name convention is :
# root_name + '_' + parent_device
@@ -9290,28 +9286,29 @@ class LibvirtDriver(driver.ComputeDriver):
raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available')
supported_types = self.supported_vgpu_types
# Which mediated devices are created but not assigned to a guest ?
mdevs_available = self._get_existing_mdevs_not_assigned(
parent_device, supported_types)
supported_types = self.supported_vgpu_types
# Which mediated devices are created but not assigned to a guest ?
mdevs_available = self._get_existing_mdevs_not_assigned(
parent_device, supported_types)
chosen_mdevs = []
for c in range(vgpus_asked):
chosen_mdev = None
if mdevs_available:
# Take the first available mdev
chosen_mdev = mdevs_available.pop()
else:
LOG.debug('No available mdevs where found. '
'Creating an new one...')
chosen_mdev = self._create_new_mediated_device(parent_device)
if not chosen_mdev:
# If we can't find devices having available VGPUs, just raise
raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available')
else:
chosen_mdevs.append(chosen_mdev)
LOG.info('Allocated mdev: %s.', chosen_mdev)
for c in range(vgpus_asked):
chosen_mdev = None
if mdevs_available:
# Take the first available mdev
chosen_mdev = mdevs_available.pop()
else:
LOG.debug('No available mdevs where found. '
'Creating a new one...')
chosen_mdev = self._create_new_mediated_device(
parent_device)
if not chosen_mdev:
# If we can't find devices having available VGPUs, just
# raise
raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available')
else:
chosen_mdevs.append(chosen_mdev)
LOG.info('Allocated mdev: %s.', chosen_mdev)
return chosen_mdevs
def _detach_mediated_devices(self, guest):

View File

@@ -0,0 +1,9 @@
---
fixes:
- |
You can now request different resource groups in your flavor for VGPU or
generic mediated device custom resource classes. Previously, only the
first resource request group was honored. See `bug #1758086`_ for more
details.
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086