Merge "Support multiple allocations for vGPUs"
This commit is contained in:
@@ -172,6 +172,39 @@ provided by compute nodes.
|
||||
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
||||
|
||||
|
||||
Ask for more than one vGPU per instance by the flavor
|
||||
-----------------------------------------------------
|
||||
|
||||
.. versionchanged:: 33.0.0
|
||||
|
||||
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
|
||||
doesn't support more than one vGPU per instance (and per GPU resource - which
|
||||
can be a physical GPU or a virtual function, see nvidia docs for more details).
|
||||
In order to alleviate this problem, the flavor must request its vGPUs in a way
|
||||
that spreads all of them between multiple GPU resource providers.
|
||||
|
||||
For example, you can request two groups of vGPUs this way:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
|
||||
--property "resources2:VGPU=1"
|
||||
|
||||
|
||||
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
|
||||
distinction), this will work without requiring further attributes as every
|
||||
single VGPU Resource Provider only provides a single VGPU resource.
|
||||
|
||||
For non-SR-IOV GPUs, you may need other properties in order to ask
|
||||
Placement to allocate a host with two distinct GPUs.
|
||||
You may need to create distinct custom traits per GPU or custom resource
|
||||
classes for explicitly telling in your flavor that you would want resources
|
||||
from distinct entities, or you could use ``group_policy=isolate`` as a property
|
||||
but you would need to make sure that you don't ask for resources other than
|
||||
virtual GPUs in your flavor, or Placement would shard all the allocations for
|
||||
*all* resource groups.
|
||||
|
||||
|
||||
How to discover a GPU type
|
||||
--------------------------
|
||||
|
||||
@@ -490,6 +523,7 @@ For nested vGPUs:
|
||||
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
||||
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
||||
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
||||
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||
|
||||
.. Links
|
||||
.. _Intel GVT-g: https://01.org/igvt-g
|
||||
|
||||
@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
||||
|
||||
# FIXME(sbauza): Unfortunately, we only accept one allocation per
|
||||
# instance by the libvirt driver as you can see in _allocate_mdevs().
|
||||
# So, eventually, we only have one vGPU for this instance.
|
||||
self.assert_mdev_usage(self.compute1, expected_amount=1)
|
||||
# Eventually, we have two allocations and two mdevs
|
||||
self.assert_mdev_usage(self.compute1, expected_amount=2)
|
||||
# Let's verify those are spread between both GPU RPs
|
||||
rp_uuid = self.compute_rp_uuids['host1']
|
||||
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
|
||||
for rp in rp_uuids:
|
||||
inventory = self._get_provider_inventory(rp)
|
||||
if orc.VGPU in inventory:
|
||||
usage = self._get_provider_usages(rp)
|
||||
self.assertEqual(1, usage[orc.VGPU])
|
||||
|
||||
|
||||
class VGPUMultipleTypesTests(VGPUTestBase):
|
||||
|
||||
@@ -29397,13 +29397,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
||||
}
|
||||
}
|
||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations))
|
||||
self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
|
||||
|
||||
def _get_fake_provider_tree_with_vgpu(self):
|
||||
"""Returns a fake ProviderTree with VGPU inventory on two children RPs
|
||||
with one with a correct name and the other one wrong.
|
||||
"""Returns a fake ProviderTree with VGPU inventory on 3 children RPs
|
||||
with the first two with a correct name and the third wrong.
|
||||
|
||||
The child provider is named rp1 and its UUID is uuids.rp1.
|
||||
The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
|
||||
uuids.rp2 and uuids.rp3
|
||||
"""
|
||||
cn_rp = dict(
|
||||
uuid=uuids.cn,
|
||||
@@ -29423,10 +29424,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
||||
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
||||
uuid=uuids.rp1, generation=0)
|
||||
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
||||
# Create a second child with a bad naming convention
|
||||
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
||||
# Create a second child also with a correct naming attribute
|
||||
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
|
||||
uuid=uuids.rp2, generation=0)
|
||||
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
||||
# Create a third child with a bad naming convention
|
||||
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
||||
uuid=uuids.rp3, generation=0)
|
||||
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
|
||||
return pt
|
||||
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
@@ -29453,6 +29458,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
||||
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
||||
['nvidia-11'])
|
||||
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
'_get_existing_mdevs_not_assigned')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
'_get_supported_mdev_resource_classes')
|
||||
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
|
||||
get_unassigned_mdevs):
|
||||
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
|
||||
allocations = {
|
||||
uuids.rp1: {
|
||||
'resources': {
|
||||
orc.VGPU: 1,
|
||||
}
|
||||
},
|
||||
uuids.rp2: {
|
||||
'resources': {
|
||||
orc.VGPU: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
get_supported_mdev_rcs.return_value = set([orc.VGPU])
|
||||
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
|
||||
set([uuids.mdev2]))
|
||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||
# Mock the fact update_provider_tree() should have run
|
||||
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
|
||||
self.assertEqual([uuids.mdev1, uuids.mdev2],
|
||||
drvr._allocate_mdevs(allocations=allocations))
|
||||
get_unassigned_mdevs.assert_has_calls(
|
||||
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
|
||||
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
|
||||
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
'_get_mdev_capable_devices')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
@@ -29513,7 +29549,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
||||
|
||||
# Test that we were unable to guess the RP name
|
||||
allocations = {
|
||||
uuids.rp2: {
|
||||
uuids.rp3: {
|
||||
'resources': {
|
||||
orc.VGPU: 1,
|
||||
}
|
||||
|
||||
@@ -9237,43 +9237,39 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
That code is supporting Placement API version 1.12
|
||||
"""
|
||||
vgpu_allocations = self._vgpu_allocations(allocations)
|
||||
if not vgpu_allocations:
|
||||
return
|
||||
# TODO(sbauza): For the moment, we only support allocations for only
|
||||
# one pGPU.
|
||||
if len(vgpu_allocations) > 1:
|
||||
LOG.warning('More than one allocation was passed over to libvirt '
|
||||
'while at the moment libvirt only supports one. Only '
|
||||
'the first allocation will be looked up.')
|
||||
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
|
||||
# We only have one allocation with a supported resource class
|
||||
vgpus_asked = list(alloc['resources'].values())[0]
|
||||
|
||||
# Find if we allocated against a specific pGPU (and then the allocation
|
||||
# is made against a child RP) or any pGPU (in case the VGPU inventory
|
||||
# is still on the root RP)
|
||||
try:
|
||||
allocated_rp = self.provider_tree.data(rp_uuid)
|
||||
except ValueError:
|
||||
# The provider doesn't exist, return a better understandable
|
||||
# exception
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='mdev-capable resource is not available')
|
||||
# FIXME(sbauza): The functional reshape test assumes that we could
|
||||
# run _allocate_mdevs() against non-nested RPs but this is impossible
|
||||
# as all inventories have been reshaped *before now* since it's done
|
||||
# on init_host() (when the compute restarts or whatever else calls it).
|
||||
# That said, since fixing the functional test isn't easy yet, let's
|
||||
# assume we still support a non-nested RP for now.
|
||||
if allocated_rp.parent_uuid is None:
|
||||
# We are on a root RP
|
||||
parent_device = None
|
||||
else:
|
||||
chosen_mdevs = []
|
||||
for rp_uuid, alloc in vgpu_allocations.items():
|
||||
# We only have one allocation with a supported resource class
|
||||
# FIXME(sbauza): If a new vfio-mdev usage supports more than one
|
||||
# type per PCI device, we would need to modify this. For the
|
||||
# moment, all of the vfio-mdev drivers that we know only support
|
||||
# one type per mdev-supported device.
|
||||
vgpus_asked = list(alloc['resources'].values())[0]
|
||||
|
||||
try:
|
||||
allocated_rp = self.provider_tree.data(rp_uuid)
|
||||
except ValueError:
|
||||
# The provider doesn't exist, return a better understandable
|
||||
# exception
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='Resource Provider %s is missing' % rp_uuid)
|
||||
rp_name = allocated_rp.name
|
||||
# There can be multiple roots, we need to find the root name
|
||||
# to guess the physical device name
|
||||
roots = list(self.provider_tree.roots)
|
||||
for root in roots:
|
||||
# FIXME(sbauza): The functional reshape test assumes that we
|
||||
# could run _allocate_mdevs() against non-nested RPs but this
|
||||
# is impossible as all inventories have been reshaped *before
|
||||
# now* since it's done on init_host() (when the compute
|
||||
# restarts or whatever else calls it). That said, since fixing
|
||||
# the functional test isn't easy yet, let's assume we still
|
||||
# support a non-nested RP for now.
|
||||
if allocated_rp.parent_uuid is None:
|
||||
# We are on a root RP
|
||||
parent_device = None
|
||||
break
|
||||
if rp_name.startswith(root.name + '_'):
|
||||
# The RP name convention is :
|
||||
# root_name + '_' + parent_device
|
||||
@@ -9290,28 +9286,29 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='mdev-capable resource is not available')
|
||||
|
||||
supported_types = self.supported_vgpu_types
|
||||
# Which mediated devices are created but not assigned to a guest ?
|
||||
mdevs_available = self._get_existing_mdevs_not_assigned(
|
||||
parent_device, supported_types)
|
||||
supported_types = self.supported_vgpu_types
|
||||
# Which mediated devices are created but not assigned to a guest ?
|
||||
mdevs_available = self._get_existing_mdevs_not_assigned(
|
||||
parent_device, supported_types)
|
||||
|
||||
chosen_mdevs = []
|
||||
for c in range(vgpus_asked):
|
||||
chosen_mdev = None
|
||||
if mdevs_available:
|
||||
# Take the first available mdev
|
||||
chosen_mdev = mdevs_available.pop()
|
||||
else:
|
||||
LOG.debug('No available mdevs where found. '
|
||||
'Creating an new one...')
|
||||
chosen_mdev = self._create_new_mediated_device(parent_device)
|
||||
if not chosen_mdev:
|
||||
# If we can't find devices having available VGPUs, just raise
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='mdev-capable resource is not available')
|
||||
else:
|
||||
chosen_mdevs.append(chosen_mdev)
|
||||
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
||||
for c in range(vgpus_asked):
|
||||
chosen_mdev = None
|
||||
if mdevs_available:
|
||||
# Take the first available mdev
|
||||
chosen_mdev = mdevs_available.pop()
|
||||
else:
|
||||
LOG.debug('No available mdevs where found. '
|
||||
'Creating a new one...')
|
||||
chosen_mdev = self._create_new_mediated_device(
|
||||
parent_device)
|
||||
if not chosen_mdev:
|
||||
# If we can't find devices having available VGPUs, just
|
||||
# raise
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='mdev-capable resource is not available')
|
||||
else:
|
||||
chosen_mdevs.append(chosen_mdev)
|
||||
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
||||
return chosen_mdevs
|
||||
|
||||
def _detach_mediated_devices(self, guest):
|
||||
|
||||
9
releasenotes/notes/bug-1758086-e9d147380d149789.yaml
Normal file
9
releasenotes/notes/bug-1758086-e9d147380d149789.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
You can now request different resource groups in your flavor for VGPU or
|
||||
generic mediated device custom resource classes. Previously, only the
|
||||
first resource request group was honored. See `bug #1758086`_ for more
|
||||
details.
|
||||
|
||||
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||
Reference in New Issue
Block a user