Merge "Handle mdev devices in libvirt 7.7+" into stable/zed

This commit is contained in:
Zuul 2023-03-22 15:33:53 +00:00 committed by Gerrit Code Review
commit 38c629c87b
6 changed files with 96 additions and 26 deletions

View File

@ -63,21 +63,11 @@ class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase):
flavor_id=self.flavor, host=self.compute1.host,
networks='auto', expected_state='ACTIVE')
# TODO(sbauza): Modify this once bug #1951656 is fixed.
# mdev_name2uuid() raises a badly formed hexadecimal UUID string error
self.assertRaises(ValueError,
self.assert_mdev_usage,
self.compute1, expected_amount=1)
self.assert_mdev_usage(self.compute1, expected_amount=1)
# Now, the problem is that we can't create new instances with VGPUs
# from this host.
server = self._create_server(
self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, host=self.compute1.host,
networks='auto', expected_state='ERROR')
# The error is due to a bad mdev name parsing
self.assertIn('fault', server)
# since we only have one host, we have a RescheduledException as this
# service was creating an exception and we can't use another one.
self.assertIn('Exceeded maximum number of retries',
server['fault']['message'])
networks='auto', expected_state='ACTIVE')
self.assert_mdev_usage(self.compute1, expected_amount=2)

View File

@ -3181,6 +3181,32 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest):
config.LibvirtConfigNodeDeviceMdevInformation)
self.assertEqual("nvidia-11", obj.mdev_information.type)
self.assertEqual(12, obj.mdev_information.iommu_group)
self.assertIsNone(obj.mdev_information.uuid)
def test_config_mdev_device_uuid(self):
    # Node-device XML whose mdev capability carries an explicit <uuid>
    # element, and whose device name has the parent PCI address appended
    # after the UUID.
    xmlin = """
<device>
<name>mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0</name>
<path>/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a</path>
<parent>pci_0000_41_00_0</parent>
<driver>
<name>vfio_mdev</name>
</driver>
<capability type='mdev'>
<type id='nvidia-442'/>
<uuid>b2107403-110c-45b0-af87-32cc91597b8a</uuid>
<iommuGroup number='57'/>
</capability>
</device>"""
    obj = config.LibvirtConfigNodeDevice()
    obj.parse_str(xmlin)
    # The mdev capability must be parsed into its dedicated config object.
    self.assertIsInstance(obj.mdev_information,
                          config.LibvirtConfigNodeDeviceMdevInformation)
    self.assertEqual("nvidia-442", obj.mdev_information.type)
    # The iommuGroup number is exposed as an int, not as the raw string.
    self.assertEqual(57, obj.mdev_information.iommu_group)
    # The <uuid> element text is exposed verbatim (dashed form).
    self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a",
                     obj.mdev_information.uuid)
def test_config_vdpa_device(self):
xmlin = """

View File

@ -3382,6 +3382,7 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
root_name="capability", **kwargs)
self.type = None
self.iommu_group = None
self.uuid = None
def parse_dom(self, xmldoc):
super(LibvirtConfigNodeDeviceMdevInformation,
@ -3391,6 +3392,8 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
self.type = c.get('id')
if c.tag == "iommuGroup":
self.iommu_group = int(c.get('number'))
if c.tag == "uuid":
self.uuid = c.text
class LibvirtConfigNodeDeviceVpdCap(LibvirtConfigObject):

View File

@ -8227,15 +8227,52 @@ class LibvirtDriver(driver.ComputeDriver):
def _get_mediated_device_information(self, devname):
"""Returns a dict of a mediated device."""
virtdev = self._host.device_lookup_by_name(devname)
# LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
# address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
# the mdevctl allowing for multiple mediated devs having the same UUID
# defined (only one can be active at a time). Since the guest
# information doesn't have the parent ID, try to lookup which
# mediated device is available that matches the UUID. If multiple
# devices are found that match the UUID, then this is an error
# condition.
try:
virtdev = self._host.device_lookup_by_name(devname)
except libvirt.libvirtError as ex:
if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
raise
mdevs = [dev for dev in self._host.list_mediated_devices()
if dev.startswith(devname)]
# If no matching devices are found, simply raise the original
# exception indicating that no devices are found.
if not mdevs:
raise
elif len(mdevs) > 1:
msg = ("The mediated device name %(devname)s refers to a UUID "
"that is present in multiple libvirt mediated devices. "
"Matching libvirt mediated devices are %(devices)s. "
"Mediated device UUIDs must be unique for Nova." %
{'devname': devname,
'devices': ', '.join(mdevs)})
raise exception.InvalidLibvirtMdevConfig(reason=msg)
LOG.debug('Found requested device %s as %s. Using that.',
devname, mdevs[0])
virtdev = self._host.device_lookup_by_name(mdevs[0])
xmlstr = virtdev.XMLDesc(0)
cfgdev = vconfig.LibvirtConfigNodeDevice()
cfgdev.parse_str(xmlstr)
# Starting with Libvirt 7.3, the uuid information is available in the
# node device information. If it's there, use that. Otherwise,
# fall back to the previous behavior of parsing the uuid from the
# devname.
if cfgdev.mdev_information.uuid:
mdev_uuid = cfgdev.mdev_information.uuid
else:
mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
device = {
"dev_id": cfgdev.name,
# name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
"uuid": libvirt_utils.mdev_name2uuid(cfgdev.name),
"uuid": mdev_uuid,
# the physical GPU PCI device
"parent": cfgdev.parent,
"type": cfgdev.mdev_information.type,

View File

@ -1566,7 +1566,7 @@ class Host(object):
def list_mediated_devices(self, flags=0):
    """Lookup mediated devices.

    :param flags: passed through unchanged to the "mdev" device listing
    :returns: a list of strings with the names of the mediated devices
    """
    return self._list_devices("mdev", flags=flags)

View File

@ -575,17 +575,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]:
def mdev_name2uuid(mdev_name: str) -> str:
    """Convert an mdev name (of the form mdev_<uuid_with_underscores> or
    mdev_<uuid_with_underscores>_<pciaddress>) to a uuid
    (of the form 8-4-4-4-12).

    :param mdev_name: the name of the mdev to parse the UUID from
    :returns: string containing the uuid
    :raises ValueError: if the characters following the 5-character prefix
        do not form a valid UUID
    """
    # Drop the leading "mdev_" (5 characters) and restore the dashes that
    # the device name encodes as underscores.
    mdev_uuid = mdev_name[5:].replace('_', '-')
    # Unconditionally remove the PCI address from the name: a dashed UUID is
    # always 36 characters long, so anything after that is the optional
    # parent-address suffix.
    mdev_uuid = mdev_uuid[:36]
    return str(uuid.UUID(mdev_uuid))
def mdev_uuid2name(mdev_uuid: str, parent: ty.Optional[str] = None) -> str:
    """Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent
    device to a name (of the form mdev_<uuid_with_underscores>[_<pciid>]).

    :param mdev_uuid: the uuid of the mediated device
    :param parent: the parent device id for the mediated device (for example
        pci_0000_41_00_0); it is only appended when it starts with ``pci_``
    :returns: name of the mdev to reference in libvirt
    """
    name = "mdev_" + mdev_uuid.replace('-', '_')
    if parent and parent.startswith('pci_'):
        # Strip the "pci_" prefix but keep an underscore between the UUID and
        # the PCI address, so the result matches the documented
        # mdev_<uuid>_<pciid> form.
        name = name + '_' + parent[4:]
    return name
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]: