Handle mdev devices in libvirt 7.7+
Libvirt 7.7 changed the mdev device naming to include the parent PCI device when listing node devices. The domain, however, will still only see the UUID and not see the parent PCI device. Changing the parsing to simply drop the PCI identifier is not enough, as the device cannot be found when attempting to look up the new ID. Modify the Libvirt Driver's _get_mediated_device_information to tolerate different formats of the mdev name. This first uses the legacy behavior by trying to look up the device name that is passed in (typically in mdev_<uuid> format) and, if that is not found, iterates over the list of mdev node devices until the right UUID is found and selects that one. Note that the lookup of the mdev device by UUID is needed in order to keep the ability to recreate assigned mediated devices on a reboot of the compute node. Additionally, the libvirt utils parsing method mdev_name2uuid has been updated to tolerate both mdev_<uuid> and mdev_<uuid>_<pciid> formats. Closes-Bug: 1951656 Change-Id: Ifed0fa16053228990a6a8df8d4c666521db7e329
This commit is contained in:
committed by
Sylvain Bauza
parent
1852019747
commit
a28b907c4f
@@ -63,21 +63,11 @@ class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase):
|
||||
flavor_id=self.flavor, host=self.compute1.host,
|
||||
networks='auto', expected_state='ACTIVE')
|
||||
|
||||
# TODO(sbauza): Modify this once bug #1851656 is fixed.
|
||||
# mdev_name2uuid() raises a badly formed hexadecimal UUID string error
|
||||
self.assertRaises(ValueError,
|
||||
self.assert_mdev_usage,
|
||||
self.compute1, expected_amount=1)
|
||||
self.assert_mdev_usage(self.compute1, expected_amount=1)
|
||||
|
||||
# Now, the problem is that we can't create new instances with VGPUs
|
||||
# from this host.
|
||||
server = self._create_server(
|
||||
self._create_server(
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
flavor_id=self.flavor, host=self.compute1.host,
|
||||
networks='auto', expected_state='ERROR')
|
||||
# The error is due to a bad mdev name parsing
|
||||
self.assertIn('fault', server)
|
||||
# since we only have one host, we have a RescheduledException as this
|
||||
# service was creating an exception and we can't use another one.
|
||||
self.assertIn('Exceeded maximum number of retries',
|
||||
server['fault']['message'])
|
||||
networks='auto', expected_state='ACTIVE')
|
||||
|
||||
self.assert_mdev_usage(self.compute1, expected_amount=2)
|
||||
|
||||
@@ -3181,6 +3181,32 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest):
|
||||
config.LibvirtConfigNodeDeviceMdevInformation)
|
||||
self.assertEqual("nvidia-11", obj.mdev_information.type)
|
||||
self.assertEqual(12, obj.mdev_information.iommu_group)
|
||||
self.assertIsNone(obj.mdev_information.uuid)
|
||||
|
||||
def test_config_mdev_device_uuid(self):
|
||||
xmlin = """
|
||||
<device>
|
||||
<name>mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0</name>
|
||||
<path>/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a</path>
|
||||
<parent>pci_0000_41_00_0</parent>
|
||||
<driver>
|
||||
<name>vfio_mdev</name>
|
||||
</driver>
|
||||
<capability type='mdev'>
|
||||
<type id='nvidia-442'/>
|
||||
<uuid>b2107403-110c-45b0-af87-32cc91597b8a</uuid>
|
||||
<iommuGroup number='57'/>
|
||||
</capability>
|
||||
</device>"""
|
||||
|
||||
obj = config.LibvirtConfigNodeDevice()
|
||||
obj.parse_str(xmlin)
|
||||
self.assertIsInstance(obj.mdev_information,
|
||||
config.LibvirtConfigNodeDeviceMdevInformation)
|
||||
self.assertEqual("nvidia-442", obj.mdev_information.type)
|
||||
self.assertEqual(57, obj.mdev_information.iommu_group)
|
||||
self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a",
|
||||
obj.mdev_information.uuid)
|
||||
|
||||
def test_config_vdpa_device(self):
|
||||
xmlin = """
|
||||
|
||||
@@ -3382,6 +3382,7 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
|
||||
root_name="capability", **kwargs)
|
||||
self.type = None
|
||||
self.iommu_group = None
|
||||
self.uuid = None
|
||||
|
||||
def parse_dom(self, xmldoc):
|
||||
super(LibvirtConfigNodeDeviceMdevInformation,
|
||||
@@ -3391,6 +3392,8 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
|
||||
self.type = c.get('id')
|
||||
if c.tag == "iommuGroup":
|
||||
self.iommu_group = int(c.get('number'))
|
||||
if c.tag == "uuid":
|
||||
self.uuid = c.text
|
||||
|
||||
|
||||
class LibvirtConfigNodeDeviceVpdCap(LibvirtConfigObject):
|
||||
|
||||
@@ -8227,15 +8227,52 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
|
||||
def _get_mediated_device_information(self, devname):
|
||||
"""Returns a dict of a mediated device."""
|
||||
virtdev = self._host.device_lookup_by_name(devname)
|
||||
# LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
|
||||
# address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
|
||||
# the mdevctl allowing for multiple mediated devs having the same UUID
|
||||
# defined (only one can be active at a time). Since the guest
|
||||
# information doesn't have the parent ID, try to look up which
|
||||
# mediated device is available that matches the UUID. If multiple
|
||||
# devices are found that match the UUID, then this is an error
|
||||
# condition.
|
||||
try:
|
||||
virtdev = self._host.device_lookup_by_name(devname)
|
||||
except libvirt.libvirtError as ex:
|
||||
if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
|
||||
raise
|
||||
mdevs = [dev for dev in self._host.list_mediated_devices()
|
||||
if dev.startswith(devname)]
|
||||
# If no matching devices are found, simply raise the original
|
||||
# exception indicating that no devices are found.
|
||||
if not mdevs:
|
||||
raise
|
||||
elif len(mdevs) > 1:
|
||||
msg = ("The mediated device name %(devname)s refers to a UUID "
|
||||
"that is present in multiple libvirt mediated devices. "
|
||||
"Matching libvirt mediated devices are %(devices)s. "
|
||||
"Mediated device UUIDs must be unique for Nova." %
|
||||
{'devname': devname,
|
||||
'devices': ', '.join(mdevs)})
|
||||
raise exception.InvalidLibvirtMdevConfig(reason=msg)
|
||||
|
||||
LOG.debug('Found requested device %s as %s. Using that.',
|
||||
devname, mdevs[0])
|
||||
virtdev = self._host.device_lookup_by_name(mdevs[0])
|
||||
xmlstr = virtdev.XMLDesc(0)
|
||||
cfgdev = vconfig.LibvirtConfigNodeDevice()
|
||||
cfgdev.parse_str(xmlstr)
|
||||
# Starting with Libvirt 7.3, the uuid information is available in the
|
||||
# node device information. If it's there, use that. Otherwise,
|
||||
# fall back to the previous behavior of parsing the uuid from the
|
||||
# devname.
|
||||
if cfgdev.mdev_information.uuid:
|
||||
mdev_uuid = cfgdev.mdev_information.uuid
|
||||
else:
|
||||
mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
|
||||
|
||||
device = {
|
||||
"dev_id": cfgdev.name,
|
||||
# name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
|
||||
"uuid": libvirt_utils.mdev_name2uuid(cfgdev.name),
|
||||
"uuid": mdev_uuid,
|
||||
# the physical GPU PCI device
|
||||
"parent": cfgdev.parent,
|
||||
"type": cfgdev.mdev_information.type,
|
||||
|
||||
@@ -1566,7 +1566,7 @@ class Host(object):
|
||||
def list_mediated_devices(self, flags=0):
|
||||
"""Lookup mediated devices.
|
||||
|
||||
:returns: a list of virNodeDevice instance
|
||||
:returns: a list of strings with the names of the mdev node devices
|
||||
"""
|
||||
return self._list_devices("mdev", flags=flags)
|
||||
|
||||
|
||||
@@ -575,17 +575,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]:
|
||||
|
||||
|
||||
def mdev_name2uuid(mdev_name: str) -> str:
|
||||
"""Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a
|
||||
uuid (of the form 8-4-4-4-12).
|
||||
"""Convert an mdev name (of the form mdev_<uuid_with_underscores> or
|
||||
mdev_<uuid_with_underscores>_<pciaddress>) to a uuid
|
||||
(of the form 8-4-4-4-12).
|
||||
|
||||
:param mdev_name: the name of the mdev to parse the UUID from
|
||||
:returns: string containing the uuid
|
||||
"""
|
||||
return str(uuid.UUID(mdev_name[5:].replace('_', '-')))
|
||||
mdev_uuid = mdev_name[5:].replace('_', '-')
|
||||
# Unconditionally remove the PCI address from the name
|
||||
mdev_uuid = mdev_uuid[:36]
|
||||
return str(uuid.UUID(mdev_uuid))
|
||||
|
||||
|
||||
def mdev_uuid2name(mdev_uuid: str) -> str:
|
||||
"""Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form
|
||||
mdev_<uuid_with_underscores>).
|
||||
def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str:
|
||||
"""Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent
|
||||
device to a name (of the form mdev_<uuid_with_underscores>[_<pciid>]).
|
||||
|
||||
:param mdev_uuid: the uuid of the mediated device
|
||||
:param parent: the parent device id for the mediated device
|
||||
:returns: name of the mdev to reference in libvirt
|
||||
"""
|
||||
return "mdev_" + mdev_uuid.replace('-', '_')
|
||||
name = "mdev_" + mdev_uuid.replace('-', '_')
|
||||
if parent and parent.startswith('pci_'):
|
||||
name = name + parent[4:]
|
||||
return name
|
||||
|
||||
|
||||
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]:
|
||||
|
||||
Reference in New Issue
Block a user