Recreate mediated devices on reboot
There is a missing feature in libvirt (due to the nature of a mediated device being just a sysfs file): mediated devices are not persisted. When you reboot your host, you lose all of them, which can be a big pain for operators having allocated guests using vGPUs. This change will iterate over all the instances, see if they have a nested vGPU, check if the related mediated device exists, and if not, rebalance between physical GPUs to find a proper one which can fit. Note that because we don't persist the mediated device information in Nova either, mediated devices can be created on different physical devices than they were on before the reboot. That's not a big deal since we only support one type at the moment, but that could become a problem later as we would need to figure out which type the mediated device was before the reboot. Partially-Implements: blueprint add-support-for-vgpu Change-Id: Ie6c9108808a461359d717d8a9e9399c8a407bfe9
This commit is contained in:
parent
a40c00957e
commit
26c8c6a86c
|
@ -18508,7 +18508,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
self.assertEqual([uuids.mdev1],
|
||||
drvr._allocate_mdevs(allocations=allocations))
|
||||
privsep_create_mdev.assert_called_once_with("0000:06:00.0",
|
||||
'nvidia-11')
|
||||
'nvidia-11',
|
||||
uuid=None)
|
||||
|
||||
@mock.patch.object(nova.privsep.libvirt, 'create_mdev')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
|
@ -18563,6 +18564,43 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(set([uuids.mdev1]),
|
||||
drvr._get_existing_mdevs_not_assigned())
|
||||
|
||||
@mock.patch.object(nova.privsep.libvirt, 'create_mdev')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
'_get_mdev_capable_devices')
|
||||
@mock.patch.object(os.path, 'exists')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||
'_get_all_assigned_mediated_devices')
|
||||
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
|
||||
return_value=versionutils.convert_version_to_int(
|
||||
libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
|
||||
def test_recreate_mediated_device_on_init_host(
|
||||
self, _get_libvirt_version,
|
||||
get_all_assigned_mdevs, exists, get_mdev_capable_devs,
|
||||
privsep_create_mdev):
|
||||
self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
|
||||
get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1,
|
||||
uuids.mdev2: uuids.inst2}
|
||||
|
||||
# Fake the fact that mdev1 exists but mdev2 does not
|
||||
def _exists(path):
|
||||
# Just verify what we ask
|
||||
self.assertIn('/sys/bus/mdev/devices/', path)
|
||||
return True if uuids.mdev1 in path else False
|
||||
|
||||
exists.side_effect = _exists
|
||||
get_mdev_capable_devs.return_value = [
|
||||
{"dev_id": "pci_0000_06_00_0",
|
||||
"types": {'nvidia-11': {'availableInstances': 16,
|
||||
'name': 'GRID M60-0B',
|
||||
'deviceAPI': 'vfio-pci'},
|
||||
}
|
||||
}]
|
||||
|
||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
|
||||
drvr.init_host(host='foo')
|
||||
privsep_create_mdev.assert_called_once_with(
|
||||
"0000:06:00.0", 'nvidia-11', uuid=uuids.mdev2)
|
||||
|
||||
|
||||
class LibvirtVolumeUsageTestCase(test.NoDBTestCase):
|
||||
"""Test for LibvirtDriver.get_all_volume_usage."""
|
||||
|
|
|
@ -565,6 +565,31 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
'libvirt_ver': self._version_to_string(
|
||||
MIN_LIBVIRT_OTHER_ARCH.get(kvm_arch))})
|
||||
|
||||
# TODO(sbauza): Remove this code once mediated devices are persisted
|
||||
# across reboots.
|
||||
if self._host.has_min_version(MIN_LIBVIRT_MDEV_SUPPORT):
|
||||
self._recreate_assigned_mediated_devices()
|
||||
|
||||
@staticmethod
|
||||
def _is_existing_mdev(uuid):
|
||||
# FIXME(sbauza): Some kernels can have a uevent race meaning that the
|
||||
# libvirt daemon won't know when a mediated device is created unless
|
||||
# you restart that daemon. Until none of the kernels we support have
|
||||
# that possible race, check the sysfs directly instead of asking the
|
||||
# libvirt API.
|
||||
# See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
|
||||
return os.path.exists('/sys/bus/mdev/devices/{0}'.format(uuid))
|
||||
|
||||
def _recreate_assigned_mediated_devices(self):
|
||||
"""Recreate assigned mdevs that could have disappeared if we reboot
|
||||
the host.
|
||||
"""
|
||||
mdevs = self._get_all_assigned_mediated_devices()
|
||||
requested_types = self._get_supported_vgpu_types()
|
||||
for (mdev_uuid, instance_uuid) in six.iteritems(mdevs):
|
||||
if not self._is_existing_mdev(mdev_uuid):
|
||||
self._create_new_mediated_device(requested_types, mdev_uuid)
|
||||
|
||||
def _set_multiattach_support(self):
|
||||
# Check to see if multiattach is supported. Based on bugzilla
|
||||
# https://bugzilla.redhat.com/show_bug.cgi?id=1378242 and related
|
||||
|
@ -5777,12 +5802,13 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
for mdev in mdevs]) - set(allocated_mdevs)
|
||||
return available_mdevs
|
||||
|
||||
def _create_new_mediated_device(self, requested_types):
|
||||
def _create_new_mediated_device(self, requested_types, uuid=None):
|
||||
"""Find a physical device that can support a new mediated device and
|
||||
create it.
|
||||
|
||||
:param requested_types: Filter only capable devices supporting those
|
||||
types.
|
||||
:param uuid: The possible mdev UUID we want to create again
|
||||
|
||||
:returns: the newly created mdev UUID or None if not possible
|
||||
"""
|
||||
|
@ -5801,7 +5827,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# The libvirt name is like 'pci_0000_84_00_0'
|
||||
pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
|
||||
chosen_mdev = nova.privsep.libvirt.create_mdev(pci_addr,
|
||||
asked_type)
|
||||
asked_type,
|
||||
uuid=uuid)
|
||||
return chosen_mdev
|
||||
|
||||
@utils.synchronized(VGPU_RESOURCE_SEMAPHORE)
|
||||
|
|
|
@ -34,16 +34,12 @@ features:
|
|||
* For the moment, please don't restart instances (or suspend/resume them)
|
||||
or the VGPU related device will be removed from the guest.
|
||||
|
||||
* Mediated devices that are created by the libvirt driver are not
|
||||
persisted upon reboot. Consequently, a guest startup would fail since the
|
||||
virtual device wouldn't be existing. In order to prevent that issue,
|
||||
operators rebooting a compute node have to, before restarting
|
||||
nova-compute service, look at all the guest XML configuration and
|
||||
recreate mediated devices for existing guests by doing
|
||||
|
||||
..
|
||||
|
||||
echo <UUID> > /sys/class/mdev_bus/<device>/mdev_supported_types/<type>/create
|
||||
* Mediated devices that are created by the libvirt driver are not persisted
|
||||
upon reboot. Consequently, a guest startup would fail since the virtual
|
||||
device wouldn't exist. In order to prevent that issue, when restarting
|
||||
the compute service, the libvirt driver now looks at all the guest XMLs
|
||||
to check if they have mediated devices, and if the mediated device no
|
||||
longer exists, then Nova recreates it by using the same UUID.
|
||||
|
||||
* If you use Nvidia GRID cards, please know that there is a limitation with
|
||||
the nvidia driver that prevents one guest from having more than one virtual
|
||||
|
|
Loading…
Reference in New Issue