Recreate mediated devices on reboot

libvirt lacks a feature here: because a mediated device is just a sysfs file,
mediated devices are not persisted.
When you reboot your host, you lose all of them, which can be a big pain for
operators who have guests with allocated vGPUs.

This change will iterate over all the instances, see if they have a nested
vGPU, check if the related mediated device exists, and if not, rebalance
between physical GPUs to find a proper one which can fit.

Note that since Nova does not persist the mediated device information either,
mediated devices can be recreated on different physical devices than the ones
they were on before the reboot. That's not a big deal since we only support one
type at the moment, but that could become a problem later as we would need to
figure out which type the mediated device was before the reboot.

Partially-Implements: blueprint add-support-for-vgpu
Change-Id: Ie6c9108808a461359d717d8a9e9399c8a407bfe9
This commit is contained in:
Sylvain Bauza 2018-01-12 15:43:44 +01:00
parent a40c00957e
commit 26c8c6a86c
3 changed files with 74 additions and 13 deletions

View File

@ -18508,7 +18508,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.assertEqual([uuids.mdev1],
drvr._allocate_mdevs(allocations=allocations))
privsep_create_mdev.assert_called_once_with("0000:06:00.0",
'nvidia-11')
'nvidia-11',
uuid=None)
@mock.patch.object(nova.privsep.libvirt, 'create_mdev')
@mock.patch.object(libvirt_driver.LibvirtDriver,
@ -18563,6 +18564,43 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.assertEqual(set([uuids.mdev1]),
drvr._get_existing_mdevs_not_assigned())
@mock.patch.object(nova.privsep.libvirt, 'create_mdev')
@mock.patch.object(libvirt_driver.LibvirtDriver,
                   '_get_mdev_capable_devices')
@mock.patch.object(os.path, 'exists')
@mock.patch.object(libvirt_driver.LibvirtDriver,
                   '_get_all_assigned_mediated_devices')
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
                   return_value=versionutils.convert_version_to_int(
                       libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
def test_recreate_mediated_device_on_init_host(
        self, _get_libvirt_version,
        get_all_assigned_mdevs, exists, get_mdev_capable_devs,
        privsep_create_mdev):
    """init_host() recreates only the assigned mdev whose path is missing.

    Two mdevs are reported as assigned to guests; the mocked
    ``os.path.exists`` reports mdev1 as present and mdev2 as absent, so a
    single creation call reusing mdev2's UUID is expected.
    """
    self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')

    assigned_mdevs = {uuids.mdev1: uuids.inst1,
                      uuids.mdev2: uuids.inst2}
    get_all_assigned_mdevs.return_value = assigned_mdevs

    def _only_mdev1_exists(path):
        # Every lookup is expected to target the mdev sysfs directory.
        self.assertIn('/sys/bus/mdev/devices/', path)
        return uuids.mdev1 in path

    exists.side_effect = _only_mdev1_exists

    # A single physical GPU that still has room for the nvidia-11 type.
    nvidia_11_caps = {'availableInstances': 16,
                      'name': 'GRID M60-0B',
                      'deviceAPI': 'vfio-pci'}
    physical_gpu = {"dev_id": "pci_0000_06_00_0",
                    "types": {'nvidia-11': nvidia_11_caps}}
    get_mdev_capable_devs.return_value = [physical_gpu]

    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
    drvr.init_host(host='foo')

    # Only the missing mdev is recreated, with its original UUID.
    privsep_create_mdev.assert_called_once_with(
        "0000:06:00.0", 'nvidia-11', uuid=uuids.mdev2)
class LibvirtVolumeUsageTestCase(test.NoDBTestCase):
"""Test for LibvirtDriver.get_all_volume_usage."""

View File

@ -565,6 +565,31 @@ class LibvirtDriver(driver.ComputeDriver):
'libvirt_ver': self._version_to_string(
MIN_LIBVIRT_OTHER_ARCH.get(kvm_arch))})
# TODO(sbauza): Remove this code once mediated devices are persisted
# across reboots.
if self._host.has_min_version(MIN_LIBVIRT_MDEV_SUPPORT):
self._recreate_assigned_mediated_devices()
@staticmethod
def _is_existing_mdev(uuid):
    """Return whether a mediated device entry exists in sysfs for ``uuid``.

    :param uuid: UUID of the mediated device to look up
    :returns: True if ``/sys/bus/mdev/devices/<uuid>`` exists, else False
    """
    # FIXME(sbauza): Some kernels have a uevent race, which means the
    # libvirt daemon may not learn that a mediated device was created
    # until the daemon is restarted. Until none of the kernels we support
    # has that race, look at sysfs directly rather than asking the libvirt
    # API.
    # See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
    mdev_sysfs_path = '/sys/bus/mdev/devices/{0}'.format(uuid)
    return os.path.exists(mdev_sysfs_path)
def _recreate_assigned_mediated_devices(self):
"""Recreate assigned mdevs that could have disappeared if we reboot
the host.
"""
mdevs = self._get_all_assigned_mediated_devices()
requested_types = self._get_supported_vgpu_types()
for (mdev_uuid, instance_uuid) in six.iteritems(mdevs):
if not self._is_existing_mdev(mdev_uuid):
self._create_new_mediated_device(requested_types, mdev_uuid)
def _set_multiattach_support(self):
# Check to see if multiattach is supported. Based on bugzilla
# https://bugzilla.redhat.com/show_bug.cgi?id=1378242 and related
@ -5777,12 +5802,13 @@ class LibvirtDriver(driver.ComputeDriver):
for mdev in mdevs]) - set(allocated_mdevs)
return available_mdevs
def _create_new_mediated_device(self, requested_types):
def _create_new_mediated_device(self, requested_types, uuid=None):
"""Find a physical device that can support a new mediated device and
create it.
:param requested_types: Filter only capable devices supporting those
types.
:param uuid: The possible mdev UUID we want to create again
:returns: the newly created mdev UUID or None if not possible
"""
@ -5801,7 +5827,8 @@ class LibvirtDriver(driver.ComputeDriver):
# The libvirt name is like 'pci_0000_84_00_0'
pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
chosen_mdev = nova.privsep.libvirt.create_mdev(pci_addr,
asked_type)
asked_type,
uuid=uuid)
return chosen_mdev
@utils.synchronized(VGPU_RESOURCE_SEMAPHORE)

View File

@ -34,16 +34,12 @@ features:
* For the moment, please don't restart instances (or suspend/resume them)
or the VGPU related device will be removed from the guest.
* Mediated devices that are created by the libvirt driver are not
persisted upon reboot. Consequently, a guest startup would fail since the
virtual device wouldn't be existing. In order to prevent that issue,
operators rebooting a compute node have to, before restarting
nova-compute service, look at all the guest XML configuration and
recreate mediated devices for existing guests by doing
..
echo <UUID> > /sys/class/mdev_bus/<device>/mdev_supported_types/<type>/create
* Mediated devices that are created by the libvirt driver are not persisted
upon reboot. Consequently, a guest startup would fail since the virtual
device wouldn't exist. In order to prevent that issue, when restarting
the compute service, the libvirt driver now looks at all the guest XMLs
to check if they have mediated devices, and if the mediated device no
longer exists, then Nova recreates it by using the same UUID.
* If you use Nvidia GRID cards, please know that there is a limitation with
the nvidia driver that prevents one guest to have more than one virtual