Merge "Avoid suspending guest with attached vGPUs"

This commit is contained in:
Zuul 2018-01-31 00:29:53 +00:00 committed by Gerrit Code Review
commit 64c452d568
3 changed files with 82 additions and 1 deletions

View File

@ -12499,6 +12499,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
@mock.patch.object(dmcrypt, 'delete_volume')
@mock.patch.object(conn, '_get_instance_disk_info_from_config',
return_value=[])
@mock.patch.object(conn, '_detach_mediated_devices')
@mock.patch.object(conn, '_detach_direct_passthrough_ports')
@mock.patch.object(conn, '_detach_pci_devices')
@mock.patch.object(pci_manager, 'get_instance_pci_devs',
@ -12507,6 +12508,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
def suspend(mock_get_guest, mock_get_instance_pci_devs,
mock_detach_pci_devices,
mock_detach_direct_passthrough_ports,
mock_detach_mediated_devices,
mock_get_instance_disk_info,
mock_delete_volume):
mock_managedSave = mock.Mock()
@ -18817,6 +18819,59 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
privsep_create_mdev.assert_called_once_with(
"0000:06:00.0", 'nvidia-11', uuid=uuids.mdev2)
@mock.patch.object(libvirt_guest.Guest, 'detach_device')
def _test_detach_mediated_devices(self, side_effect, detach_device):
    """Common helper: run _detach_mediated_devices against a guest
    that has one mediated device attached.

    :param side_effect: installed as the side effect of the mocked
        Guest.detach_device (either a callable or an exception instance).
    :returns: the detach_device mock, so callers can assert on it.
    """
    # Domain XML with one plain virtio disk and one mediated (mdev)
    # hostdev; only the hostdev should be handed to detach_device.
    dom_with_vgpu = (
        """<domain> <devices>
<disk type='file' device='disk'>
<driver name='qemu' type='qcow2' cache='none'/>
<source file='xxx'/>
<target dev='vda' bus='virtio'/>
<alias name='virtio-disk0'/>
<address type='pci' domain='0x0000' bus='0x00'
slot='0x04' function='0x0'/>
</disk>
<hostdev mode='subsystem' type='mdev' managed='no'
model='vfio-pci'>
<source>
<address uuid='81db53c6-6659-42a0-a34c-1507fdc72983'/>
</source>
<alias name='hostdev0'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x05'
function='0x0'/>
</hostdev>
</devices></domain>""")
    detach_device.side_effect = side_effect
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
    guest = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_with_vgpu))
    drvr._detach_mediated_devices(guest)
    return detach_device
def test_detach_mediated_devices(self):
    """Happy path: the mdev is detached live, exactly once."""
    def _check_mdev_cfg(cfg_obj, **kwargs):
        # Whatever gets detached must be a mediated-device config.
        self.assertIsInstance(
            cfg_obj, vconfig.LibvirtConfigGuestHostdevMDEV)

    mock_detach = self._test_detach_mediated_devices(_check_mdev_cfg)
    mock_detach.assert_called_once_with(mock.ANY, live=True)
def test_detach_mediated_devices_raises_exc_unsupported(self):
    """A libvirt CONFIG_UNSUPPORTED error must be translated into
    InstanceFaultRollback so the instance goes back to ACTIVE.
    """
    libvirt_exc = fakelibvirt.make_libvirtError(
        fakelibvirt.libvirtError,
        'virDomainDetachDeviceFlags() failed',
        error_code=fakelibvirt.VIR_ERR_CONFIG_UNSUPPORTED)
    self.assertRaises(exception.InstanceFaultRollback,
                      self._test_detach_mediated_devices, libvirt_exc)
def test_detach_mediated_devices_raises_exc(self):
    """Any non-libvirt error from detach_device propagates unchanged."""
    unexpected = test.TestingException()
    self.assertRaises(test.TestingException,
                      self._test_detach_mediated_devices, unexpected)
class LibvirtVolumeUsageTestCase(test.NoDBTestCase):
"""Test for LibvirtDriver.get_all_volume_usage."""

View File

@ -2780,6 +2780,7 @@ class LibvirtDriver(driver.ComputeDriver):
self._detach_pci_devices(guest,
pci_manager.get_instance_pci_devs(instance))
self._detach_direct_passthrough_ports(context, instance, guest)
self._detach_mediated_devices(guest)
guest.save_memory_state()
def resume(self, context, instance, network_info, block_device_info=None):
@ -5910,6 +5911,28 @@ class LibvirtDriver(driver.ComputeDriver):
chosen_mdevs.append(chosen_mdev)
return chosen_mdevs
def _detach_mediated_devices(self, guest):
    """Hot-unplug every mediated device (e.g. a vGPU) from the guest.

    :param guest: the Guest object whose mdev hostdevs are detached.
    :raises exception.InstanceFaultRollback: when libvirt reports that
        hot-unplugging mediated devices is unsupported, so the caller
        rolls the instance state back instead of suspending.
    """
    mdev_cfgs = guest.get_all_devices(
        devtype=vconfig.LibvirtConfigGuestHostdevMDEV)
    for cfg in mdev_cfgs:
        try:
            guest.detach_device(cfg, live=True)
        except libvirt.libvirtError as ex:
            # NOTE(sbauza): There is a pending issue with libvirt that
            # doesn't allow to hot-unplug mediated devices. Let's
            # short-circuit the suspend action and set the instance back
            # to ACTIVE.
            # TODO(sbauza): Once libvirt supports this, amend the resume()
            # operation to support reallocating mediated devices.
            if ex.get_error_code() != libvirt.VIR_ERR_CONFIG_UNSUPPORTED:
                raise
            reason = _("Suspend is not supported for instances having "
                       "attached vGPUs.")
            raise exception.InstanceFaultRollback(
                exception.InstanceSuspendFailure(reason=reason))
def _has_numa_support(self):
# This means that the host can support LibvirtConfigGuestNUMATune
# and the nodeset field in LibvirtConfigGuestMemoryBackingPage

View File

@ -33,7 +33,10 @@ features:
* Suspending a guest having vGPUs doesn't work yet given a libvirt concern
(it can't hot-unplug mediated devices from a guest). Workarounds using
other instance actions (like snapshotting the instance or shelving it)
are recommended until libvirt supports that.
are recommended until libvirt supports that. If a user asks to suspend
such an instance, Nova will catch the resulting exception and set the
instance state back to ACTIVE, and the suspend action recorded in the
os-instance-actions API will show an Error result.
* Resizing an instance with a new flavor that has vGPU resources doesn't
allocate those vGPUs to the instance (the instance is created without