diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index 9bef6889c8a8..c7f295dda09f 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -301,6 +301,10 @@ Caveats that will cause the instance to be set back to ACTIVE. The ``suspend`` action in the ``os-instance-actions`` API will have an *Error* state. + .. versionchanged:: 25.0.0 + + This has been resolved in the Yoga release. See `bug 1948705`_. + * Resizing an instance with a new flavor that has vGPU resources doesn't allocate those vGPUs to the instance (the instance is created without vGPU resources). The proposed workaround is to rebuild the instance after @@ -350,6 +354,7 @@ For nested vGPUs: .. _bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563 .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688 +.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705 .. Links .. _Intel GVT-g: https://01.org/igvt-g diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 3fa6ae1e6567..35aa9f284f64 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -16563,9 +16563,15 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock.patch.object(guest, 'sync_guest_time'), mock.patch.object(drvr, '_wait_for_running', side_effect=loopingcall.LoopingCallDone()), + mock.patch.object(drvr, + '_get_mdevs_from_guest_config', + return_value='fake_mdevs'), + mock.patch.object(drvr, '_attach_mediated_devices'), ) as (_get_existing_domain_xml, _create_guest_with_network, _attach_pci_devices, get_instance_pci_devs, get_image_metadata, - mock_sync_time, mock_wait): + mock_sync_time, mock_wait, + _get_mdevs_from_guest_config, + _attach_mediated_devices): get_image_metadata.return_value = {'bar': 234} drvr.resume(self.context, instance, network_info, @@ -16580,6 +16586,9 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertTrue(mock_sync_time.called) 
_attach_pci_devices.assert_has_calls([mock.call(guest, 'fake_pci_devs')]) + _attach_mediated_devices.assert_has_calls( + [mock.call(guest, 'fake_mdevs')] + ) @mock.patch.object(host.Host, '_get_domain') @mock.patch.object(libvirt_driver.LibvirtDriver, 'get_info') @@ -26073,6 +26082,55 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertRaises(test.TestingException, self._test_detach_mediated_devices, exc) + @mock.patch.object(libvirt_guest.Guest, 'attach_device') + def _test_attach_mediated_devices(self, side_effect, attach_device): + dom_without_vgpu = ( + """ + + + + + +
+            </disk>
+            </devices></domain>""")
+
+        vgpu_xml = (
+            """<domain>
+            <devices>
+            <hostdev mode='subsystem' type='mdev' managed='no'
+             model='vfio-pci' display='off'>
+            <source>
+            <address uuid='81db53c6-6659-42a0-a34c-1507fdc72983'/>
+            </source>
+ + """) + + attach_device.side_effect = side_effect + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + guest = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_without_vgpu)) + mdevs = drvr._get_mdevs_from_guest_config(vgpu_xml) + drvr._attach_mediated_devices(guest, mdevs) + return attach_device + + def test_attach_mediated_devices(self): + def fake_attach_device(cfg_obj, **kwargs): + self.assertIsInstance(cfg_obj, + vconfig.LibvirtConfigGuestHostdevMDEV) + + attach_mock = self._test_attach_mediated_devices(fake_attach_device) + attach_mock.assert_called_once_with(mock.ANY, live=True) + + def test_attach_mediated_devices_raises_exc(self): + exc = test.TestingException() + + self.assertRaises(test.TestingException, + self._test_attach_mediated_devices, exc) + def test_storage_bus_traits__qemu_kvm(self): """Test getting storage bus traits per virt type. """ diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 73953a7c0a48..a6b6146c92eb 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -3966,6 +3966,10 @@ class LibvirtDriver(driver.ComputeDriver): """resume the specified instance.""" xml = self._get_existing_domain_xml(instance, network_info, block_device_info) + # NOTE(gsantos): The mediated devices that were removed on suspension + # are still present in the xml. Let's take their references from it + # and re-attach them. + mdevs = self._get_mdevs_from_guest_config(xml) # NOTE(efried): The instance should already have a vtpm_secret_uuid # registered if appropriate. 
         guest = self._create_guest_with_network(
@@ -3975,6 +3979,7 @@
             pci_manager.get_instance_pci_devs(instance))
         self._attach_direct_passthrough_ports(
             context, instance, guest, network_info)
+        self._attach_mediated_devices(guest, mdevs)
         timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running,
                                                     instance)
         timer.start(interval=0.5).wait()
@@ -8000,12 +8005,6 @@
                 guest.detach_device(mdev_cfg, live=True)
             except libvirt.libvirtError as ex:
                 error_code = ex.get_error_code()
-                # NOTE(sbauza): There is a pending issue with libvirt that
-                # doesn't allow to hot-unplug mediated devices. Let's
-                # short-circuit the suspend action and set the instance back
-                # to ACTIVE.
-                # TODO(sbauza): Once libvirt supports this, amend the resume()
-                # operation to support reallocating mediated devices.
                 if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:
                     reason = _("Suspend is not supported for instances having "
                                "attached mediated devices.")
@@ -8014,6 +8013,38 @@
             else:
                 raise

+    def _attach_mediated_devices(self, guest, devs):
+        for mdev_cfg in devs:
+            try:
+                guest.attach_device(mdev_cfg, live=True)
+            except libvirt.libvirtError as ex:
+                error_code = ex.get_error_code()
+                if error_code == libvirt.VIR_ERR_DEVICE_MISSING:
+                    LOG.warning("The mediated device %s was not found and "
+                                "won't be reattached to %s.", mdev_cfg, guest)
+                else:
+                    raise
+
+    def _get_mdevs_from_guest_config(self, xml):
+        """Get all libvirt's mediated devices from a guest's config (XML) file.
+        We don't have to worry about those devices being used by another guest,
+        since they remain allocated for the current guest as long as they are
+        present in the XML.
+
+        :param xml: The XML from the guest we want to get a list of mdevs from.
+
+        :returns: A list containing the objects that represent the mediated
+                  devices attached to the guest's config passed as argument.
+        """
+        config = vconfig.LibvirtConfigGuest()
+        config.parse_str(xml)
+
+        devs = []
+        for dev in config.devices:
+            if isinstance(dev, vconfig.LibvirtConfigGuestHostdevMDEV):
+                devs.append(dev)
+        return devs
+
     def _has_numa_support(self):
         # This means that the host can support LibvirtConfigGuestNUMATune
         # and the nodeset field in LibvirtConfigGuestMemoryBackingPage
diff --git a/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml b/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml
new file mode 100644
index 000000000000..cf46ebdca1f6
--- /dev/null
+++ b/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Amended the guest resume operation to support mediated devices, as
+    libvirt's minimum required version (v6.0.0) supports the hot-plug/unplug of
+    mediated devices, which was added in libvirt v4.3.0.