From 16f7c601b63bd1e7ca13917261300a7064ec72bc Mon Sep 17 00:00:00 2001 From: Gustavo Santos Date: Mon, 25 Oct 2021 16:32:10 -0300 Subject: [PATCH] Reattach mdevs to guest on resume When suspending a VM in OpenStack, Nova detaches all the mediated devices from the guest machine, but does not reattach them on the resume operation. This patch makes Nova reattach the mdevs that were detached when the guest was suspended. This behavior is due to libvirt not supporting the hot-unplug of mediated devices at the time the feature was being developed. The limitation has been lifted since then, and now we have to amend the resume function so it will reattach the mediated devices that were detached on suspension. Closes-bug: #1948705 Signed-off-by: Gustavo Santos Change-Id: I083929f36d9e78bf7713a87cae6d581e0d946867 --- doc/source/admin/virtual-gpu.rst | 5 ++ nova/tests/unit/virt/libvirt/test_driver.py | 60 ++++++++++++++++++- nova/virt/libvirt/driver.py | 43 +++++++++++-- .../notes/bug-1948705-ff80ae392c525475.yaml | 6 ++ 4 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/bug-1948705-ff80ae392c525475.yaml diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index 9bef6889c8a8..c7f295dda09f 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -301,6 +301,10 @@ Caveats that will cause the instance to be set back to ACTIVE. The ``suspend`` action in the ``os-instance-actions`` API will have an *Error* state. + .. versionchanged:: 25.0.0 + + This has been resolved in the Yoga release. See `bug 1948705`_. + * Resizing an instance with a new flavor that has vGPU resources doesn't allocate those vGPUs to the instance (the instance is created without vGPU resources). The proposed workaround is to rebuild the instance after @@ -350,6 +354,7 @@ For nested vGPUs: .. _bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563 .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688 +.. 
_bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705 .. Links .. _Intel GVT-g: https://01.org/igvt-g diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 3fa6ae1e6567..35aa9f284f64 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -16563,9 +16563,15 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock.patch.object(guest, 'sync_guest_time'), mock.patch.object(drvr, '_wait_for_running', side_effect=loopingcall.LoopingCallDone()), + mock.patch.object(drvr, + '_get_mdevs_from_guest_config', + return_value='fake_mdevs'), + mock.patch.object(drvr, '_attach_mediated_devices'), ) as (_get_existing_domain_xml, _create_guest_with_network, _attach_pci_devices, get_instance_pci_devs, get_image_metadata, - mock_sync_time, mock_wait): + mock_sync_time, mock_wait, + _get_mdevs_from_guest_config, + _attach_mediated_devices): get_image_metadata.return_value = {'bar': 234} drvr.resume(self.context, instance, network_info, @@ -16580,6 +16586,9 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertTrue(mock_sync_time.called) _attach_pci_devices.assert_has_calls([mock.call(guest, 'fake_pci_devs')]) + _attach_mediated_devices.assert_has_calls( + [mock.call(guest, 'fake_mdevs')] + ) @mock.patch.object(host.Host, '_get_domain') @mock.patch.object(libvirt_driver.LibvirtDriver, 'get_info') @@ -26073,6 +26082,55 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertRaises(test.TestingException, self._test_detach_mediated_devices, exc) + @mock.patch.object(libvirt_guest.Guest, 'attach_device') + def _test_attach_mediated_devices(self, side_effect, attach_device): + dom_without_vgpu = ( + """ + + + + + +
+ + """) + + vgpu_xml = ( + """ + + +
+ + +
+ + """) + + attach_device.side_effect = side_effect + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + guest = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_without_vgpu)) + mdevs = drvr._get_mdevs_from_guest_config(vgpu_xml) + drvr._attach_mediated_devices(guest, mdevs) + return attach_device + + def test_attach_mediated_devices(self): + def fake_attach_device(cfg_obj, **kwargs): + self.assertIsInstance(cfg_obj, + vconfig.LibvirtConfigGuestHostdevMDEV) + + attach_mock = self._test_attach_mediated_devices(fake_attach_device) + attach_mock.assert_called_once_with(mock.ANY, live=True) + + def test_attach_mediated_devices_raises_exc(self): + exc = test.TestingException() + + self.assertRaises(test.TestingException, + self._test_attach_mediated_devices, exc) + def test_storage_bus_traits__qemu_kvm(self): """Test getting storage bus traits per virt type. """ diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 73953a7c0a48..a6b6146c92eb 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -3966,6 +3966,10 @@ class LibvirtDriver(driver.ComputeDriver): """resume the specified instance.""" xml = self._get_existing_domain_xml(instance, network_info, block_device_info) + # NOTE(gsantos): The mediated devices that were removed on suspension + # are still present in the xml. Let's take their references from it + # and re-attach them. + mdevs = self._get_mdevs_from_guest_config(xml) # NOTE(efried): The instance should already have a vtpm_secret_uuid # registered if appropriate. 
guest = self._create_guest_with_network( @@ -3975,6 +3979,7 @@ class LibvirtDriver(driver.ComputeDriver): pci_manager.get_instance_pci_devs(instance)) self._attach_direct_passthrough_ports( context, instance, guest, network_info) + self._attach_mediated_devices(guest, mdevs) timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running, instance) timer.start(interval=0.5).wait() @@ -8000,12 +8005,6 @@ class LibvirtDriver(driver.ComputeDriver): guest.detach_device(mdev_cfg, live=True) except libvirt.libvirtError as ex: error_code = ex.get_error_code() - # NOTE(sbauza): There is a pending issue with libvirt that - # doesn't allow to hot-unplug mediated devices. Let's - # short-circuit the suspend action and set the instance back - # to ACTIVE. - # TODO(sbauza): Once libvirt supports this, amend the resume() - # operation to support reallocating mediated devices. if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED: reason = _("Suspend is not supported for instances having " "attached mediated devices.") @@ -8014,6 +8013,38 @@ class LibvirtDriver(driver.ComputeDriver): else: raise + def _attach_mediated_devices(self, guest, devs): + for mdev_cfg in devs: + try: + guest.attach_device(mdev_cfg, live=True) + except libvirt.libvirtError as ex: + error_code = ex.get_error_code() + if error_code == libvirt.VIR_ERR_DEVICE_MISSING: + LOG.warning("The mediated device %s was not found and " + "won't be reattached to %s.", mdev_cfg, guest) + else: + raise + + def _get_mdevs_from_guest_config(self, xml): + """Get all libvirt's mediated devices from a guest's config (XML) file. + We don't have to worry about those devices being used by another guest, + since they remain allocated for the current guest as long as they are + present in the XML. + + :param xml: The XML from the guest we want to get a list of mdevs from. + + :returns: A list containing the objects that represent the mediated + devices attached to the guest's config passed as argument. 
+ """ + config = vconfig.LibvirtConfigGuest() + config.parse_str(xml) + + devs = [] + for dev in config.devices: + if isinstance(dev, vconfig.LibvirtConfigGuestHostdevMDEV): + devs.append(dev) + return devs + def _has_numa_support(self): # This means that the host can support LibvirtConfigGuestNUMATune # and the nodeset field in LibvirtConfigGuestMemoryBackingPage diff --git a/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml b/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml new file mode 100644 index 000000000000..cf46ebdca1f6 --- /dev/null +++ b/releasenotes/notes/bug-1948705-ff80ae392c525475.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Amended the guest resume operation to support mediated devices, as + libvirt's minimum required version (v6.0.0) supports the hot-plug/unplug of + mediated devices, a capability that libvirt added in v4.3.0.