From f304b9eaadfd33c7ccdd6af2f60f299c3362ba1c Mon Sep 17 00:00:00 2001 From: melanie witt Date: Fri, 18 Oct 2024 02:54:02 +0000 Subject: [PATCH] libvirt: Wrap un-proxied listDevices() and listAllDevices() This is similar to change I668643c836d46a25df46d4c99a973af5e50a39db where the objects returned in a list from a libvirt call were not tpool.Proxy wrapped. Because the objects are not wrapped, calling methods on them such as listCaps() can block all other greenthreads and can cause nova-compute to freeze for hours in certain scenarios. This adds the same wrapping to libvirt calls which return lists of virNodeDevice. Closes-Bug: #2091033 Change-Id: I60d6f04d374e9ede5895a43b7a75e955b0fea3c5 --- nova/tests/unit/virt/libvirt/test_host.py | 42 +++++++++++++++++++ nova/virt/libvirt/host.py | 9 +++- ...libvirt-list-devices-7cd218c1a33535c9.yaml | 11 +++++ 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/unproxied-libvirt-list-devices-7cd218c1a33535c9.yaml diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py index 6f4f413befce..5394671584a5 100644 --- a/nova/tests/unit/virt/libvirt/test_host.py +++ b/nova/tests/unit/virt/libvirt/test_host.py @@ -2245,6 +2245,48 @@ class LibvirtTpoolProxyTestCase(test.NoDBTestCase): self.assertIsInstance(domain, tpool.Proxy) self.assertIn(domain.UUIDString(), (uuids.vm1, uuids.vm2)) + def _add_fake_host_devices(self): + self.conn._obj.pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=1, num_pfs=2, num_vfs=2, num_mdevcap=3) + mdevs = { + 'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01': + fakelibvirt.FakeMdevDevice( + dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c01', + type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE, + parent=fakelibvirt.MDEVCAP_DEV1_PCI_ADDR), + } + self.conn._obj.mdev_info = fakelibvirt.HostMdevDevicesInfo( + devices=mdevs) + + def test_tpool_list_all_devices(self): + self._add_fake_host_devices() + devs = self.host.list_all_devices( + fakelibvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV) + self.assertEqual(8, len(devs)) + for dev in devs: + self.assertIsInstance(dev, tpool.Proxy) + + def test_tpool_list_pci_devices(self): + self._add_fake_host_devices() + devs = self.host.list_pci_devices() + self.assertEqual(8, len(devs)) + for dev in devs: + self.assertIsInstance(dev, tpool.Proxy) + + def test_tpool_list_mdev_capable_devices(self): + self._add_fake_host_devices() + devs = self.host.list_mdev_capable_devices() + self.assertEqual(3, len(devs)) + for dev in devs: + self.assertIsInstance(dev, tpool.Proxy) + + def test_tpool_list_mediated_devices(self): + self._add_fake_host_devices() + devs = self.host.list_mediated_devices() + self.assertEqual(1, len(devs)) + for dev in devs: + self.assertIsInstance(dev, tpool.Proxy) + class LoadersTestCase(test.NoDBTestCase): diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 7f6ae47b44d2..73ceffdb10ac 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -1647,7 +1647,9 @@ class Host(object): :returns: a list of virNodeDevice instance """ try: - return self.get_connection().listDevices(cap, flags) + devs = [self._wrap_libvirt_proxy(dev) + for dev in self.get_connection().listDevices(cap, flags)] + return devs except libvirt.libvirtError as ex: error_code = ex.get_error_code() if error_code == libvirt.VIR_ERR_NO_SUPPORT: @@ -1667,7 +1669,10 @@ class Host(object): :returns: a list of virNodeDevice instances. """ try: - return self.get_connection().listAllDevices(flags) or [] + alldevs = [ + self._wrap_libvirt_proxy(dev) + for dev in self.get_connection().listAllDevices(flags)] or [] + return alldevs except libvirt.libvirtError as ex: LOG.warning(ex) return [] diff --git a/releasenotes/notes/unproxied-libvirt-list-devices-7cd218c1a33535c9.yaml b/releasenotes/notes/unproxied-libvirt-list-devices-7cd218c1a33535c9.yaml new file mode 100644 index 000000000000..eaf7b9b1ca82 --- /dev/null +++ b/releasenotes/notes/unproxied-libvirt-list-devices-7cd218c1a33535c9.yaml @@ -0,0 +1,11 @@ +fixes: + - | + `Bug #2091033`_: Fixed calls to libvirt ``listDevices()`` and + ``listAllDevices()`` from potentially blocking all other greenthreads + in ``nova-compute``. Under certain circumstances, it was possible for + the ``nova-compute`` service to freeze with all other greenthreads + blocked and unable to perform any other activities including logging. + This issue has been fixed by wrapping the libvirt ``listDevices()`` + and ``listAllDevices()`` calls with ``eventlet.tpool.Proxy``. + + .. _Bug #2091033: https://bugs.launchpad.net/nova/+bug/2091033