From 9ee5d2c66255f83cc8a66f1b5648fa13e1d73f47 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 19 Apr 2022 17:27:31 +0200 Subject: [PATCH] Simulate bug 1969496 As If9ab424cc7375a1f0d41b03f01c4a823216b3eb8 stated there is a way for the pci_device table to become inconsistent. Parent PF can be in 'available' state while children VFs are still in 'unavailable' state. In this situation the PF is schedulable but the PCI claim will fail when it tries to mark the dependent VFs unavailable. This patch adds a test case that shows the error. Related-Bug: #1969496 Change-Id: I7b432d7a32aeb1ab765d1f731691c7841a8f1440 --- nova/tests/unit/pci/test_manager.py | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py index bbc8f7c56192..a924d71ef400 100644 --- a/nova/tests/unit/pci/test_manager.py +++ b/nova/tests/unit/pci/test_manager.py @@ -21,6 +21,7 @@ from oslo_utils.fixture import uuidsentinel from nova.compute import vm_states from nova import context +from nova import exception from nova import objects from nova.objects import fields from nova.pci import manager @@ -478,6 +479,61 @@ class PciDevTrackerTestCase(test.NoDBTestCase): self.assertEqual('allocated', dev4_vf.status) self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid) + def test_claim_available_pf_while_child_vf_is_unavailable(self): + # NOTE(gibi): this is bug 1969496. The state created here is + # inconsistent and should not happen. But it did happen in some cases + # where we were not able to track down the way how it happened. + + # We start with a PF parent and a VF child. The PF is available and + # the VF is unavailable. 
+ pf = copy.deepcopy(fake_db_dev_3) + vf = copy.deepcopy(fake_db_dev_4) + vf['status'] = fields.PciDeviceStatus.UNAVAILABLE + self._create_tracker([pf, vf]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('unavailable', vf_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. It should work as it is + # available + # This is bug 1969496 as the claim fails with exception + ex = self.assertRaises( + exception.PciDevicePoolEmpty, + self.tracker.claim_instance, + mock.sentinel.context, + pci_requests_obj, + None + ) + self.assertIn( + 'Attempt to consume PCI device 1:0000:00:02.1 from empty pool', + str(ex) + ) + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('unavailable', vf_dev.status) + + # This should work when the bug is fixed + # self.tracker.claim_instance( + # mock.sentinel.context, pci_requests_obj, None) + # self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + # pf_dev = self._get_device_by_address(pf['address']) + # self.assertEqual('allocated', pf_dev.status) + # vf_dev = self._get_device_by_address(vf['address']) + # self.assertEqual('unavailable', vf_dev.status) + def test_update_pci_for_instance_active(self): pci_requests_obj = self._create_pci_requests_object(fake_pci_requests) self.tracker.claim_instance(mock.sentinel.context,