Remove unavailable but not reported PCI devices at startup

We saw in the field that the pci_devices table can end up in
inconsistent state after a compute node HW failure and re-deployment.
There could be dependent devices where the parent PF is in available
state while the children VFs are in unavailable state. (Before the HW
fault the PF was allocated hence the VFs were marked unavailable).

In this state this PF is still schedulable but during the
PCI claim the handling of dependent devices in the PCI tracker will fail
with the error: "Attempt to consume PCI device XXX from empty pool".

The reason for the failure is that when the PF is claimed, all the
children VFs are marked unavailable. But if the VF is already
unavailable such step fails.

One way the deployer might try to recover from this state is to remove
the VFs from the hypervisor and restart the compute agent. The compute
startup already has a logic to delete PCI devices that are unused and
not reported by the hypervisor. However this logic only removed devices
in 'available' state and ignored devices in 'unavailable' state.

If a device is unused and the hypervisor is not reporting the device any
more then it is safe to delete that device from the PCI tracker. So this
patch extends the logic to allow deleting 'unavailable' devices. There
is a small window when a dependent PCI device is in the 'unclaimable'
From cleanup perspective this is an analogous state. So it is also
added to the cleanup logic.

Related-Bug: #1969496
Change-Id: If9ab424cc7375a1f0d41b03f01c4a823216b3eb8
This commit is contained in:
Balazs Gibizer 2022-04-19 17:50:34 +02:00
parent c58376db75
commit 284ea72e96
4 changed files with 123 additions and 7 deletions

View File

@ -447,11 +447,30 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject):
instance.pci_devices.objects.append(copy.copy(self))
def remove(self):
    """Mark this device as removed from the hypervisor.

    Removal is only allowed while the device is unused: either it is in
    the 'available' state, or it is in the 'unavailable' or
    'unclaimable' state, which indicate that the consumption of a
    parent or child device blocks this device from being consumed.

    :raises PciDeviceInvalidStatus: if the device is in any other state
        (e.g. 'claimed' or 'allocated').
    :raises PciDeviceInvalidOwner: if the device still records an
        owning instance, regardless of its state.
    """
    # We allow removal of a device if it is unused. It can be unused
    # either by being in available state or being in a state that shows
    # that the parent or child device blocks the consumption of this
    # device.
    expected_states = [
        fields.PciDeviceStatus.AVAILABLE,
        fields.PciDeviceStatus.UNAVAILABLE,
        fields.PciDeviceStatus.UNCLAIMABLE,
    ]
    if self.status not in expected_states:
        raise exception.PciDeviceInvalidStatus(
            compute_node_id=self.compute_node_id,
            address=self.address, status=self.status,
            hopestatus=expected_states)
    # Just to be on the safe side, do not allow removal of a device
    # that has an owner even if the state of the device suggests that
    # it is not owned.
    if 'instance_uuid' in self and self.instance_uuid is not None:
        raise exception.PciDeviceInvalidOwner(
            compute_node_id=self.compute_node_id,
            address=self.address,
            owner=self.instance_uuid,
            hopeowner=None,
        )
    self.status = fields.PciDeviceStatus.REMOVED
    self.instance_uuid = None
    self.request_id = None

View File

@ -217,10 +217,13 @@ class PciDevTracker(object):
# from the pci whitelist.
try:
existed.remove()
except exception.PciDeviceInvalidStatus as e:
LOG.warning("Unable to remove device with %(status)s "
"ownership %(instance_uuid)s because of "
"%(pci_exception)s. "
except (
exception.PciDeviceInvalidStatus,
exception.PciDeviceInvalidOwner,
) as e:
LOG.warning("Unable to remove device with status "
"'%(status)s' and ownership %(instance_uuid)s "
"because of %(pci_exception)s. "
"Check your [pci]passthrough_whitelist "
"configuration to make sure this allocated "
"device is whitelisted. If you have removed "
@ -250,7 +253,10 @@ class PciDevTracker(object):
else:
# Note(yjiang5): no need to update stats if an assigned
# device is hot removed.
self.stats.remove_device(existed)
# NOTE(gibi): only remove the device from the pools if it
# is not already removed
if existed in self.stats.get_free_devs():
self.stats.remove_device(existed)
else:
# Update tracked devices.
new_value: ty.Dict[str, ty.Any]

View File

@ -467,6 +467,16 @@ class _TestPciDeviceObject(object):
devobj.claim(self.inst.uuid)
self.assertRaises(exception.PciDeviceInvalidStatus, devobj.remove)
def test_remove_device_fail_owned_with_unavailable_state(self):
    # This test creates a PCI device in an invalid state: the device is
    # 'unavailable' yet still has an owning instance recorded. This
    # should not happen in any known scenario. But we want to be safe
    # not to allow removing a device that has an owner. See bug 1969496
    # for more details.
    self._create_fake_instance()
    devobj = pci_device.PciDevice.create(None, dev_dict)
    devobj.claim(self.inst.uuid)
    # Force the inconsistent state on top of the claimed (owned) device.
    devobj.status = fields.PciDeviceStatus.UNAVAILABLE
    self.assertRaises(exception.PciDeviceInvalidOwner, devobj.remove)
class TestPciDeviceObject(test_objects._LocalTest,
_TestPciDeviceObject):

View File

@ -397,6 +397,87 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
self.assertEqual(len(self.tracker.stale), 1)
self.assertEqual(self.tracker.stale['0000:00:00.2']['vendor_id'], 'v2')
def _get_device_by_address(self, address):
devs = [dev for dev in self.tracker.pci_devs if dev.address == address]
if len(devs) == 1:
return devs[0]
if devs:
raise ValueError('ambiguous address', devs)
else:
raise ValueError('device not found', address)
def test_set_hvdevs_unavailable_vf_removed(self):
    """An unused VF in 'unavailable' state is removed from the tracker
    when the hypervisor stops reporting it (bug 1969496).
    """
    # We start with a PF parent and two VF children
    self._create_tracker([fake_db_dev_3, fake_db_dev_4, fake_db_dev_5])
    pci_requests_obj = self._create_pci_requests_object(
        [
            {
                'count': 1,
                'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
            }
        ],
        instance_uuid=uuidsentinel.instance1,
    )
    # then claim and allocate the PF that makes the VFs unavailable
    self.tracker.claim_instance(
        mock.sentinel.context, pci_requests_obj, None)
    self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
    dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
    self.assertEqual('allocated', dev3_pf.status)
    self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid)
    dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
    self.assertEqual('unavailable', dev4_vf.status)
    dev5_vf = self._get_device_by_address(fake_db_dev_5['address'])
    self.assertEqual('unavailable', dev5_vf.status)
    # now simulate that one VF (dev_5) is removed from the hypervisor and
    # the compute is restarted. As the VF is not claimed or allocated we
    # are free to remove it from the tracker.
    self.tracker._set_hvdevs(copy.deepcopy([fake_pci_3, fake_pci_4]))
    dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
    self.assertEqual('allocated', dev3_pf.status)
    self.assertEqual(uuidsentinel.instance1, dev3_pf.instance_uuid)
    dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
    self.assertEqual('unavailable', dev4_vf.status)
    dev5_vf = self._get_device_by_address(fake_db_dev_5['address'])
    self.assertEqual('removed', dev5_vf.status)
def test_set_hvdevs_unavailable_pf_removed(self):
    """An unused PF in 'unavailable' state is removed from the tracker
    when the hypervisor stops reporting it (bug 1969496).
    """
    # We start with one PF parent and one child VF
    self._create_tracker([fake_db_dev_3, fake_db_dev_4])
    pci_requests_obj = self._create_pci_requests_object(
        [
            {
                'count': 1,
                'spec': [{'dev_type': fields.PciDeviceType.SRIOV_VF}]
            }
        ],
        instance_uuid=uuidsentinel.instance1,
    )
    # Then we claim and allocate the VF that makes the PF unavailable
    self.tracker.claim_instance(
        mock.sentinel.context, pci_requests_obj, None)
    self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
    dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
    self.assertEqual('unavailable', dev3_pf.status)
    dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
    self.assertEqual('allocated', dev4_vf.status)
    self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid)
    # now simulate that the parent PF is removed from the hypervisor and
    # the compute is restarted. As the PF is not claimed or allocated we
    # are free to remove it from the tracker.
    self.tracker._set_hvdevs(copy.deepcopy([fake_pci_4]))
    dev3_pf = self._get_device_by_address(fake_db_dev_3['address'])
    self.assertEqual('removed', dev3_pf.status)
    dev4_vf = self._get_device_by_address(fake_db_dev_4['address'])
    self.assertEqual('allocated', dev4_vf.status)
    self.assertEqual(uuidsentinel.instance1, dev4_vf.instance_uuid)
def test_update_pci_for_instance_active(self):
pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)
self.tracker.claim_instance(mock.sentinel.context,