Fix PCI passthrough cleanup on reschedule
The resource tracker Claim object works on a copy of the instance object
received from the compute manager. But the PCI claim logic does not use
the copy; it uses the original instance object. However, the abort claim
logic, including the abort PCI claim logic, worked on the copy only.
Therefore the claimed PCI devices remained visible to the compute manager
in the instance.pci_devices list even after the claim was aborted.
There was another bug in the PCIDevice object where the instance object
wasn't passed to the free() function and therefore the
instance.pci_devices list wasn't updated when the device was freed.
Closes-Bug: #1860555
Change-Id: Iff343d4d78996cd17a6a584fefa7071c81311673
(cherry picked from commit f8b98390dc)
This commit is contained in:
@@ -18,7 +18,9 @@
|
||||
"ip_addresses": [],
|
||||
"launched_at": null,
|
||||
"power_state": "pending",
|
||||
"state": "building"
|
||||
"state": "building",
|
||||
"host": null,
|
||||
"node": null
|
||||
}
|
||||
},
|
||||
"priority":"ERROR",
|
||||
|
@@ -64,6 +64,7 @@ class Claim(NopClaim):
|
||||
super().__init__(migration=migration)
|
||||
# Stash a copy of the instance at the current point of time
|
||||
self.instance = instance.obj_clone()
|
||||
self.instance_ref = instance
|
||||
self.nodename = nodename
|
||||
self.tracker = tracker
|
||||
self._pci_requests = pci_requests
|
||||
@@ -82,7 +83,7 @@ class Claim(NopClaim):
|
||||
been aborted.
|
||||
"""
|
||||
LOG.debug("Aborting claim: %s", self, instance=self.instance)
|
||||
self.tracker.abort_instance_claim(self.context, self.instance,
|
||||
self.tracker.abort_instance_claim(self.context, self.instance_ref,
|
||||
self.nodename)
|
||||
|
||||
def _claim_test(self, compute_node, limits=None):
|
||||
|
@@ -407,7 +407,7 @@ class PciDevTracker(object):
|
||||
for dev in self.pci_devs:
|
||||
if (dev.status == fields.PciDeviceStatus.ALLOCATED and
|
||||
dev.instance_uuid == instance['uuid']):
|
||||
self._free_device(dev)
|
||||
self._free_device(dev, instance)
|
||||
|
||||
def free_instance_claims(
|
||||
self, context: ctx.RequestContext, instance: 'objects.Instance',
|
||||
@@ -423,7 +423,7 @@ class PciDevTracker(object):
|
||||
for dev in self.pci_devs:
|
||||
if (dev.status == fields.PciDeviceStatus.CLAIMED and
|
||||
dev.instance_uuid == instance['uuid']):
|
||||
self._free_device(dev)
|
||||
self._free_device(dev, instance)
|
||||
|
||||
def free_instance(
|
||||
self, context: ctx.RequestContext, instance: 'objects.Instance',
|
||||
|
@@ -3042,11 +3042,8 @@ class PCIResourceRequestReschedulingTest(_PCIServersTestBase):
|
||||
|
||||
def validate_group_policy(manager, instance, *args, **kwargs):
|
||||
nonlocal validate_group_policy_called
|
||||
if validate_group_policy_called:
|
||||
# FIXME(johngarbutt): This is bug 1860555, it should be 1.
|
||||
self.assertEqual(2, len(instance.pci_devices))
|
||||
else:
|
||||
self.assertEqual(1, len(instance.pci_devices))
|
||||
self.assertEqual(1, len(instance.pci_devices))
|
||||
if not validate_group_policy_called:
|
||||
validate_group_policy_called = True
|
||||
raise exception.RescheduledException(
|
||||
instance_uuid='fake-uuid',
|
||||
|
@@ -596,14 +596,6 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
|
||||
# This is before we've failed.
|
||||
self.assertEqual(task_states.SPAWNING, instance.task_state)
|
||||
tracking['last_state'] = instance.task_state
|
||||
elif tracking['last_state'] == task_states.SPAWNING:
|
||||
# This is after we've failed.
|
||||
self.assertIsNone(instance.host)
|
||||
self.assertIsNone(instance.node)
|
||||
self.assertIsNone(instance.task_state)
|
||||
tracking['last_state'] = instance.task_state
|
||||
else:
|
||||
self.fail('Unexpected save!')
|
||||
|
||||
with mock.patch.object(instance, 'save') as mock_save:
|
||||
mock_save.side_effect = check_save
|
||||
@@ -614,6 +606,10 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
|
||||
request_spec=objects.RequestSpec(),
|
||||
accel_uuids=[])
|
||||
|
||||
self.assertIsNone(instance.host)
|
||||
self.assertIsNone(instance.node)
|
||||
self.assertIsNone(instance.task_state)
|
||||
|
||||
mock_notify_instance_action.assert_called_once_with(
|
||||
self.context, instance, 'fake-mini', action='unshelve',
|
||||
phase='start', bdms=mock_bdms)
|
||||
|
@@ -528,7 +528,9 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
|
||||
# Ensure that the claim actually fixes the inconsistency so when the
|
||||
# parent if freed the children become available too.
|
||||
self.tracker.free_instance(
|
||||
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||
mock.sentinel.context,
|
||||
{'uuid': uuidsentinel.instance1,
|
||||
'pci_devices': [objects.PciDevice(id=pf['id'])]})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
@@ -586,7 +588,9 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
|
||||
# Ensure that the claim actually fixes the inconsistency so when the
|
||||
# parent if freed the children become available too.
|
||||
self.tracker.free_instance(
|
||||
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||
mock.sentinel.context,
|
||||
{'uuid': uuidsentinel.instance1,
|
||||
'pci_devices': [objects.PciDevice(id=pf['id'])]})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
|
Reference in New Issue
Block a user