Merge "PCI: do not force remove allocated devices" into stable/queens
This commit is contained in:
commit
09456e656a
|
@ -163,23 +163,47 @@ class PciDevTracker(object):
|
|||
|
||||
for existed in self.pci_devs:
|
||||
if existed.address in exist_addrs - new_addrs:
|
||||
# Remove previously tracked PCI devices that are either
|
||||
# no longer reported by the hypervisor or have been removed
|
||||
# from the pci whitelist.
|
||||
try:
|
||||
existed.remove()
|
||||
except exception.PciDeviceInvalidStatus as e:
|
||||
LOG.warning("Trying to remove device with %(status)s "
|
||||
LOG.warning("Unable to remove device with %(status)s "
|
||||
"ownership %(instance_uuid)s because of "
|
||||
"%(pci_exception)s",
|
||||
"%(pci_exception)s. "
|
||||
"Check your [pci]passthrough_whitelist "
|
||||
"configuration to make sure this allocated "
|
||||
"device is whitelisted. If you have removed "
|
||||
"the device from the whitelist intentionally "
|
||||
"or the device is no longer available on the "
|
||||
"host you will need to delete the server or "
|
||||
"migrate it to another host to silence this "
|
||||
"warning.",
|
||||
{'status': existed.status,
|
||||
'instance_uuid': existed.instance_uuid,
|
||||
'pci_exception': e.format_message()})
|
||||
# Note(yjiang5): remove the device by force so that
|
||||
# db entry is cleaned in next sync.
|
||||
existed.status = fields.PciDeviceStatus.REMOVED
|
||||
# NOTE(sean-k-mooney): the device may not be tracked for
|
||||
# two reasons: first the device could have been removed
|
||||
# from the host or second the whitelist could have been
|
||||
# updated. While force removing may seam reasonable, if
|
||||
# the device is allocated to a vm, force removing the
|
||||
# device entry from the resource tracker can prevent the vm
|
||||
# from rebooting. If the PCI device was removed due to an
|
||||
# update to the PCI whitelist which was later reverted,
|
||||
# removing the entry from the database and adding it back
|
||||
# later may lead to the scheduler incorrectly selecting
|
||||
# this host and the ResourceTracker assigning the PCI
|
||||
# device to a second vm. To prevent this bug we skip
|
||||
# deleting the device from the db in this iteration and
|
||||
# will try again on the next sync.
|
||||
continue
|
||||
else:
|
||||
# Note(yjiang5): no need to update stats if an assigned
|
||||
# device is hot removed.
|
||||
self.stats.remove_device(existed)
|
||||
else:
|
||||
# Update tracked devices.
|
||||
new_value = next((dev for dev in devices if
|
||||
dev['address'] == existed.address))
|
||||
new_value['compute_node_id'] = self.node_id
|
||||
|
@ -202,6 +226,7 @@ class PciDevTracker(object):
|
|||
else:
|
||||
existed.update_device(new_value)
|
||||
|
||||
# Track newly discovered devices.
|
||||
for dev in [dev for dev in devices if
|
||||
dev['address'] in new_addrs - exist_addrs]:
|
||||
dev['compute_node_id'] = self.node_id
|
||||
|
|
|
@ -312,6 +312,53 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(vfs, pf.child_devices)
|
||||
self.assertEqual(vfs[0].parent_device, pf)
|
||||
|
||||
def test_set_hvdev_remove_tree_maintained_with_allocations(self):
|
||||
# Make sure the device tree is properly maintained when there are
|
||||
# devices removed from the system that are allocated to vms.
|
||||
|
||||
all_devs = fake_db_devs_tree[:]
|
||||
self._create_tracker(all_devs)
|
||||
# we start with 3 devices
|
||||
self.assertEqual(
|
||||
3,
|
||||
len([dev for dev in self.tracker.pci_devs
|
||||
if dev.status != fields.PciDeviceStatus.REMOVED]))
|
||||
# we then allocate one device
|
||||
pci_requests_obj = self._create_pci_requests_object(
|
||||
[{'count': 1, 'spec': [{'vendor_id': 'v2'}]}])
|
||||
# NOTE(sean-k-mooney): context, pci request, numa topology
|
||||
claimed_dev = self.tracker.claim_instance(
|
||||
mock.sentinel.context, pci_requests_obj, None)[0]
|
||||
|
||||
self.tracker._set_hvdevs(all_devs)
|
||||
# and assert that no devices were removed
|
||||
self.assertEqual(
|
||||
0,
|
||||
len([dev for dev in self.tracker.pci_devs
|
||||
if dev.status == fields.PciDeviceStatus.REMOVED]))
|
||||
# we then try to remove the allocated device from the set reported
|
||||
# by the driver.
|
||||
fake_pci_devs = [dev for dev in all_devs
|
||||
if dev['address'] != claimed_dev.address]
|
||||
with mock.patch("nova.pci.manager.LOG.warning") as log:
|
||||
self.tracker._set_hvdevs(fake_pci_devs)
|
||||
log.assert_called_once()
|
||||
args = log.call_args_list[0][0] # args of first call
|
||||
self.assertIn('Unable to remove device with', args[0])
|
||||
# and assert no devices are removed from the tracker
|
||||
self.assertEqual(
|
||||
0,
|
||||
len([dev for dev in self.tracker.pci_devs
|
||||
if dev.status == fields.PciDeviceStatus.REMOVED]))
|
||||
# free the device that was allocated and update tracker again
|
||||
self.tracker._free_device(claimed_dev)
|
||||
self.tracker._set_hvdevs(fake_pci_devs)
|
||||
# and assert that one device is removed from the tracker
|
||||
self.assertEqual(
|
||||
1,
|
||||
len([dev for dev in self.tracker.pci_devs
|
||||
if dev.status == fields.PciDeviceStatus.REMOVED]))
|
||||
|
||||
def test_set_hvdev_changed_stal(self):
|
||||
pci_requests_obj = self._create_pci_requests_object(
|
||||
[{'count': 1, 'spec': [{'vendor_id': 'v1'}]}])
|
||||
|
|
Loading…
Reference in New Issue