pci: make sure device relationships are kept in memory

The `pci_devs` attribute of the PciDevTracker class is the in-memory
"master copy" of all devices on each compute host, and all data changes
that happen when claiming/allocating/freeing devices HAVE TO be made
against instances contained in the `pci_devs` list, because they are
periodically flushed to the DB when the save() method is called.

Due to this we need to make sure all the relationships are available to
the code using them (claiming/allocation/freeing methods).

We do this by simply keeping a tree structure by referencing
parent/children from objects themselves. This is done on every update of
the state of PCI devices (on compute service start up, and on every
resource tracker pass), so that this information is always as up to date
as the in memory view of devices.

This change adds the code to build up the tree, and subsequent changes
will make sure the newly added relationships are used when needed. We
also add 2 non-versioned fields to the PciDevice object to hold the
references.

Co-Authored-By: Sahid Ferdjaoui <sahid.ferdjaoui@redhat.com>

Change-Id: Id6868b7839efb2cd53f5f7aaac2c55d169356ce4
Partial-bug: #1565785
This commit is contained in:
Nikola Dipanov 2016-04-05 18:09:53 +01:00 committed by Sahid Orentino Ferdjaoui
parent 428603ec93
commit c469b8466f
3 changed files with 173 additions and 5 deletions

View File

@ -177,6 +177,11 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject):
super(PciDevice, self).__init__(*args, **kwargs)
self.obj_reset_changes()
self.extra_info = {}
# NOTE(ndipanov): These are required to build an in-memory device tree
# but don't need to be proper fields (and can't easily be as they would
# hold circular references)
self.parent_device = None
self.child_devices = []
def __eq__(self, other):
    """Return the result of comparing this device with ``other``.

    The comparison itself is delegated to
    ``compare_pci_device_attributes``.
    """
    is_equal = compare_pci_device_attributes(self, other)
    return is_equal

View File

@ -63,6 +63,7 @@ class PciDevTracker(object):
context, node_id)
else:
self.pci_devs = objects.PciDeviceList(objects=[])
self._build_device_tree(self.pci_devs)
self._initial_instance_usage()
def _initial_instance_usage(self):
@ -117,6 +118,42 @@ class PciDevTracker(object):
devices.append(dev)
self._set_hvdevs(devices)
@staticmethod
def _build_device_tree(all_devs):
    """Build a tree of devices that represents parent-child relationships.

    We need to have the relationships set up so that we can easily make
    all the necessary changes to parent/child devices without having to
    figure it out at each call site.

    This method just adds references to relevant instances already found
    in `pci_devs` to `child_devices` and `parent_device` fields of each
    one.

    Currently relationships are considered for SR-IOV PFs/VFs only.

    :param all_devs: list-like collection of PCI device objects. It is
        sorted in place by address, and the `parent_device` /
        `child_devices` attributes of its PF/VF members are rewritten.
    """
    # Keep the devices in ascending address order. Linking does not depend
    # on this order (PFs are registered in a separate pass before any VF is
    # looked at); the sort only fixes the order of `all_devs` itself and,
    # through it, of every PF's `child_devices` list.
    all_devs.sort(key=lambda x: x.address)

    # NOTE(ndipanov): Removed devs are pruned from self.pci_devs on save()
    # so we need to make sure we are not looking at removed ones as we may
    # build up the tree sooner than they are pruned.
    ignored_statuses = (fields.PciDeviceStatus.REMOVED,
                        fields.PciDeviceStatus.DELETED)
    live_devs = [dev for dev in all_devs
                 if dev.status not in ignored_statuses]

    # Pass 1: register every PF (with a fresh, empty child list) so that a
    # VF can find its parent regardless of where it sits in the list.
    parents = {}
    for dev in live_devs:
        if dev.dev_type == fields.PciDeviceType.SRIOV_PF:
            dev.child_devices = []
            parents[dev.address] = dev

    # Pass 2: point every VF at its PF and record it as that PF's child.
    # A VF whose parent is not among the live PFs gets `parent_device`
    # set to None.
    for dev in live_devs:
        if dev.dev_type == fields.PciDeviceType.SRIOV_VF:
            parent = parents.get(dev.parent_addr)
            dev.parent_device = parent
            # Explicit None check so linking never depends on the
            # truthiness of a device object.
            if parent is not None:
                parent.child_devices.append(dev)
def _set_hvdevs(self, devices):
exist_addrs = set([dev.address for dev in self.pci_devs])
new_addrs = set([dev['address'] for dev in devices])
@ -169,6 +206,8 @@ class PciDevTracker(object):
self.pci_devs.objects.append(dev_obj)
self.stats.add_device(dev_obj)
self._build_device_tree(self.pci_devs)
def _claim_instance(self, context, pci_requests, instance_numa_topology):
instance_cells = None
if instance_numa_topology:

View File

@ -43,6 +43,17 @@ fake_pci_1 = dict(fake_pci, address='0000:00:00.2',
product_id='p1', vendor_id='v1')
fake_pci_2 = dict(fake_pci, address='0000:00:00.3')
# SR-IOV physical function (PF) in the dict form that the tests pass to
# `_set_hvdevs`; its address is the `parent_addr` of the two VFs below.
fake_pci_3 = dict(fake_pci, address='0000:00:01.1',
dev_type=fields.PciDeviceType.SRIOV_PF,
vendor_id='v2', product_id='p2', numa_node=None)
# First SR-IOV virtual function (VF); `parent_addr` is fake_pci_3's address.
fake_pci_4 = dict(fake_pci, address='0000:00:02.1',
dev_type=fields.PciDeviceType.SRIOV_VF,
parent_addr='0000:00:01.1',
vendor_id='v2', product_id='p2', numa_node=None)
# Second VF of fake_pci_3; differs from fake_pci_4 only in its address.
fake_pci_5 = dict(fake_pci, address='0000:00:02.2',
dev_type=fields.PciDeviceType.SRIOV_VF,
parent_addr='0000:00:01.1',
vendor_id='v2', product_id='p2', numa_node=None)
fake_db_dev = {
'created_at': None,
@ -72,6 +83,18 @@ fake_db_dev_2 = dict(fake_db_dev, id=3, address='0000:00:00.3',
numa_node=None, parent_addr='0000:00:00.1')
fake_db_devs = [fake_db_dev, fake_db_dev_1, fake_db_dev_2]
# Records derived from `fake_db_dev` that describe one SR-IOV PF (id=4)
# and two VFs (ids 5 and 6) whose `parent_addr` is the PF's address.
fake_db_dev_3 = dict(fake_db_dev, id=4, address='0000:00:01.1',
vendor_id='v2', product_id='p2',
numa_node=None, dev_type=fields.PciDeviceType.SRIOV_PF)
fake_db_dev_4 = dict(fake_db_dev, id=5, address='0000:00:02.1',
numa_node=None, dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id='v2', product_id='p2',
parent_addr='0000:00:01.1')
fake_db_dev_5 = dict(fake_db_dev, id=6, address='0000:00:02.2',
numa_node=None, dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id='v2', product_id='p2',
parent_addr='0000:00:01.1')
# The PF followed by its two VFs; the device-tree tests create their
# tracker from this list.
fake_db_devs_tree = [fake_db_dev_3, fake_db_dev_4, fake_db_dev_5]
fake_pci_requests = [
{'count': 1,
@ -90,7 +113,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
self.inst.numa_topology = None
def _fake_get_pci_devices(self, ctxt, node_id):
return fake_db_devs[:]
return self.fake_devs
def _fake_pci_device_update(self, ctxt, node_id, address, value):
self.update_called += 1
@ -113,9 +136,14 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
instance_uuid=instance_uuid,
requests=pci_reqs)
def _create_tracker(self, fake_devs):
    """(Re)create ``self.tracker`` for the given fake device records.

    ``fake_devs`` is stored on the test case before a new
    ``manager.PciDevTracker`` is constructed for node id 1.
    """
    node_id = 1
    self.fake_devs = fake_devs
    self.tracker = manager.PciDevTracker(self.fake_context, node_id)
def setUp(self):
super(PciDevTrackerTestCase, self).setUp()
self.fake_context = context.get_admin_context()
self.fake_devs = fake_db_devs[:]
self.stub_out('nova.db.pci_device_get_all_by_node',
self._fake_get_pci_devices)
# The fake_pci_whitelist must be called before creating the fake
@ -123,7 +151,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
patcher = pci_fakes.fake_pci_whitelist()
self.addCleanup(patcher.stop)
self._create_fake_instance()
self.tracker = manager.PciDevTracker(self.fake_context, 1)
self._create_tracker(fake_db_devs[:])
def test_pcidev_tracker_create(self):
self.assertEqual(len(self.tracker.pci_devs), 3)
@ -132,6 +160,55 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
self.assertEqual(self.tracker.stale.keys(), [])
self.assertEqual(len(self.tracker.stats.pools), 3)
self.assertEqual(self.tracker.node_id, 1)
for dev in self.tracker.pci_devs:
self.assertIsNone(dev.parent_device)
self.assertEqual(dev.child_devices, [])
def test_pcidev_tracker_create_device_tree(self):
    """A tracker created from one PF and two VFs links them together."""
    self._create_tracker(fake_db_devs_tree)

    self.assertEqual(3, len(self.tracker.pci_devs))
    free_devs = self.tracker.pci_stats.get_free_devs()
    self.assertEqual(3, len(free_devs))
    # On Python 3 `keys()` returns a view that never compares equal to a
    # list, so materialize it before comparing.
    self.assertEqual([], list(self.tracker.stale.keys()))
    self.assertEqual(2, len(self.tracker.stats.pools))
    self.assertEqual(1, self.tracker.node_id)

    pf = [dev for dev in self.tracker.pci_devs
          if dev.dev_type == fields.PciDeviceType.SRIOV_PF].pop()
    vfs = [dev for dev in self.tracker.pci_devs
           if dev.dev_type == fields.PciDeviceType.SRIOV_VF]
    self.assertEqual(2, len(vfs))

    # Assert we build the device tree correctly
    self.assertEqual(vfs, pf.child_devices)
    for vf in vfs:
        self.assertEqual(vf.parent_device, pf)
def test_pcidev_tracker_create_device_tree_pf_only(self):
    """A lone PF ends up with no parent and an empty child list."""
    self._create_tracker([fake_db_dev_3])

    self.assertEqual(1, len(self.tracker.pci_devs))
    free_devs = self.tracker.pci_stats.get_free_devs()
    self.assertEqual(1, len(free_devs))
    # On Python 3 `keys()` returns a view that never compares equal to a
    # list, so materialize it before comparing.
    self.assertEqual([], list(self.tracker.stale.keys()))
    self.assertEqual(1, len(self.tracker.stats.pools))
    self.assertEqual(1, self.tracker.node_id)

    pf = self.tracker.pci_devs[0]
    self.assertIsNone(pf.parent_device)
    self.assertEqual([], pf.child_devices)
def test_pcidev_tracker_create_device_tree_vf_only(self):
    """A VF whose PF is not tracked gets no parent and no children."""
    self._create_tracker([fake_db_dev_4])

    self.assertEqual(1, len(self.tracker.pci_devs))
    free_devs = self.tracker.pci_stats.get_free_devs()
    self.assertEqual(1, len(free_devs))
    # On Python 3 `keys()` returns a view that never compares equal to a
    # list, so materialize it before comparing.
    self.assertEqual([], list(self.tracker.stale.keys()))
    self.assertEqual(1, len(self.tracker.stats.pools))
    self.assertEqual(1, self.tracker.node_id)

    vf = self.tracker.pci_devs[0]
    self.assertIsNone(vf.parent_device)
    self.assertEqual([], vf.child_devices)
@mock.patch.object(nova.objects.PciDeviceList, 'get_by_compute_node')
def test_pcidev_tracker_create_no_nodeid(self, mock_get_cn):
@ -167,6 +244,30 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
dev in self.tracker.pci_devs]),
set(['v', 'v1', 'v2']))
def test_set_hvdev_new_dev_tree_maintained(self):
    """The PF/VF tree stays correct when the driver reports a new VF."""
    self._create_tracker(fake_db_devs_tree)

    # One extra VF, on top of the devices the tracker already knows about.
    extra_vf = dict(fake_pci_5, id=12, address='0000:00:02.3')
    reported = [copy.deepcopy(entry) for entry in
                (fake_pci_3, fake_pci_4, fake_pci_5, extra_vf)]
    self.tracker._set_hvdevs(reported)
    self.assertEqual(len(self.tracker.pci_devs), 4)

    # Split the tracked devices into PFs and VFs, keeping tracker order.
    pfs = []
    vfs = []
    for dev in self.tracker.pci_devs:
        if dev.dev_type == fields.PciDeviceType.SRIOV_PF:
            pfs.append(dev)
        elif dev.dev_type == fields.PciDeviceType.SRIOV_VF:
            vfs.append(dev)
    pf = pfs.pop()
    self.assertEqual(3, len(vfs))

    # The PF must list every VF, and every VF must point back at the PF.
    self.assertEqual(vfs, pf.child_devices)
    for vf in vfs:
        self.assertEqual(vf.parent_device, pf)
def test_set_hvdev_changed(self):
fake_pci_v2 = dict(fake_pci, address='0000:00:00.2', vendor_id='v1')
fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_2),
@ -178,9 +279,32 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
def test_set_hvdev_remove(self):
    """Two tracked devices are REMOVED once only `fake_pci` is reported."""
    self.tracker._set_hvdevs([fake_pci])
    # A single check against the status constant replaces the former pair
    # of identical assertions (one of which used the 'removed' literal).
    removed = [dev for dev in self.tracker.pci_devs
               if dev.status == fields.PciDeviceStatus.REMOVED]
    self.assertEqual(2, len(removed))
def test_set_hvdev_remove_tree_maintained(self):
    """The PF/VF tree stays correct when a known VF is no longer reported.

    The tracker starts with one PF and two VFs; the driver then reports
    only the PF and the first VF.
    """
    self._create_tracker(fake_db_devs_tree)
    still_reported = [copy.deepcopy(fake_pci_3), copy.deepcopy(fake_pci_4)]
    self.tracker._set_hvdevs(still_reported)

    removed_status = fields.PciDeviceStatus.REMOVED
    live_devs = [dev for dev in self.tracker.pci_devs
                 if dev.status != removed_status]
    self.assertEqual(2, len(live_devs))

    pf = [dev for dev in self.tracker.pci_devs
          if dev.dev_type == fields.PciDeviceType.SRIOV_PF].pop()
    # Only VFs that have not been marked as removed belong in the tree.
    vfs = [dev for dev in live_devs
           if dev.dev_type == fields.PciDeviceType.SRIOV_VF]
    self.assertEqual(1, len(vfs))
    self.assertEqual(vfs, pf.child_devices)
    self.assertEqual(vfs[0].parent_device, pf)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance')
def test_set_hvdev_changed_stal(self, mock_get):