From e96601c606059c862e8066f91b49093f98ad46d2 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Fri, 19 Aug 2022 15:32:00 +0200
Subject: [PATCH] Map PCI pools to RP UUIDs

Nova's PCI scheduling (and the PCI claim) works on PCI device pools into
which the similar available PCI devices are grouped. The PCI devices are
now represented in placement as RPs, and both the allocation candidates
during scheduling and the allocation after scheduling now contain PCI
devices. This information needs to affect the PCI scheduling and the PCI
claim. To be able to do that we need to map PCI device pools to RPs. We
achieve that here by first mapping PciDevice objects to RPs during
placement PCI inventory reporting, and then mapping pools to RPs based on
the PCI devices assigned to the pools.

Also, because the ResourceTracker._update_to_placement() call now updates
the PCI device pools, the sequence of events in the ResourceTracker needed
to change to:

1) run _update_to_placement()
2) copy the pools to the ComputeNode object
3) save the compute node to the DB
4) save the PCI tracker

blueprint: pci-device-tracking-in-placement
Change-Id: I9bb450ac235ab72ff0d8078635e7a11c04ff6c1e
---
 nova/compute/pci_placement_translator.py      |  11 ++
 nova/compute/resource_tracker.py              |  16 ++-
 nova/pci/stats.py                             |  39 ++++++
 .../compute/test_pci_placement_translator.py  |  54 ++++++++
 .../unit/compute/test_resource_tracker.py     |   7 +-
 nova/tests/unit/pci/test_stats.py             | 115 ++++++++++++++++++
 6 files changed, 236 insertions(+), 6 deletions(-)

diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py
index 8aa7a1f4a976..3ee52e303cae 100644
--- a/nova/compute/pci_placement_translator.py
+++ b/nova/compute/pci_placement_translator.py
@@ -261,6 +261,12 @@ class PciResourceProvider:
         )
         provider_tree.update_traits(self.name, self.traits)
 
+        # Here we are sure the RP exists in the provider_tree. So we can
+        # record the RP UUID in each PciDevice this RP represents.
+        rp_uuid = provider_tree.data(self.name).uuid
+        for dev in self.devs:
+            dev.extra_info['rp_uuid'] = rp_uuid
+
     def update_allocations(
         self,
         allocations: dict,
@@ -598,6 +604,11 @@ def update_provider_tree_for_pci(
         pv.update_provider_tree(provider_tree)
 
     old_alloc = copy.deepcopy(allocations)
+    # update_provider_tree() correlated the PciDevice objects with RPs in
+    # placement and recorded the RP UUID in each PciDevice. We need to
+    # trigger an update on the device pools in the tracker to get those
+    # device RP UUIDs mapped to the device pools as well.
+    pci_tracker.stats.populate_pools_metadata_from_assigned_devices()
     updated = pv.update_allocations(allocations, provider_tree)
 
     if updated:
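Note on the hunk above: once update_provider_tree() has ensured the RP
exists in the tree, its UUID is stamped into PciDevice.extra_info, a
free-form metadata dict, and every device the RP represents gets the same
UUID. A minimal self-contained sketch of that pattern (FakeDevice and the
tree dict are illustrative stand-ins, not Nova's API):

```python
import uuid


class FakeDevice:
    """Illustrative stand-in for nova.objects.PciDevice."""

    def __init__(self, address):
        self.address = address
        self.extra_info = {}  # free-form metadata dict, like PciDevice's


# RP name -> RP UUID, standing in for the provider tree contents
tree = {"fake-node_0000:71:00.0": str(uuid.uuid4())}
devs = [FakeDevice("0000:81:00.1"), FakeDevice("0000:81:00.2")]

# once the RP is known to exist, stamp its UUID on every device it
# represents; many devices (e.g. VFs of one PF) can share a single RP
rp_uuid = tree["fake-node_0000:71:00.0"]
for dev in devs:
    dev.extra_info["rp_uuid"] = rp_uuid

assert all(d.extra_info["rp_uuid"] == rp_uuid for d in devs)
```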
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index ffbc7ed03fab..ab60c77d969b 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -991,8 +991,6 @@ class ResourceTracker(object):
             # notified when instances are deleted, we need remove all usages
             # from deleted instances.
             self.pci_tracker.clean_usage(instances, migrations)
-            dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
-            cn.pci_device_pools = dev_pools_obj
 
         self._report_final_resource_view(nodename)
 
@@ -1314,13 +1312,23 @@ class ResourceTracker(object):
 
     def _update(self, context, compute_node, startup=False):
         """Update partial stats locally and populate them to Scheduler."""
+
+        self._update_to_placement(context, compute_node, startup)
+
+        if self.pci_tracker:
+            # sync PCI device pool state stored in the compute node with
+            # the actual state from the PCI tracker as we commit changes in
+            # the DB and in the PCI tracker below
+            dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
+            compute_node.pci_device_pools = dev_pools_obj
+
         # _resource_change will update self.old_resources if it detects changes
         # but we want to restore those if compute_node.save() fails.
         nodename = compute_node.hypervisor_hostname
         old_compute = self.old_resources[nodename]
         if self._resource_change(compute_node):
             # If the compute_node's resource changed, update to DB. Note that
-            # _update_to_placement below does not supersede the need to do this
+            # _update_to_placement above does not supersede the need to do this
             # because there are stats-related fields in the ComputeNode object
             # which could have changed and still need to be reported to the
             # scheduler filters/weighers (which could be out of tree as well).
@@ -1333,8 +1341,6 @@ class ResourceTracker(object):
                 with excutils.save_and_reraise_exception(logger=LOG):
                     self.old_resources[nodename] = old_compute
 
-        self._update_to_placement(context, compute_node, startup)
-
         if self.pci_tracker:
             self.pci_tracker.save(context)
 
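The _update() reordering above is the four-step sequence described in the
commit message. A condensed sketch of why placement reporting must now run
first (FakePciTracker and FakeComputeNode are simplified stand-ins, not
Nova classes): the placement step is what records the rp_uuid that the
subsequent pool copy and DB save pick up.

```python
class FakeComputeNode:
    """Illustrative stand-in for objects.ComputeNode."""

    def __init__(self):
        self.pci_device_pools = None

    def save(self):
        print("DB save, pools:", self.pci_device_pools)


class FakePciTracker:
    """Illustrative stand-in for the PCI tracker."""

    def __init__(self):
        self.pools = [{"vendor_id": "dead", "product_id": "beef"}]

    def report_to_placement(self):
        # stands in for _update_to_placement(); its side effect is the
        # rp_uuid mapping, which is why it has to run first now
        for pool in self.pools:
            pool["rp_uuid"] = "rp-uuid-from-placement-reporting"

    def to_device_pools_obj(self):
        return [dict(pool) for pool in self.pools]

    def save(self):
        print("PCI tracker state committed")


tracker = FakePciTracker()
cn = FakeComputeNode()

tracker.report_to_placement()                        # 1) _update_to_placement()
cn.pci_device_pools = tracker.to_device_pools_obj()  # 2) copy pools to the node
cn.save()                                            # 3) save the node to the DB
tracker.save()                                       # 4) save the PCI tracker

assert cn.pci_device_pools[0]["rp_uuid"]  # the saved pools carry the mapping
```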
diff --git a/nova/pci/stats.py b/nova/pci/stats.py
index 806ef2d661f1..771e0a6a8fbc 100644
--- a/nova/pci/stats.py
+++ b/nova/pci/stats.py
@@ -96,6 +96,8 @@ class PciDeviceStats(object):
             pool_keys = pool.copy()
             del pool_keys['count']
             del pool_keys['devices']
+            # FIXME(gibi): do we need this?
+            pool_keys.pop('rp_uuid', None)
             if (len(pool_keys.keys()) == len(dev_pool.keys()) and
                 self._equal_properties(dev_pool, pool_keys, list(dev_pool))):
                 return pool
@@ -779,3 +781,40 @@ class PciDeviceStats(object):
         )
         pools = self._filter_pools_for_spec(self.pools, dummy_req)
         return bool(pools)
+
+    def populate_pools_metadata_from_assigned_devices(self):
+        """Populate the rp_uuid of each pool based on the rp_uuid of the
+        devices assigned to the pool. This can only be called on the compute
+        node, where devices are assigned to each pool; it must not be called
+        from the scheduler, where the device to pool assignment is unknown.
+        """
+        # PciDevices are tracked in placement, and flavor based PCI requests
+        # are scheduled and allocated there. To correlate what is allocated
+        # in placement with what is consumed in nova we need to map device
+        # pools to RPs, using the RP UUID recorded in each PciDevice.
+        # NOTE(gibi): We cannot do this when the device is originally added
+        # to the pool, as the device -> placement translation that creates
+        # the RPs runs after all the devices are created and assigned to
+        # pools.
+        for pool in self.pools:
+            pool_rps = {
+                dev.extra_info.get("rp_uuid")
+                for dev in pool["devices"]
+                if "rp_uuid" in dev.extra_info
+            }
+            if len(pool_rps) >= 2:
+                # FIXME(gibi): Do we have a 1:1 pool - RP mapping even
+                # if two PFs provide very similar VFs?
+                raise ValueError(
+                    "We have a pool %s connected to more than one RP %s in "
+                    "placement via devs %s" % (pool, pool_rps, pool["devices"])
+                )
+
+            if not pool_rps:
+                # this can happen if the nova-compute is upgraded to have the
+                # PCI in placement inventory handling code but
+                # [pci]report_in_placement is not turned on yet
+                continue
+
+            # now we know that the pool is mapped to a single RP
+            pool['rp_uuid'] = next(iter(pool_rps))
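The new populate_pools_metadata_from_assigned_devices() above distinguishes
three cases per pool: two or more RPs is an inconsistency and raises, no RP
means the devices are not yet reported to placement and the pool stays
unmapped, and exactly one RP gets recorded on the pool. A standalone sketch
of the same decision logic, with plain dicts standing in for PciDevice
objects and pools:

```python
def map_pool_to_rp(pool):
    """Record the single RP UUID backing a pool, following the same rules."""
    rp_uuids = {
        dev["extra_info"]["rp_uuid"]
        for dev in pool["devices"]
        if "rp_uuid" in dev["extra_info"]
    }
    if len(rp_uuids) >= 2:
        # pools are built from similar devices, so a pool is expected to map
        # to exactly one RP; two RPs indicate an inconsistency
        raise ValueError("pool maps to more than one RP: %s" % rp_uuids)
    if not rp_uuids:
        # devices not yet reported to placement (e.g. the
        # [pci]report_in_placement option is off): leave the pool unmapped
        return
    pool["rp_uuid"] = rp_uuids.pop()


pool = {"devices": [
    {"extra_info": {"rp_uuid": "rp-1"}},
    {"extra_info": {"rp_uuid": "rp-1"}},
]}
map_pool_to_rp(pool)
assert pool["rp_uuid"] == "rp-1"
```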
self.assertEqual(1, mock_resource_change.call_count) + self.assertEqual(0, mock_resource_change.call_count) @mock.patch( 'nova.compute.resource_tracker.ResourceTracker.' @@ -2041,6 +2042,10 @@ class TestUpdateComputeNode(BaseTestCase): self.assertIn('Unable to find services table record for nova-compute', mock_log_error.call_args[0][0]) + @mock.patch( + 'nova.compute.resource_tracker.ResourceTracker.' + '_update_to_placement', + new=mock.Mock()) def test_update_compute_node_save_fails_restores_old_resources(self): """Tests the scenario that compute_node.save() fails and the old_resources value for the node is restored to its previous value diff --git a/nova/tests/unit/pci/test_stats.py b/nova/tests/unit/pci/test_stats.py index a9e349aba4ea..d9b5b7bca118 100644 --- a/nova/tests/unit/pci/test_stats.py +++ b/nova/tests/unit/pci/test_stats.py @@ -17,6 +17,7 @@ from unittest import mock from oslo_config import cfg from oslo_serialization import jsonutils +from oslo_utils.fixture import uuidsentinel as uuids from nova import exception from nova import objects @@ -896,6 +897,120 @@ class PciDeviceStatsPlacementSupportTestCase(test.NoDBTestCase): self.assertEqual(pools, matching_pools) + def test_populate_pools_metadata_from_assigned_devices(self): + device_spec = [ + jsonutils.dumps( + { + "address": "0000:81:00.*", + } + ), + ] + self.flags(device_spec=device_spec, group="pci") + dev_filter = whitelist.Whitelist(device_spec) + pci_stats = stats.PciDeviceStats( + objects.NUMATopology(), + dev_filter=dev_filter) + pci_dev1 = objects.PciDevice( + vendor_id="dead", + product_id="beef", + address="0000:81:00.1", + parent_addr="0000:81:00.0", + numa_node=0, + dev_type="type-VF", + ) + pci_dev2 = objects.PciDevice( + vendor_id="dead", + product_id="beef", + address="0000:81:00.2", + parent_addr="0000:81:00.0", + numa_node=0, + dev_type="type-VF", + ) + pci_stats.add_device(pci_dev1) + pci_dev1.extra_info = {'rp_uuid': uuids.rp1} + pci_stats.add_device(pci_dev2) + pci_dev2.extra_info = {'rp_uuid': uuids.rp1} + + self.assertEqual(1, len(pci_stats.pools)) + + pci_stats.populate_pools_metadata_from_assigned_devices() + + self.assertEqual(uuids.rp1, pci_stats.pools[0]['rp_uuid']) + + def test_populate_pools_metadata_from_assigned_devices_device_without_rp( + self + ): + device_spec = [ + jsonutils.dumps( + { + "address": "0000:81:00.*", + } + ), + ] + self.flags(device_spec=device_spec, group="pci") + dev_filter = whitelist.Whitelist(device_spec) + pci_stats = stats.PciDeviceStats( + objects.NUMATopology(), + dev_filter=dev_filter) + pci_dev1 = objects.PciDevice( + vendor_id="dead", + product_id="beef", + address="0000:81:00.1", + parent_addr="0000:81:00.0", + numa_node=0, + dev_type="type-VF", + ) + pci_stats.add_device(pci_dev1) + + self.assertEqual(1, len(pci_stats.pools)) + + pci_stats.populate_pools_metadata_from_assigned_devices() + + self.assertNotIn('rp_uuid', pci_stats.pools[0]) + + def test_populate_pools_metadata_from_assigned_devices_multiple_rp(self): + device_spec = [ + jsonutils.dumps( + { + "address": "0000:81:00.*", + } + ), + ] + self.flags(device_spec=device_spec, group="pci") + dev_filter = whitelist.Whitelist(device_spec) + pci_stats = stats.PciDeviceStats( + objects.NUMATopology(), + dev_filter=dev_filter) + pci_dev1 = objects.PciDevice( + compute_node_id=1, + vendor_id="dead", + product_id="beef", + address="0000:81:00.1", + parent_addr="0000:81:00.0", + numa_node=0, + dev_type="type-VF", + ) + pci_dev2 = objects.PciDevice( + compute_node_id=1, + vendor_id="dead", + product_id="beef", 
diff --git a/nova/tests/unit/pci/test_stats.py b/nova/tests/unit/pci/test_stats.py
index a9e349aba4ea..d9b5b7bca118 100644
--- a/nova/tests/unit/pci/test_stats.py
+++ b/nova/tests/unit/pci/test_stats.py
@@ -17,6 +17,7 @@ from unittest import mock
 
 from oslo_config import cfg
 from oslo_serialization import jsonutils
+from oslo_utils.fixture import uuidsentinel as uuids
 
 from nova import exception
 from nova import objects
@@ -896,6 +897,120 @@ class PciDeviceStatsPlacementSupportTestCase(test.NoDBTestCase):
 
         self.assertEqual(pools, matching_pools)
 
+    def test_populate_pools_metadata_from_assigned_devices(self):
+        device_spec = [
+            jsonutils.dumps(
+                {
+                    "address": "0000:81:00.*",
+                }
+            ),
+        ]
+        self.flags(device_spec=device_spec, group="pci")
+        dev_filter = whitelist.Whitelist(device_spec)
+        pci_stats = stats.PciDeviceStats(
+            objects.NUMATopology(),
+            dev_filter=dev_filter)
+        pci_dev1 = objects.PciDevice(
+            vendor_id="dead",
+            product_id="beef",
+            address="0000:81:00.1",
+            parent_addr="0000:81:00.0",
+            numa_node=0,
+            dev_type="type-VF",
+        )
+        pci_dev2 = objects.PciDevice(
+            vendor_id="dead",
+            product_id="beef",
+            address="0000:81:00.2",
+            parent_addr="0000:81:00.0",
+            numa_node=0,
+            dev_type="type-VF",
+        )
+        pci_stats.add_device(pci_dev1)
+        pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
+        pci_stats.add_device(pci_dev2)
+        pci_dev2.extra_info = {'rp_uuid': uuids.rp1}
+
+        self.assertEqual(1, len(pci_stats.pools))
+
+        pci_stats.populate_pools_metadata_from_assigned_devices()
+
+        self.assertEqual(uuids.rp1, pci_stats.pools[0]['rp_uuid'])
+
+    def test_populate_pools_metadata_from_assigned_devices_device_without_rp(
+        self
+    ):
+        device_spec = [
+            jsonutils.dumps(
+                {
+                    "address": "0000:81:00.*",
+                }
+            ),
+        ]
+        self.flags(device_spec=device_spec, group="pci")
+        dev_filter = whitelist.Whitelist(device_spec)
+        pci_stats = stats.PciDeviceStats(
+            objects.NUMATopology(),
+            dev_filter=dev_filter)
+        pci_dev1 = objects.PciDevice(
+            vendor_id="dead",
+            product_id="beef",
+            address="0000:81:00.1",
+            parent_addr="0000:81:00.0",
+            numa_node=0,
+            dev_type="type-VF",
+        )
+        pci_stats.add_device(pci_dev1)
+
+        self.assertEqual(1, len(pci_stats.pools))
+
+        pci_stats.populate_pools_metadata_from_assigned_devices()
+
+        self.assertNotIn('rp_uuid', pci_stats.pools[0])
+
+    def test_populate_pools_metadata_from_assigned_devices_multiple_rp(self):
+        device_spec = [
+            jsonutils.dumps(
+                {
+                    "address": "0000:81:00.*",
+                }
+            ),
+        ]
+        self.flags(device_spec=device_spec, group="pci")
+        dev_filter = whitelist.Whitelist(device_spec)
+        pci_stats = stats.PciDeviceStats(
+            objects.NUMATopology(),
+            dev_filter=dev_filter)
+        pci_dev1 = objects.PciDevice(
+            compute_node_id=1,
+            vendor_id="dead",
+            product_id="beef",
+            address="0000:81:00.1",
+            parent_addr="0000:81:00.0",
+            numa_node=0,
+            dev_type="type-VF",
+        )
+        pci_dev2 = objects.PciDevice(
+            compute_node_id=1,
+            vendor_id="dead",
+            product_id="beef",
+            address="0000:81:00.2",
+            parent_addr="0000:81:00.0",
+            numa_node=0,
+            dev_type="type-VF",
+        )
+        pci_stats.add_device(pci_dev1)
+        pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
+        pci_stats.add_device(pci_dev2)
+        pci_dev2.extra_info = {'rp_uuid': uuids.rp2}
+
+        self.assertEqual(1, len(pci_stats.pools))
+
+        self.assertRaises(
+            ValueError,
+            pci_stats.populate_pools_metadata_from_assigned_devices,
+        )
+
 
 class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
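For context on how the new rp_uuid pool metadata is meant to be consumed
later in the series: code holding a placement allocation can look up the
device pool backed by the allocated RP. A hypothetical helper, not part of
this patch, sketching that correlation:

```python
from typing import List, Optional


def find_pool_for_allocation(
    pools: List[dict], allocated_rp_uuid: str
) -> Optional[dict]:
    """Return the device pool backed by the given placement RP, if any."""
    for pool in pools:
        # pools without rp_uuid (e.g. pre-upgrade computes) never match
        if pool.get("rp_uuid") == allocated_rp_uuid:
            return pool
    return None


pools = [
    {"vendor_id": "dead", "product_id": "beef", "rp_uuid": "rp-1", "count": 2},
    {"vendor_id": "dead", "product_id": "beef", "count": 1},  # unmapped pool
]

assert find_pool_for_allocation(pools, "rp-1") is pools[0]
assert find_pool_for_allocation(pools, "rp-9") is None
```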