Merge "Map PCI pools to RP UUIDs"
This commit is contained in:
commit
2f4feeabc2
@ -261,6 +261,12 @@ class PciResourceProvider:
|
||||
)
|
||||
provider_tree.update_traits(self.name, self.traits)
|
||||
|
||||
# Here we are sure the RP exists in the provider_tree. So, we can
|
||||
# record the RP UUID in each PciDevice this RP represents
|
||||
rp_uuid = provider_tree.data(self.name).uuid
|
||||
for dev in self.devs:
|
||||
dev.extra_info['rp_uuid'] = rp_uuid
|
||||
|
||||
def update_allocations(
|
||||
self,
|
||||
allocations: dict,
|
||||
@ -598,6 +604,11 @@ def update_provider_tree_for_pci(
|
||||
|
||||
pv.update_provider_tree(provider_tree)
|
||||
old_alloc = copy.deepcopy(allocations)
|
||||
# update_provider_tree correlated the PciDevice objects with RPs in
|
||||
# placement and recorded the RP UUID in the PciDevice object. We need to
|
||||
# trigger an update on the device pools in the tracker to get the device
|
||||
# RP UUID mapped to the device pools
|
||||
pci_tracker.stats.populate_pools_metadata_from_assigned_devices()
|
||||
updated = pv.update_allocations(allocations, provider_tree)
|
||||
|
||||
if updated:
|
||||
|
@ -991,8 +991,6 @@ class ResourceTracker(object):
|
||||
# notified when instances are deleted, we need remove all usages
|
||||
# from deleted instances.
|
||||
self.pci_tracker.clean_usage(instances, migrations)
|
||||
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
|
||||
cn.pci_device_pools = dev_pools_obj
|
||||
|
||||
self._report_final_resource_view(nodename)
|
||||
|
||||
@ -1314,13 +1312,23 @@ class ResourceTracker(object):
|
||||
|
||||
def _update(self, context, compute_node, startup=False):
|
||||
"""Update partial stats locally and populate them to Scheduler."""
|
||||
|
||||
self._update_to_placement(context, compute_node, startup)
|
||||
|
||||
if self.pci_tracker:
|
||||
# sync PCI device pool state stored in the compute node with
|
||||
# the actual state from the PCI tracker as we commit changes in
|
||||
# the DB and in the PCI tracker below
|
||||
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
|
||||
compute_node.pci_device_pools = dev_pools_obj
|
||||
|
||||
# _resource_change will update self.old_resources if it detects changes
|
||||
# but we want to restore those if compute_node.save() fails.
|
||||
nodename = compute_node.hypervisor_hostname
|
||||
old_compute = self.old_resources[nodename]
|
||||
if self._resource_change(compute_node):
|
||||
# If the compute_node's resource changed, update to DB. Note that
|
||||
# _update_to_placement below does not supersede the need to do this
|
||||
# _update_to_placement above does not supersede the need to do this
|
||||
# because there are stats-related fields in the ComputeNode object
|
||||
# which could have changed and still need to be reported to the
|
||||
# scheduler filters/weighers (which could be out of tree as well).
|
||||
@ -1333,8 +1341,6 @@ class ResourceTracker(object):
|
||||
with excutils.save_and_reraise_exception(logger=LOG):
|
||||
self.old_resources[nodename] = old_compute
|
||||
|
||||
self._update_to_placement(context, compute_node, startup)
|
||||
|
||||
if self.pci_tracker:
|
||||
self.pci_tracker.save(context)
|
||||
|
||||
|
@ -96,6 +96,8 @@ class PciDeviceStats(object):
|
||||
pool_keys = pool.copy()
|
||||
del pool_keys['count']
|
||||
del pool_keys['devices']
|
||||
# FIXME(gibi): do we need this?
|
||||
pool_keys.pop('rp_uuid', None)
|
||||
if (len(pool_keys.keys()) == len(dev_pool.keys()) and
|
||||
self._equal_properties(dev_pool, pool_keys, list(dev_pool))):
|
||||
return pool
|
||||
@ -779,3 +781,40 @@ class PciDeviceStats(object):
|
||||
)
|
||||
pools = self._filter_pools_for_spec(self.pools, dummy_req)
|
||||
return bool(pools)
|
||||
|
||||
def populate_pools_metadata_from_assigned_devices(self):
|
||||
"""Populate the rp_uuid of each pool based on the rp_uuid of the
|
||||
devices assigned to the pool. This can only be called from the compute
|
||||
where devices are assigned to each pool. This should not be called from
|
||||
the scheduler as there device - pool assignment is not known.
|
||||
"""
|
||||
# PciDevices are tracked in placement and flavor based PCI requests
|
||||
# are scheduled and allocated in placement. To be able to correlate
|
||||
# what is allocated in placement and what is consumed in nova we
|
||||
# need to map device pools to RPs. We can do that as the PciDevice
|
||||
# contains the RP UUID that represents it in placement.
|
||||
# NOTE(gibi): We cannot do this when the device is originally added to
|
||||
# the pool as the device -> placement translation, that creates the
|
||||
# RPs, runs after all the device is created and assigned to pools.
|
||||
for pool in self.pools:
|
||||
pool_rps = {
|
||||
dev.extra_info.get("rp_uuid")
|
||||
for dev in pool["devices"]
|
||||
if "rp_uuid" in dev.extra_info
|
||||
}
|
||||
if len(pool_rps) >= 2:
|
||||
# FIXME(gibi): Do we have a 1:1 pool - RP mapping even
|
||||
# if two PFs providing very similar VFs?
|
||||
raise ValueError(
|
||||
"We have a pool %s connected to more than one RPs %s in "
|
||||
"placement via devs %s" % (pool, pool_rps, pool["devices"])
|
||||
)
|
||||
|
||||
if not pool_rps:
|
||||
# this can happen if the nova-compute is upgraded to have the
|
||||
# PCI in placement inventory handling code but
|
||||
# [pci]report_in_placement is not turned on yet.
|
||||
continue
|
||||
|
||||
if pool_rps: # now we know that it is a single RP
|
||||
pool['rp_uuid'] = next(iter(pool_rps))
|
||||
|
@ -12,12 +12,15 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import ddt
|
||||
from oslo_utils.fixture import uuidsentinel as uuids
|
||||
from unittest import mock
|
||||
|
||||
from nova.compute import pci_placement_translator as ppt
|
||||
from nova.compute import provider_tree
|
||||
from nova import exception
|
||||
from nova.objects import fields
|
||||
from nova.objects import pci_device
|
||||
from nova.pci import devspec
|
||||
from nova import test
|
||||
|
||||
|
||||
@ -235,3 +238,54 @@ class TestTranslator(test.NoDBTestCase):
|
||||
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for 0000:81:00.0,0000:81:00.1.",
|
||||
str(ex),
|
||||
)
|
||||
|
||||
def test_translator_maps_pci_device_to_rp(self):
|
||||
pv = ppt.PlacementView(
|
||||
"fake-node", instances_under_same_host_resize=[])
|
||||
vf = pci_device.PciDevice(
|
||||
address="0000:81:00.1",
|
||||
parent_addr="0000:71:00.0",
|
||||
dev_type=fields.PciDeviceType.SRIOV_VF,
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
)
|
||||
pf = pci_device.PciDevice(
|
||||
address="0000:72:00.0",
|
||||
parent_addr=None,
|
||||
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
)
|
||||
pt = provider_tree.ProviderTree()
|
||||
pt.new_root("fake-node", uuids.compute_rp)
|
||||
|
||||
pv._add_dev(vf, {})
|
||||
pv._add_dev(pf, {})
|
||||
pv.update_provider_tree(pt)
|
||||
|
||||
self.assertEqual(
|
||||
pt.data("fake-node_0000:71:00.0").uuid, vf.extra_info["rp_uuid"]
|
||||
)
|
||||
self.assertEqual(
|
||||
pt.data("fake-node_0000:72:00.0").uuid, pf.extra_info["rp_uuid"]
|
||||
)
|
||||
|
||||
def test_update_provider_tree_for_pci_update_pools(self):
|
||||
pt = provider_tree.ProviderTree()
|
||||
pt.new_root("fake-node", uuids.compute_rp)
|
||||
pf = pci_device.PciDevice(
|
||||
address="0000:72:00.0",
|
||||
parent_addr=None,
|
||||
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
status=fields.PciDeviceStatus.AVAILABLE,
|
||||
)
|
||||
pci_tracker = mock.Mock()
|
||||
pci_tracker.pci_devs = [pf]
|
||||
pci_tracker.dev_filter.specs = [devspec.PciDeviceSpec({})]
|
||||
|
||||
ppt.update_provider_tree_for_pci(pt, 'fake-node', pci_tracker, {}, [])
|
||||
|
||||
pci_tracker.stats.populate_pools_metadata_from_assigned_devices.\
|
||||
assert_called_once_with()
|
||||
|
@ -1580,6 +1580,7 @@ class TestUpdateComputeNode(BaseTestCase):
|
||||
self.rt._update(mock.sentinel.ctx, new_compute)
|
||||
save_mock.assert_called_once_with()
|
||||
|
||||
@mock.patch('nova.objects.ComputeNode.save', new=mock.Mock())
|
||||
@mock.patch(
|
||||
'nova.pci.stats.PciDeviceStats.has_remote_managed_device_pools',
|
||||
return_value=True)
|
||||
@ -1773,7 +1774,7 @@ class TestUpdateComputeNode(BaseTestCase):
|
||||
self.assertEqual(4, ufpt_mock.call_count)
|
||||
self.assertEqual(4, mock_sync_disabled.call_count)
|
||||
# The retry is restricted to _update_to_placement
|
||||
self.assertEqual(1, mock_resource_change.call_count)
|
||||
self.assertEqual(0, mock_resource_change.call_count)
|
||||
|
||||
@mock.patch(
|
||||
'nova.compute.resource_tracker.ResourceTracker.'
|
||||
@ -2041,6 +2042,10 @@ class TestUpdateComputeNode(BaseTestCase):
|
||||
self.assertIn('Unable to find services table record for nova-compute',
|
||||
mock_log_error.call_args[0][0])
|
||||
|
||||
@mock.patch(
|
||||
'nova.compute.resource_tracker.ResourceTracker.'
|
||||
'_update_to_placement',
|
||||
new=mock.Mock())
|
||||
def test_update_compute_node_save_fails_restores_old_resources(self):
|
||||
"""Tests the scenario that compute_node.save() fails and the
|
||||
old_resources value for the node is restored to its previous value
|
||||
|
@ -17,6 +17,7 @@ from unittest import mock
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_serialization import jsonutils
|
||||
from oslo_utils.fixture import uuidsentinel as uuids
|
||||
|
||||
from nova import exception
|
||||
from nova import objects
|
||||
@ -896,6 +897,120 @@ class PciDeviceStatsPlacementSupportTestCase(test.NoDBTestCase):
|
||||
|
||||
self.assertEqual(pools, matching_pools)
|
||||
|
||||
def test_populate_pools_metadata_from_assigned_devices(self):
|
||||
device_spec = [
|
||||
jsonutils.dumps(
|
||||
{
|
||||
"address": "0000:81:00.*",
|
||||
}
|
||||
),
|
||||
]
|
||||
self.flags(device_spec=device_spec, group="pci")
|
||||
dev_filter = whitelist.Whitelist(device_spec)
|
||||
pci_stats = stats.PciDeviceStats(
|
||||
objects.NUMATopology(),
|
||||
dev_filter=dev_filter)
|
||||
pci_dev1 = objects.PciDevice(
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
address="0000:81:00.1",
|
||||
parent_addr="0000:81:00.0",
|
||||
numa_node=0,
|
||||
dev_type="type-VF",
|
||||
)
|
||||
pci_dev2 = objects.PciDevice(
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
address="0000:81:00.2",
|
||||
parent_addr="0000:81:00.0",
|
||||
numa_node=0,
|
||||
dev_type="type-VF",
|
||||
)
|
||||
pci_stats.add_device(pci_dev1)
|
||||
pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
|
||||
pci_stats.add_device(pci_dev2)
|
||||
pci_dev2.extra_info = {'rp_uuid': uuids.rp1}
|
||||
|
||||
self.assertEqual(1, len(pci_stats.pools))
|
||||
|
||||
pci_stats.populate_pools_metadata_from_assigned_devices()
|
||||
|
||||
self.assertEqual(uuids.rp1, pci_stats.pools[0]['rp_uuid'])
|
||||
|
||||
def test_populate_pools_metadata_from_assigned_devices_device_without_rp(
|
||||
self
|
||||
):
|
||||
device_spec = [
|
||||
jsonutils.dumps(
|
||||
{
|
||||
"address": "0000:81:00.*",
|
||||
}
|
||||
),
|
||||
]
|
||||
self.flags(device_spec=device_spec, group="pci")
|
||||
dev_filter = whitelist.Whitelist(device_spec)
|
||||
pci_stats = stats.PciDeviceStats(
|
||||
objects.NUMATopology(),
|
||||
dev_filter=dev_filter)
|
||||
pci_dev1 = objects.PciDevice(
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
address="0000:81:00.1",
|
||||
parent_addr="0000:81:00.0",
|
||||
numa_node=0,
|
||||
dev_type="type-VF",
|
||||
)
|
||||
pci_stats.add_device(pci_dev1)
|
||||
|
||||
self.assertEqual(1, len(pci_stats.pools))
|
||||
|
||||
pci_stats.populate_pools_metadata_from_assigned_devices()
|
||||
|
||||
self.assertNotIn('rp_uuid', pci_stats.pools[0])
|
||||
|
||||
def test_populate_pools_metadata_from_assigned_devices_multiple_rp(self):
|
||||
device_spec = [
|
||||
jsonutils.dumps(
|
||||
{
|
||||
"address": "0000:81:00.*",
|
||||
}
|
||||
),
|
||||
]
|
||||
self.flags(device_spec=device_spec, group="pci")
|
||||
dev_filter = whitelist.Whitelist(device_spec)
|
||||
pci_stats = stats.PciDeviceStats(
|
||||
objects.NUMATopology(),
|
||||
dev_filter=dev_filter)
|
||||
pci_dev1 = objects.PciDevice(
|
||||
compute_node_id=1,
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
address="0000:81:00.1",
|
||||
parent_addr="0000:81:00.0",
|
||||
numa_node=0,
|
||||
dev_type="type-VF",
|
||||
)
|
||||
pci_dev2 = objects.PciDevice(
|
||||
compute_node_id=1,
|
||||
vendor_id="dead",
|
||||
product_id="beef",
|
||||
address="0000:81:00.2",
|
||||
parent_addr="0000:81:00.0",
|
||||
numa_node=0,
|
||||
dev_type="type-VF",
|
||||
)
|
||||
pci_stats.add_device(pci_dev1)
|
||||
pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
|
||||
pci_stats.add_device(pci_dev2)
|
||||
pci_dev2.extra_info = {'rp_uuid': uuids.rp2}
|
||||
|
||||
self.assertEqual(1, len(pci_stats.pools))
|
||||
|
||||
self.assertRaises(
|
||||
ValueError,
|
||||
pci_stats.populate_pools_metadata_from_assigned_devices,
|
||||
)
|
||||
|
||||
|
||||
class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user