Strictly follow placement allocation during PCI claim

The current PCI claim logic only ensures that the PCI devices are
allocated from the pools that are also used by the placement allocation.
But it does not make sure that the correct number of devices is consumed
from the correct pool. This could lead to a situation where the
placement allocation used one dev from RP1 and two devs from RP2 while
the PCI claim did the opposite and allocated two devs from the pool
mapped to RP1 and one from the other.

This patch fixes the logic in the stats module to consider not only the
pools but also the number of devices consumed from each pool, based on
the placement allocation (candidate).

blueprint: pci-device-tracking-in-placement
Change-Id: Ibc8cacc85a09ffc2985c1eb637ae35014b8e595e
Balazs Gibizer 2022-09-01 16:56:20 +02:00
parent 01b5d6ca42
commit 8911da6923
5 changed files with 192 additions and 63 deletions
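To make the diff below easier to follow, here is a minimal, self-contained sketch of the idea (plain dicts and illustrative names stand in for the real nova pool and PciDevice objects, so this is not the actual nova interface): the per-device RP uuid list coming from the placement allocation is turned into a per-RP counter, and each pool then gives up exactly the number of devices that placement allocated from its RP.

import collections

def consume_following_allocation(pools, rp_uuids, request_id):
    # rp_uuids contains one RP uuid per allocated device, e.g.
    # ["RP1", "RP2", "RP2"] means one device from the pool mapped to
    # RP1 and two devices from the pool mapped to RP2.
    requested_per_rp = collections.Counter(rp_uuids)
    claimed = []
    for pool in pools:
        num = requested_per_rp[pool["rp_uuid"]]  # 0 if the RP is not in the allocation
        pool["count"] -= num
        for _ in range(num):
            dev = pool["devices"].pop()
            dev["request_id"] = request_id
            claimed.append(dev)
    return claimed

pools = [
    {"rp_uuid": "RP1", "count": 2, "devices": [{"addr": "a1"}, {"addr": "a2"}]},
    {"rp_uuid": "RP2", "count": 2, "devices": [{"addr": "b1"}, {"addr": "b2"}]},
]
devs = consume_following_allocation(pools, ["RP1", "RP2", "RP2"], "req-1")
# one device is claimed from RP1's pool and both devices from RP2's pool

The actual change applies the same Counter-based bookkeeping in consume_requests and _apply_request, and tightens the pool filtering so a pool is only kept if it can supply the number of devices that placement allocated from its RP.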


@ -1536,7 +1536,7 @@ def update_pci_request_with_placement_allocations(
for spec in pci_request.spec:
# FIXME(gibi): this is baaad but spec is a dict of strings so
# we need to serialize
spec['rp_uuids'] = ','.join(set(mapping.values()))
spec['rp_uuids'] = ','.join(mapping.values())
elif needs_update_due_to_qos(pci_request, provider_mapping):


@ -13,7 +13,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import copy
import typing as ty
@ -244,6 +244,17 @@ class PciDeviceStats(object):
free_devs.extend(pool['devices'])
return free_devs
def _allocate_devs(
self, pool: Pool, num: int, request_id: str
) -> ty.List["objects.PciDevice"]:
alloc_devices = []
for _ in range(num):
pci_dev = pool['devices'].pop()
self._handle_device_dependents(pci_dev)
pci_dev.request_id = request_id
alloc_devices.append(pci_dev)
return alloc_devices
def consume_requests(
self,
pci_requests: 'objects.InstancePCIRequests',
@ -274,20 +285,29 @@ class PciDeviceStats(object):
self.add_device(alloc_devices.pop())
raise exception.PciDeviceRequestFailed(requests=pci_requests)
for pool in pools:
if pool['count'] >= count:
num_alloc = count
else:
num_alloc = pool['count']
count -= num_alloc
pool['count'] -= num_alloc
for d in range(num_alloc):
pci_dev = pool['devices'].pop()
self._handle_device_dependents(pci_dev)
pci_dev.request_id = request.request_id
alloc_devices.append(pci_dev)
if count == 0:
break
if not rp_uuids:
# if there is no placement allocation then we are free to
# consume from the pools in any order:
for pool in pools:
if pool['count'] >= count:
num_alloc = count
else:
num_alloc = pool['count']
count -= num_alloc
pool['count'] -= num_alloc
alloc_devices += self._allocate_devs(
pool, num_alloc, request.request_id)
if count == 0:
break
else:
# but if there is a placement allocation then we have to follow
# it
requested_devs_per_pool_rp = collections.Counter(rp_uuids)
for pool in pools:
count = requested_devs_per_pool_rp[pool['rp_uuid']]
pool['count'] -= count
alloc_devices += self._allocate_devs(
pool, count, request.request_id)
return alloc_devices
@ -543,7 +563,7 @@ class PciDeviceStats(object):
self,
pools: ty.List[Pool],
request: 'objects.InstancePCIRequest',
rp_uuids: ty.Set[str],
rp_uuids: ty.List[str],
) -> ty.List[Pool]:
if not rp_uuids:
# If there is no placement allocation then we don't need to filter
@ -554,6 +574,7 @@ class PciDeviceStats(object):
# configuration option is not enabled in the scheduler.
return pools
requested_dev_count_per_rp = collections.Counter(rp_uuids)
matching_pools = []
for pool in pools:
rp_uuid = pool.get('rp_uuid')
@ -572,7 +593,13 @@ class PciDeviceStats(object):
"scheduler. This pool is ignored now.", pool)
continue
if rp_uuid in rp_uuids:
if (
# the placement allocation contains this pool
rp_uuid in requested_dev_count_per_rp and
# the number of devices allocated in placement can be consumed
# from the pool
pool["count"] >= requested_dev_count_per_rp[rp_uuid]
):
matching_pools.append(pool)
return matching_pools
@ -582,7 +609,7 @@ class PciDeviceStats(object):
pools: ty.List[Pool],
request: 'objects.InstancePCIRequest',
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']],
rp_uuids: ty.Set[str],
rp_uuids: ty.List[str],
) -> ty.Optional[ty.List[Pool]]:
"""Determine if an individual PCI request can be met.
@ -751,7 +778,7 @@ class PciDeviceStats(object):
self,
pools: ty.List[Pool],
request: 'objects.InstancePCIRequest',
rp_uuids: ty.Set[str],
rp_uuids: ty.List[str],
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']] = None,
) -> bool:
"""Apply an individual PCI request.
@ -782,11 +809,22 @@ class PciDeviceStats(object):
if not filtered_pools:
return False
count = request.count
for pool in filtered_pools:
count = self._decrease_pool_count(pools, pool, count)
if not count:
break
if not rp_uuids:
# If there is no placement allocation for this request then we are
# free to consume from the filtered pools in any order
count = request.count
for pool in filtered_pools:
count = self._decrease_pool_count(pools, pool, count)
if not count:
break
else:
# but if there is a placement allocation then we have to follow that
requested_devs_per_pool_rp = collections.Counter(rp_uuids)
for pool in filtered_pools:
count = requested_devs_per_pool_rp[pool['rp_uuid']]
pool['count'] -= count
if pool['count'] == 0:
pools.remove(pool)
return True
@ -794,13 +832,18 @@ class PciDeviceStats(object):
self,
provider_mapping: ty.Optional[ty.Dict[str, ty.List[str]]],
request: 'objects.InstancePCIRequest'
) -> ty.Set[str]:
"""Return the list of RP uuids that are fulfilling the request"""
) -> ty.List[str]:
"""Return the list of RP uuids that are fulfilling the request.
An RP will be in the list as many times as the number of devices
that need to be allocated from that RP.
"""
if request.source == objects.InstancePCIRequest.NEUTRON_PORT:
# TODO(gibi): support neutron based requests in a later cycle
# set() will signal that any PCI pool can be used for this request
return set()
# an empty list will signal that any PCI pool can be used for this
# request
return []
if not provider_mapping:
# NOTE(gibi): AFAIK specs is always a list of a single dict
@ -809,23 +852,23 @@ class PciDeviceStats(object):
if not rp_uuids:
# This can happen if [filter_scheduler]pci_in_placement is not
# enabled yet
# set() will signal that any PCI pool can be used for this
# request
return set()
# An empty list will signal that any PCI pool can be used for
# this request
return []
# TODO(gibi): this is baaad but spec is a dict of strings so
# the list is serialized
return set(rp_uuids.split(','))
return rp_uuids.split(',')
# NOTE(gibi): the PCI prefilter generates RequestGroup suffixes from
# InstancePCIRequests in the form of {request_id}-{count_index}
# NOTE(gibi): a suffixed request group is always fulfilled from a
# single RP
return {
return [
rp_uuids[0]
for group_id, rp_uuids in provider_mapping.items()
if group_id.startswith(request.request_id)
}
]
def apply_requests(
self,


@ -1978,27 +1978,18 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
"compute1_0000:81:00.0", "CUSTOM_MY_VF", 1)
extra_spec = {"pci_passthrough:alias": "a-vf:3"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
# We expect this to fit, but it does not. The pool filtering logic
# only considers which pools can be used based on the allocation
# candidate, but does not consider how many devices need to be used
# from which pool. So the PCI claim logic sees both PF pools as usable
# but allocates 2 devs from 81:00 in nova. Then the PCI allocation
# healing logic sees the difference between the placement allocation
# and the nova allocation and fails when it tries to correct it.
self._create_server(
flavor_id=flavor_id, networks=[], expected_state="ERROR"
)
# server_3vf = self._create_server(flavor_id=flavor_id, networks=[])
#
# self.assertPCIDeviceCounts('compute1', total=4, free=1)
# compute1_expected_placement_view["usages"] = {
# "0000:81:00.0": {"CUSTOM_MY_VF": 1},
# "0000:81:01.0": {"CUSTOM_MY_VF": 2},
# }
# compute1_expected_placement_view["allocations"][server_3vf["id"]] = {
# "0000:81:00.0": {"CUSTOM_MY_VF": 1},
# "0000:81:01.0": {"CUSTOM_MY_VF": 2},
# }
# self.assert_placement_pci_view(
# "compute1", **compute1_expected_placement_view)
# self.assert_no_pci_healing("compute1")
# We expect this to fit.
server_3vf = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts('compute1', total=4, free=1)
compute1_expected_placement_view["usages"] = {
"0000:81:00.0": {"CUSTOM_MY_VF": 1},
"0000:81:01.0": {"CUSTOM_MY_VF": 2},
}
compute1_expected_placement_view["allocations"][server_3vf["id"]] = {
"0000:81:00.0": {"CUSTOM_MY_VF": 1},
"0000:81:01.0": {"CUSTOM_MY_VF": 2},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")


@ -1597,7 +1597,7 @@ class PciRequestUpdateTestCase(test.NoDBTestCase):
provider_mapping)
self.assertEqual(
",".join({uuids.rp1, uuids.rp2}), req.spec[0]["rp_uuids"]
{uuids.rp1, uuids.rp2}, set(req.spec[0]["rp_uuids"].split(','))
)
def test_pci_request_has_no_mapping(self):


@ -12,7 +12,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
from unittest import mock
from oslo_config import cfg
@ -1039,9 +1039,9 @@ class PciDeviceStatsProviderMappingTestCase(test.NoDBTestCase):
),
]
self.flags(device_spec=device_spec, group="pci")
dev_filter = whitelist.Whitelist(device_spec)
self.dev_filter = whitelist.Whitelist(device_spec)
self.pci_stats = stats.PciDeviceStats(
objects.NUMATopology(), dev_filter=dev_filter
objects.NUMATopology(), dev_filter=self.dev_filter
)
# add devices represented by different RPs in placement
# two VFs on the same PF
@ -1384,6 +1384,101 @@ class PciDeviceStatsProviderMappingTestCase(test.NoDBTestCase):
{pool['rp_uuid'] for pool in self.pci_stats.pools},
)
def _create_two_pools_with_two_vfs(self):
# create two pools (PFs) with two VFs each
self.pci_stats = stats.PciDeviceStats(
objects.NUMATopology(), dev_filter=self.dev_filter
)
for pf_index in [1, 2]:
for vf_index in [1, 2]:
dev = objects.PciDevice(
compute_node_id=1,
vendor_id="dead",
product_id="beef",
address=f"0000:81:0{pf_index}.{vf_index}",
parent_addr=f"0000:81:0{pf_index}.0",
numa_node=0,
dev_type="type-VF",
)
self.pci_stats.add_device(dev)
dev.extra_info = {'rp_uuid': getattr(uuids, f"pf{pf_index}")}
# populate the RP -> pool mapping from the assigned devices
self.pci_stats.populate_pools_metadata_from_assigned_devices()
# we have 2 pools and 4 devs in total
self.num_pools = 2
self.assertEqual(self.num_pools, len(self.pci_stats.pools))
self.num_devs = 4
self.assertEqual(
self.num_devs, sum(pool["count"] for pool in self.pci_stats.pools)
)
def test_apply_asymmetric_allocation(self):
self._create_two_pools_with_two_vfs()
# ask for 3 VFs
vf_req = objects.InstancePCIRequest(
count=3,
alias_name='a-vf',
request_id=uuids.vf_req,
spec=[
{
"vendor_id": "dead",
"product_id": "beef",
"dev_type": "type-VF",
}
],
)
# Simulate that placement returned an allocation candidate where 1 VF
# is consumed from PF1 and two from PF2
mapping = {
# the VF is represented by the parent PF RP
f"{uuids.vf_req}-0": [uuids.pf1],
f"{uuids.vf_req}-1": [uuids.pf2],
f"{uuids.vf_req}-2": [uuids.pf2],
}
# This should fit
self.assertTrue(
self.pci_stats.support_requests([vf_req], mapping)
)
# and when the request is applied, the consumption from the pools
# should be in sync with the placement allocation. So the PF2 pool is
# expected to disappear as it is fully consumed and the PF1 pool
# should have one free device.
self.pci_stats.apply_requests([vf_req], mapping)
self.assertEqual(1, len(self.pci_stats.pools))
self.assertEqual(uuids.pf1, self.pci_stats.pools[0]['rp_uuid'])
self.assertEqual(1, self.pci_stats.pools[0]['count'])
def test_consume_asymmetric_allocation(self):
self._create_two_pools_with_two_vfs()
# ask for 3 VFs
vf_req = objects.InstancePCIRequest(
count=3,
alias_name='a-vf',
request_id=uuids.vf_req,
spec=[
{
"vendor_id": "dead",
"product_id": "beef",
"dev_type": "type-VF",
# Simulate that the scheduler already allocated a candidate
# and the mapping is stored in the request.
# In placement 1 VF is allocated from PF1 and two from PF2
"rp_uuids": ",".join([uuids.pf1, uuids.pf2, uuids.pf2])
}
],
)
# So when the PCI claim consumes devices based on this request we
# expect that nova follows what is allocated in placement.
devs = self.pci_stats.consume_requests([vf_req])
self.assertEqual(
{"0000:81:01.0": 1, "0000:81:02.0": 2},
collections.Counter(dev.parent_addr for dev in devs),
)
def test_consume_restricted_by_allocation(self):
pf_req = objects.InstancePCIRequest(
count=1,