Strictly follow placement allocation during PCI claim

The current PCI claim logic only ensures that the PCI devices are
allocated from pools that are also used by the placement allocation.
It does not make sure that the correct number of devices is consumed
from each pool. This could lead to a situation where the placement
allocation used one dev from RP1 and two devs from RP2 while the PCI
claim did the opposite and allocated two devs from the pool mapped to
RP1 and one from the other.

This patch fixes the logic in the stats module to consider not just
the pools but also the number of devices to consume from each pool,
based on the placement allocation (candidate).

blueprint: pci-device-tracking-in-placement
Change-Id: Ibc8cacc85a09ffc2985c1eb637ae35014b8e595e
Balazs Gibizer 2022-09-01 16:56:20 +02:00
parent 01b5d6ca42
commit 8911da6923
5 changed files with 192 additions and 63 deletions
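
To make the failure mode described above concrete, here is a minimal,
self-contained sketch. The plain dicts and the RP names are illustrative
stand-ins, not Nova's real pool objects or API: placement reports one RP
UUID per allocated device, while a greedy claim drains pools in iteration
order, so the two sides can disagree on the per-pool split.

    # Placement allocated one device from RP1 and two from RP2; the RP
    # UUID of the backing pool appears once per allocated device.
    placement_allocation = ["RP1", "RP2", "RP2"]

    # A greedy claim satisfies the request count (3) by draining pools in
    # iteration order, ignoring the per-pool split above.
    pools = [{"rp_uuid": "RP1", "count": 2}, {"rp_uuid": "RP2", "count": 2}]
    count = 3
    claimed = []
    for pool in pools:
        num = min(count, pool["count"])
        pool["count"] -= num
        claimed += [pool["rp_uuid"]] * num
        count -= num
        if count == 0:
            break

    print(claimed)               # ['RP1', 'RP1', 'RP2']
    print(placement_allocation)  # ['RP1', 'RP2', 'RP2'] -- out of sync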


@@ -1536,7 +1536,7 @@ def update_pci_request_with_placement_allocations(
        for spec in pci_request.spec:
            # FIXME(gibi): this is baaad but spec is a dict of strings so
            # we need to serialize
-           spec['rp_uuids'] = ','.join(set(mapping.values()))
+           spec['rp_uuids'] = ','.join(mapping.values())

    elif needs_update_due_to_qos(pci_request, provider_mapping):


@@ -13,7 +13,8 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+import collections
 import copy
 import typing as ty
@@ -244,6 +244,17 @@ class PciDeviceStats(object):
             free_devs.extend(pool['devices'])
         return free_devs

+    def _allocate_devs(
+        self, pool: Pool, num: int, request_id: str
+    ) -> ty.List["objects.PciDevice"]:
+        alloc_devices = []
+        for _ in range(num):
+            pci_dev = pool['devices'].pop()
+            self._handle_device_dependents(pci_dev)
+            pci_dev.request_id = request_id
+            alloc_devices.append(pci_dev)
+        return alloc_devices
+
     def consume_requests(
         self,
         pci_requests: 'objects.InstancePCIRequests',
@@ -274,20 +285,29 @@ class PciDeviceStats(object):
                     self.add_device(alloc_devices.pop())
                 raise exception.PciDeviceRequestFailed(requests=pci_requests)

-            for pool in pools:
-                if pool['count'] >= count:
-                    num_alloc = count
-                else:
-                    num_alloc = pool['count']
-                count -= num_alloc
-                pool['count'] -= num_alloc
-                for d in range(num_alloc):
-                    pci_dev = pool['devices'].pop()
-                    self._handle_device_dependents(pci_dev)
-                    pci_dev.request_id = request.request_id
-                    alloc_devices.append(pci_dev)
-                if count == 0:
-                    break
+            if not rp_uuids:
+                # if there is no placement allocation then we are free to
+                # consume from the pools in any order:
+                for pool in pools:
+                    if pool['count'] >= count:
+                        num_alloc = count
+                    else:
+                        num_alloc = pool['count']
+                    count -= num_alloc
+                    pool['count'] -= num_alloc
+                    alloc_devices += self._allocate_devs(
+                        pool, num_alloc, request.request_id)
+                    if count == 0:
+                        break
+            else:
+                # but if there is placement allocation then we have to follow
+                # it
+                requested_devs_per_pool_rp = collections.Counter(rp_uuids)
+                for pool in pools:
+                    count = requested_devs_per_pool_rp[pool['rp_uuid']]
+                    pool['count'] -= count
+                    alloc_devices += self._allocate_devs(
+                        pool, count, request.request_id)

         return alloc_devices
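
The else branch added above can be read in isolation as the following toy
sketch, again using simplified pool dicts rather than Nova's real Pool and
objects.PciDevice types: when the request carries a placement allocation,
collections.Counter turns the duplicated RP UUID list into an exact
per-pool quota.

    import collections

    pools = [{"rp_uuid": "RP1", "count": 2}, {"rp_uuid": "RP2", "count": 2}]
    rp_uuids = ["RP1", "RP2", "RP2"]  # one entry per allocated device

    # the exact per-pool quota dictated by the placement allocation
    requested_devs_per_pool_rp = collections.Counter(rp_uuids)

    claimed = []
    for pool in pools:
        num = requested_devs_per_pool_rp[pool["rp_uuid"]]
        pool["count"] -= num
        claimed += [pool["rp_uuid"]] * num

    print(claimed)  # ['RP1', 'RP2', 'RP2'] -- mirrors the allocation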
@@ -543,7 +563,7 @@ class PciDeviceStats(object):
         self,
         pools: ty.List[Pool],
         request: 'objects.InstancePCIRequest',
-        rp_uuids: ty.Set[str],
+        rp_uuids: ty.List[str],
     ) -> ty.List[Pool]:
         if not rp_uuids:
             # If there is no placement allocation then we don't need to filter
@@ -554,6 +574,7 @@ class PciDeviceStats(object):
             # configuration option is not enabled in the scheduler.
             return pools

+        requested_dev_count_per_rp = collections.Counter(rp_uuids)
         matching_pools = []
         for pool in pools:
             rp_uuid = pool.get('rp_uuid')
@@ -572,7 +593,13 @@ class PciDeviceStats(object):
                     "scheduler. This pool is ignored now.", pool)
                 continue

-            if rp_uuid in rp_uuids:
+            if (
+                # the placement allocation contains this pool
+                rp_uuid in requested_dev_count_per_rp and
+                # the amount of dev allocated in placement can be consumed
+                # from the pool
+                pool["count"] >= requested_dev_count_per_rp[rp_uuid]
+            ):
                 matching_pools.append(pool)

         return matching_pools
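
Read on its own, the new filter condition amounts to the predicate below
(simplified pool dicts again, not Nova's real types; Counter's zero
default for missing keys is what rejects pools absent from the
allocation):

    import collections

    def matching_pools(pools, rp_uuids):
        # how many devices placement allocated from each RP
        requested = collections.Counter(rp_uuids)
        return [
            pool for pool in pools
            # the pool must be in the allocation and able to supply the
            # allocated amount of devices
            if pool["rp_uuid"] in requested
            and pool["count"] >= requested[pool["rp_uuid"]]
        ]

    pools = [{"rp_uuid": "RP1", "count": 1}, {"rp_uuid": "RP2", "count": 1}]
    # RP2 is in the allocation but cannot supply two devices, so it is
    # dropped while RP1 is kept
    print(matching_pools(pools, ["RP1", "RP2", "RP2"]))
    # [{'rp_uuid': 'RP1', 'count': 1}]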
@@ -582,7 +609,7 @@ class PciDeviceStats(object):
         pools: ty.List[Pool],
         request: 'objects.InstancePCIRequest',
         numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']],
-        rp_uuids: ty.Set[str],
+        rp_uuids: ty.List[str],
     ) -> ty.Optional[ty.List[Pool]]:
         """Determine if an individual PCI request can be met.
@@ -751,7 +778,7 @@ class PciDeviceStats(object):
         self,
         pools: ty.List[Pool],
         request: 'objects.InstancePCIRequest',
-        rp_uuids: ty.Set[str],
+        rp_uuids: ty.List[str],
         numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']] = None,
     ) -> bool:
         """Apply an individual PCI request.
@@ -782,11 +809,22 @@ class PciDeviceStats(object):
         if not filtered_pools:
             return False

-        count = request.count
-        for pool in filtered_pools:
-            count = self._decrease_pool_count(pools, pool, count)
-            if not count:
-                break
+        if not rp_uuids:
+            # If there is no placement allocation for this request then we are
+            # free to consume from the filtered pools in any order
+            count = request.count
+            for pool in filtered_pools:
+                count = self._decrease_pool_count(pools, pool, count)
+                if not count:
+                    break
+        else:
+            # but if there is placement allocation then we have to follow that
+            requested_devs_per_pool_rp = collections.Counter(rp_uuids)
+            for pool in filtered_pools:
+                count = requested_devs_per_pool_rp[pool['rp_uuid']]
+                pool['count'] -= count
+                if pool['count'] == 0:
+                    pools.remove(pool)

         return True
@@ -794,13 +832,18 @@ class PciDeviceStats(object):
         self,
         provider_mapping: ty.Optional[ty.Dict[str, ty.List[str]]],
         request: 'objects.InstancePCIRequest'
-    ) -> ty.Set[str]:
-        """Return the list of RP uuids that are fulfilling the request"""
+    ) -> ty.List[str]:
+        """Return the list of RP uuids that are fulfilling the request.
+
+        An RP will be in the list as many times as many devices needs to
+        be allocated from that RP.
+        """

         if request.source == objects.InstancePCIRequest.NEUTRON_PORT:
             # TODO(gibi): support neutron based requests in a later cycle
-            # set() will signal that any PCI pool can be used for this request
-            return set()
+            # an empty list will signal that any PCI pool can be used for this
+            # request
+            return []

         if not provider_mapping:
             # NOTE(gibi): AFAIK specs is always a list of a single dict
@@ -809,23 +852,23 @@ class PciDeviceStats(object):
             if not rp_uuids:
                 # This can happen if [filter_scheduler]pci_in_placement is not
                 # enabled yet
-                # set() will signal that any PCI pool can be used for this
-                # request
-                return set()
+                # An empty list will signal that any PCI pool can be used for
+                # this request
+                return []
             # TODO(gibi): this is baaad but spec is a dict of string so
             # the list is serialized
-            return set(rp_uuids.split(','))
+            return rp_uuids.split(',')

         # NOTE(gibi): the PCI prefilter generates RequestGroup suffixes from
         # InstancePCIRequests in the form of {request_id}-{count_index}
         # NOTE(gibi): a suffixed request group always fulfilled from a single
         # RP
-        return {
+        return [
             rp_uuids[0]
             for group_id, rp_uuids in provider_mapping.items()
             if group_id.startswith(request.request_id)
-        }
+        ]

     def apply_requests(
         self,
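The switch from ty.Set[str] to ty.List[str] above is the crux of the
change, and a small sketch shows why (the mapping below is hypothetical;
the suffixed group ids follow the {request_id}-{count_index} convention
noted in the code): keeping duplicates preserves the per-RP device count
that a set would collapse.

    # Hypothetical provider mapping for a request with id "req-1" asking
    # for three devices; each suffixed group is fulfilled by a single RP.
    provider_mapping = {
        "req-1-0": ["pf1-rp-uuid"],
        "req-1-1": ["pf2-rp-uuid"],
        "req-1-2": ["pf2-rp-uuid"],
    }
    request_id = "req-1"

    rp_uuids = [
        rps[0]
        for group_id, rps in provider_mapping.items()
        if group_id.startswith(request_id)
    ]
    print(rp_uuids)  # ['pf1-rp-uuid', 'pf2-rp-uuid', 'pf2-rp-uuid']
    # a set would collapse this to two entries, losing the fact that two
    # devices must come from pf2
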

@@ -1978,27 +1978,18 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
             "compute1_0000:81:00.0", "CUSTOM_MY_VF", 1)

         extra_spec = {"pci_passthrough:alias": "a-vf:3"}
         flavor_id = self._create_flavor(extra_spec=extra_spec)
-        # We expect this to fit, but it does not. The pool filtering logic
-        # only considers which pools can be used based on the allocation
-        # candidate, but does not consider how much device needs to be used
-        # from which pool. So the PCI claim logic sees both PF pools as usable
-        # but allocates 2 dev from 81:00 in nova. Then the PCI allocation
-        # healing logic sees the difference between the placement allocation
-        # and the nova allocation and fails when trys to correct it.
-        self._create_server(
-            flavor_id=flavor_id, networks=[], expected_state="ERROR"
-        )
-        # server_3vf = self._create_server(flavor_id=flavor_id, networks=[])
-        #
-        # self.assertPCIDeviceCounts('compute1', total=4, free=1)
-        # compute1_expected_placement_view["usages"] = {
-        #     "0000:81:00.0": {"CUSTOM_MY_VF": 1},
-        #     "0000:81:01.0": {"CUSTOM_MY_VF": 2},
-        # }
-        # compute1_expected_placement_view["allocations"][server_3vf["id"]] = {
-        #     "0000:81:00.0": {"CUSTOM_MY_VF": 1},
-        #     "0000:81:01.0": {"CUSTOM_MY_VF": 2},
-        # }
-        # self.assert_placement_pci_view(
-        #     "compute1", **compute1_expected_placement_view)
-        # self.assert_no_pci_healing("compute1")
+        # We expect this to fit.
+        server_3vf = self._create_server(flavor_id=flavor_id, networks=[])
+
+        self.assertPCIDeviceCounts('compute1', total=4, free=1)
+        compute1_expected_placement_view["usages"] = {
+            "0000:81:00.0": {"CUSTOM_MY_VF": 1},
+            "0000:81:01.0": {"CUSTOM_MY_VF": 2},
+        }
+        compute1_expected_placement_view["allocations"][server_3vf["id"]] = {
+            "0000:81:00.0": {"CUSTOM_MY_VF": 1},
+            "0000:81:01.0": {"CUSTOM_MY_VF": 2},
+        }
+        self.assert_placement_pci_view(
+            "compute1", **compute1_expected_placement_view)
+        self.assert_no_pci_healing("compute1")


@@ -1597,7 +1597,7 @@ class PciRequestUpdateTestCase(test.NoDBTestCase):
             provider_mapping)

         self.assertEqual(
-            ",".join({uuids.rp1, uuids.rp2}), req.spec[0]["rp_uuids"]
+            {uuids.rp1, uuids.rp2}, set(req.spec[0]["rp_uuids"].split(','))
         )

     def test_pci_request_has_no_mapping(self):
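
The assertion rewrite above also dodges a flakiness source: with rp_uuids
now kept as a list, the serialized string's order depends on mapping
iteration order, and joining a set (as the old code did) is likewise
order-unspecified. A quick illustration with placeholder values:

    # joining a set yields an implementation-dependent order, so comparing
    # the joined string against a fixed literal is fragile:
    joined = ",".join({"rp1", "rp2"})  # may be "rp1,rp2" or "rp2,rp1"

    # comparing as sets is order-insensitive and therefore stable:
    assert set(joined.split(",")) == {"rp1", "rp2"}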


@@ -12,7 +12,8 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+import collections
 from unittest import mock

 from oslo_config import cfg
@@ -1039,9 +1039,9 @@ class PciDeviceStatsProviderMappingTestCase(test.NoDBTestCase):
             ),
         ]
         self.flags(device_spec=device_spec, group="pci")
-        dev_filter = whitelist.Whitelist(device_spec)
+        self.dev_filter = whitelist.Whitelist(device_spec)
         self.pci_stats = stats.PciDeviceStats(
-            objects.NUMATopology(), dev_filter=dev_filter
+            objects.NUMATopology(), dev_filter=self.dev_filter
         )
         # add devices represented by different RPs in placement
         # two VFs on the same PF
@@ -1384,6 +1384,101 @@ class PciDeviceStatsProviderMappingTestCase(test.NoDBTestCase):
             {pool['rp_uuid'] for pool in self.pci_stats.pools},
         )

+    def _create_two_pools_with_two_vfs(self):
+        # create two pools (PFs) with two VFs each
+        self.pci_stats = stats.PciDeviceStats(
+            objects.NUMATopology(), dev_filter=self.dev_filter
+        )
+        for pf_index in [1, 2]:
+            for vf_index in [1, 2]:
+                dev = objects.PciDevice(
+                    compute_node_id=1,
+                    vendor_id="dead",
+                    product_id="beef",
+                    address=f"0000:81:0{pf_index}.{vf_index}",
+                    parent_addr=f"0000:81:0{pf_index}.0",
+                    numa_node=0,
+                    dev_type="type-VF",
+                )
+                self.pci_stats.add_device(dev)
+                dev.extra_info = {'rp_uuid': getattr(uuids, f"pf{pf_index}")}
+
+        # populate the RP -> pool mapping from the devices to its pools
+        self.pci_stats.populate_pools_metadata_from_assigned_devices()
+
+        # we have 2 pools and 4 devs in total
+        self.num_pools = 2
+        self.assertEqual(self.num_pools, len(self.pci_stats.pools))
+        self.num_devs = 4
+        self.assertEqual(
+            self.num_devs, sum(pool["count"] for pool in self.pci_stats.pools)
+        )
+
+    def test_apply_asymmetric_allocation(self):
+        self._create_two_pools_with_two_vfs()
+
+        # ask for 3 VFs
+        vf_req = objects.InstancePCIRequest(
+            count=3,
+            alias_name='a-vf',
+            request_id=uuids.vf_req,
+            spec=[
+                {
+                    "vendor_id": "dead",
+                    "product_id": "beef",
+                    "dev_type": "type-VF",
+                }
+            ],
+        )
+
+        # Simulate that placement returned an allocation candidate where 1 VF
+        # is consumed from PF1 and two from PF2
+        mapping = {
+            # the VF is represented by the parent PF RP
+            f"{uuids.vf_req}-0": [uuids.pf1],
+            f"{uuids.vf_req}-1": [uuids.pf2],
+            f"{uuids.vf_req}-2": [uuids.pf2],
+        }
+        # This should fit
+        self.assertTrue(
+            self.pci_stats.support_requests([vf_req], mapping)
+        )
+        # and when consumed the consumption from the pools should be in sync
+        # with the placement allocation. So the PF2 pool is expected to
+        # disappear as it is fully consumed and the PF1 pool should have
+        # one free device.
+        self.pci_stats.apply_requests([vf_req], mapping)
+        self.assertEqual(1, len(self.pci_stats.pools))
+        self.assertEqual(uuids.pf1, self.pci_stats.pools[0]['rp_uuid'])
+        self.assertEqual(1, self.pci_stats.pools[0]['count'])
+
+    def test_consume_asymmetric_allocation(self):
+        self._create_two_pools_with_two_vfs()
+
+        # ask for 3 VFs
+        vf_req = objects.InstancePCIRequest(
+            count=3,
+            alias_name='a-vf',
+            request_id=uuids.vf_req,
+            spec=[
+                {
+                    "vendor_id": "dead",
+                    "product_id": "beef",
+                    "dev_type": "type-VF",
+                    # Simulate that the scheduler already allocated a
+                    # candidate and the mapping is stored in the request.
+                    # In placement 1 VF is allocated from PF1 and two from PF2
+                    "rp_uuids": ",".join([uuids.pf1, uuids.pf2, uuids.pf2])
+                }
+            ],
+        )
+
+        # So when the PCI claim consumes devices based on this request we
+        # expect that nova follows what is allocated in placement.
+        devs = self.pci_stats.consume_requests([vf_req])
+        self.assertEqual(
+            {"0000:81:01.0": 1, "0000:81:02.0": 2},
+            collections.Counter(dev.parent_addr for dev in devs),
+        )
+
     def test_consume_restricted_by_allocation(self):
         pf_req = objects.InstancePCIRequest(
             count=1,