Filter PCI pools based on Placement allocation

The stats module is used to decide if the InstancePCIRequests of a boot
request can fit on a given compute host and to decide which PCI device
pool can fulfill the requests. It is used both during scheduling and
in the PCI claim code.

PCI devices are now modelled in placement and the allocation_candidate
query now requests PCI resources. Therefore, each allocation candidate
returned from placement already restricts which PCI devices can be used
in the PciPassthroughFilter, the NUMATopologyFilter, and the PCI claim
code paths. This patch adapts the stats module to consider the PCI
allocation candidate or the already made placement PCI allocation when
filtering the PCI device pools.

blueprint: pci-device-tracking-in-placement
Change-Id: If363981c4aeeb09a96ee94b140070d3d0f6af48f
This commit is contained in:
Balazs Gibizer 2022-08-19 12:24:42 +02:00
parent 3d818c3473
commit f1d82c0d0a
13 changed files with 808 additions and 87 deletions

View File

@ -124,7 +124,13 @@ class Claim(NopClaim):
pci_requests = self._pci_requests
if pci_requests.requests:
stats = self.tracker.pci_tracker.stats
if not stats.support_requests(pci_requests.requests):
if not stats.support_requests(
pci_requests.requests,
# We explicitly signal that we are _after_ the scheduler made
# allocations in placement and therefore pci_requests.requests
# carry its own placement provider mapping information
provider_mapping=None,
):
return _('Claim pci failed')
def _test_numa_topology(self, compute_node, limit):
@ -139,12 +145,17 @@ class Claim(NopClaim):
if pci_requests.requests:
pci_stats = self.tracker.pci_tracker.stats
instance_topology = (
hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
limits=limit,
pci_requests=pci_requests.requests,
pci_stats=pci_stats))
instance_topology = hardware.numa_fit_instance_to_host(
host_topology,
requested_topology,
limits=limit,
pci_requests=pci_requests.requests,
pci_stats=pci_stats,
# We explicitly signal that we are _after_ the scheduler made
# allocations in placement and therefore pci_requests.requests
# carry its own placement provider mapping information
provider_mapping=None,
)
if requested_topology and not instance_topology:
if pci_requests.requests:

View File

@ -253,7 +253,10 @@ class PciDeviceStats(object):
for request in pci_requests:
count = request.count
pools = self._filter_pools(self.pools, request, numa_cells)
# FIXME(gibi): we need to gather the rp_uuids from the
# InstancePCIRequests once stored there
pools = self._filter_pools(
self.pools, request, numa_cells, rp_uuids=set())
# Failed to allocate the required number of devices. Return the
# devices already allocated during previous iterations back to
@ -534,11 +537,50 @@ class PciDeviceStats(object):
pool.get(PCI_REMOTE_MANAGED_TAG))]
return pools
def _filter_pools_based_on_placement_allocation(
    self,
    pools: ty.List[Pool],
    request: 'objects.InstancePCIRequest',
    rp_uuids: ty.Set[str],
) -> ty.List[Pool]:
    """Keep only the pools backed by one of the given resource providers.

    :param pools: A list of PCI device pool dicts.
    :param request: The InstancePCIRequest being processed. It is not
        read by this method.
    :param rp_uuids: The set of resource provider UUIDs the pools have to
        match. An empty set means no restriction.
    :returns: ``pools`` itself if ``rp_uuids`` is empty, otherwise a new
        list with the pools whose ``rp_uuid`` is in ``rp_uuids``.
    """
    if not rp_uuids:
        # Without a placement allocation there is nothing to filter by.
        # This is the case if the instance only has neutron port based
        # InstancePCIRequests, as those currently have no placement
        # allocation (except for QoS ports, but that is handled in a
        # separate codepath), or if the [scheduler]pci_in_placement
        # configuration option is not enabled in the scheduler.
        return pools

    allocated_pools = []
    for candidate in pools:
        provider = candidate.get('rp_uuid')
        if provider is not None:
            if provider in rp_uuids:
                allocated_pools.append(candidate)
            continue

        # NOTE(gibi): A pool has no rp_uuid field if
        # [pci]report_in_placement is not enabled for a compute with
        # viable PCI devices. As rp_uuids is not empty we know that
        # [scheduler]pci_in_placement is enabled, so this is a
        # configuration error and the pool is skipped.
        LOG.warning(
            "The PCI pool %s isn't mapped to an RP UUID but the "
            "scheduler is configured to create PCI allocations in "
            "placement. This should not happen. Please enable "
            "[pci]report_in_placement on all compute hosts before "
            "enabling [scheduler]pci_in_placement in the scheduler. "
            "This pool is ignored now.", candidate)

    return allocated_pools
def _filter_pools(
self,
pools: ty.List[Pool],
request: 'objects.InstancePCIRequest',
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']],
rp_uuids: ty.Set[str],
) -> ty.Optional[ty.List[Pool]]:
"""Determine if an individual PCI request can be met.
@ -553,6 +595,9 @@ class PciDeviceStats(object):
quantity and required NUMA affinity of device(s) we want.
:param numa_cells: A list of InstanceNUMACell objects whose ``id``
corresponds to the ``id`` of host NUMACell objects.
:param rp_uuids: A set of RP uuids this request is fulfilled from in
placement. So here we have to consider only the pools matching with
these RP uuids.
:returns: A list of pools that can be used to support the request if
this is possible, else None.
"""
@ -637,6 +682,19 @@ class PciDeviceStats(object):
before_count - after_count
)
# if there is placement allocation for the request then we have to
# remove the pools that are not in the placement allocation
before_count = after_count
pools = self._filter_pools_based_on_placement_allocation(
pools, request, rp_uuids)
after_count = sum([pool['count'] for pool in pools])
if after_count < before_count:
LOG.debug(
'Dropped %d device(s) that are not part of the placement '
'allocation',
before_count - after_count
)
if after_count < request.count:
LOG.debug('Not enough PCI devices left to satisfy request')
return None
@ -646,6 +704,7 @@ class PciDeviceStats(object):
def support_requests(
self,
requests: ty.List['objects.InstancePCIRequest'],
provider_mapping: ty.Optional[ty.Dict[str, ty.List[str]]],
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']] = None,
) -> bool:
"""Determine if the PCI requests can be met.
@ -659,6 +718,12 @@ class PciDeviceStats(object):
:param requests: A list of InstancePCIRequest object describing the
types, quantities and required NUMA affinities of devices we want.
:type requests: nova.objects.InstancePCIRequests
:param provider_mapping: A dict keyed by RequestGroup requester_id,
to a list of resource provider UUIDs which provide resource
for that RequestGroup. If it is None then it signals that the
InstancePCIRequest objects already stores a mapping per request.
I.e.: we are called _after_ the scheduler made allocations for this
request in placement.
:param numa_cells: A list of InstanceNUMACell objects whose ``id``
corresponds to the ``id`` of host NUMACells, or None.
:returns: Whether this compute node can satisfy the given request.
@ -674,7 +739,7 @@ class PciDeviceStats(object):
# objects.
stats = copy.deepcopy(self)
try:
stats.apply_requests(requests, numa_cells)
stats.apply_requests(requests, provider_mapping, numa_cells)
except exception.PciDeviceRequestFailed:
return False
@ -684,6 +749,7 @@ class PciDeviceStats(object):
self,
pools: ty.List[Pool],
request: 'objects.InstancePCIRequest',
rp_uuids: ty.Set[str],
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']] = None,
) -> bool:
"""Apply an individual PCI request.
@ -697,6 +763,8 @@ class PciDeviceStats(object):
:param pools: A list of PCI device pool dicts
:param request: An InstancePCIRequest object describing the type,
quantity and required NUMA affinity of device(s) we want.
:param rp_uuids: A set of RP uuids this request is fulfilled from in
placement
:param numa_cells: A list of InstanceNUMACell objects whose ``id``
corresponds to the ``id`` of host NUMACell objects.
:returns: True if the request was applied against the provided pools
@ -706,7 +774,8 @@ class PciDeviceStats(object):
# Two concurrent requests may succeed when called support_requests
# because this method does not remove related devices from the pools
filtered_pools = self._filter_pools(pools, request, numa_cells)
filtered_pools = self._filter_pools(
pools, request, numa_cells, rp_uuids)
if not filtered_pools:
return False
@ -719,9 +788,36 @@ class PciDeviceStats(object):
return True
def _get_rp_uuids_for_request(
    self,
    provider_mapping: ty.Optional[ty.Dict[str, ty.List[str]]],
    request: 'objects.InstancePCIRequest'
) -> ty.Set[str]:
    """Return the set of RP uuids that are fulfilling the request.

    :param provider_mapping: A dict mapping request group ids to the list
        of resource provider UUIDs fulfilling that group, or None.
    :param request: The InstancePCIRequest to look up.
    :returns: The first RP uuid of every group whose id starts with the
        request's ``request_id``. An empty set is returned when the
        mapping is None or empty, and for neutron port based requests.
    """
    if not provider_mapping:
        # FIXME(gibi): read the mapping from the request
        return set()

    if request.source == objects.InstancePCIRequest.NEUTRON_PORT:
        # TODO(gibi): support neutron based requests in a later cycle
        # set() will signal that any PCI pool can be used for this request
        return set()

    # NOTE(gibi): the PCI prefilter generates RequestGroup suffixes from
    # InstancePCIRequests in the form of {request_id}-{count_index}
    # NOTE(gibi): a suffixed request group is always fulfilled from a
    # single RP, hence only the first uuid of each list is taken
    selected = set()
    for group_id, providers in provider_mapping.items():
        if group_id.startswith(request.request_id):
            selected.add(providers[0])
    return selected
def apply_requests(
self,
requests: ty.List['objects.InstancePCIRequest'],
provider_mapping: ty.Optional[ty.Dict[str, ty.List[str]]],
numa_cells: ty.Optional[ty.List['objects.InstanceNUMACell']] = None,
) -> None:
"""Apply PCI requests to the PCI stats.
@ -735,15 +831,23 @@ class PciDeviceStats(object):
:param requests: A list of InstancePCIRequest object describing the
types, quantities and required NUMA affinities of devices we want.
:type requests: nova.objects.InstancePCIRequests
:param provider_mapping: A dict keyed by RequestGroup requester_id,
to a list of resource provider UUIDs which provide resource
for that RequestGroup. If it is None then it signals that the
InstancePCIRequest objects already stores a mapping per request.
I.e.: we are called _after_ the scheduler made allocations for this
request in placement.
:param numa_cells: A list of InstanceNUMACell objects whose ``id``
corresponds to the ``id`` of host NUMACells, or None.
:raises: exception.PciDeviceRequestFailed if this compute node cannot
satisfy the given request.
"""
if not all(
self._apply_request(self.pools, r, numa_cells) for r in requests
):
raise exception.PciDeviceRequestFailed(requests=requests)
for r in requests:
rp_uuids = self._get_rp_uuids_for_request(provider_mapping, r)
if not self._apply_request(self.pools, r, rp_uuids, numa_cells):
raise exception.PciDeviceRequestFailed(requests=requests)
def __iter__(self) -> ty.Iterator[Pool]:
pools: ty.List[Pool] = []

View File

@ -97,12 +97,34 @@ class NUMATopologyFilter(filters.BaseHostFilter):
if network_metadata:
limits.network_metadata = network_metadata
instance_topology = (hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
limits=limits,
pci_requests=pci_requests,
pci_stats=host_state.pci_stats))
if not instance_topology:
good_candidates = []
for candidate in host_state.allocation_candidates:
LOG.debug(
'NUMATopologyFilter tries allocation candidate: %s, %s',
candidate, requested_topology
)
instance_topology = (hardware.numa_fit_instance_to_host(
host_topology, requested_topology,
limits=limits,
pci_requests=pci_requests,
pci_stats=host_state.pci_stats,
provider_mapping=candidate['mappings'],
))
if instance_topology:
LOG.debug(
'NUMATopologyFilter accepted allocation candidate: %s',
candidate
)
good_candidates.append(candidate)
else:
LOG.debug(
'NUMATopologyFilter rejected allocation candidate: %s',
candidate
)
host_state.allocation_candidates = good_candidates
if not host_state.allocation_candidates:
LOG.debug("%(host)s, %(node)s fails NUMA topology "
"requirements. The instance does not fit on this "
"host.", {'host': host_state.host,

View File

@ -47,10 +47,40 @@ class PciPassthroughFilter(filters.BaseHostFilter):
pci_requests = spec_obj.pci_requests
if not pci_requests or not pci_requests.requests:
return True
if (not host_state.pci_stats or
not host_state.pci_stats.support_requests(pci_requests.requests)):
if not host_state.pci_stats:
LOG.debug("%(host_state)s doesn't have the required PCI devices"
" (%(requests)s)",
{'host_state': host_state, 'requests': pci_requests})
return False
good_candidates = []
for candidate in host_state.allocation_candidates:
LOG.debug(
'PciPassthroughFilter tries allocation candidate: %s',
candidate
)
if host_state.pci_stats.support_requests(
pci_requests.requests,
provider_mapping=candidate['mappings']
):
LOG.debug(
'PciPassthroughFilter accepted allocation candidate: %s',
candidate
)
good_candidates.append(candidate)
else:
LOG.debug(
'PciPassthroughFilter rejected allocation candidate: %s',
candidate
)
host_state.allocation_candidates = good_candidates
if not good_candidates:
LOG.debug("%(host_state)s doesn't have the required PCI devices"
" (%(requests)s)",
{'host_state': host_state, 'requests': pci_requests})
return False
return True

View File

@ -298,7 +298,9 @@ class HostState(object):
spec_obj.numa_topology = hardware.numa_fit_instance_to_host(
self.numa_topology, spec_obj.numa_topology,
limits=self.limits.get('numa_topology'),
pci_requests=pci_requests, pci_stats=self.pci_stats)
pci_requests=pci_requests,
pci_stats=self.pci_stats,
provider_mapping=spec_obj.get_request_group_mapping())
self.numa_topology = hardware.numa_usage_from_instance_numa(
self.numa_topology, spec_obj.numa_topology)
@ -308,7 +310,11 @@ class HostState(object):
instance_cells = None
if spec_obj.numa_topology:
instance_cells = spec_obj.numa_topology.cells
self.pci_stats.apply_requests(pci_requests, instance_cells)
self.pci_stats.apply_requests(
pci_requests,
spec_obj.get_request_group_mapping(),
instance_cells
)
# NOTE(sbauza): By considering all cases when the scheduler is called
# and when consume_from_request() is run, we can safely say that there

View File

@ -169,7 +169,8 @@ class ClaimTestCase(test.NoDBTestCase):
spec=[{'vendor_id': 'v', 'product_id': 'p'}])
requests = objects.InstancePCIRequests(requests=[request])
self._claim(requests=requests)
mock_pci_supports_requests.assert_called_once_with([request])
mock_pci_supports_requests.assert_called_once_with(
[request], provider_mapping=None)
@mock.patch('nova.pci.stats.PciDeviceStats.support_requests',
return_value=False)
@ -181,7 +182,8 @@ class ClaimTestCase(test.NoDBTestCase):
exception.ComputeResourcesUnavailable,
'Claim pci failed.',
self._claim, requests=requests)
mock_pci_supports_requests.assert_called_once_with([request])
mock_pci_supports_requests.assert_called_once_with(
[request], provider_mapping=None)
@mock.patch('nova.pci.stats.PciDeviceStats.support_requests')
def test_pci_pass_no_requests(self, mock_pci_supports_requests):

View File

@ -2430,7 +2430,8 @@ class TestInstanceClaim(BaseTestCase):
self.allocations, None)
cn = self.rt.compute_nodes[_NODENAME]
update_mock.assert_called_once_with(self.elevated, cn)
pci_stats_mock.assert_called_once_with([request])
pci_stats_mock.assert_called_once_with(
[request], provider_mapping=None)
self.assertTrue(obj_base.obj_equal_prims(expected, cn))
@mock.patch('nova.compute.utils.is_volume_backed_instance',

View File

@ -156,24 +156,27 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
def test_apply_requests(self):
self.assertEqual(len(self.pci_stats.pools), 4)
self.pci_stats.apply_requests(pci_requests)
self.pci_stats.apply_requests(pci_requests, {})
self.assertEqual(len(self.pci_stats.pools), 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
self.assertEqual(self.pci_stats.pools[0]['count'], 1)
def test_apply_requests_failed(self):
self.assertRaises(exception.PciDeviceRequestFailed,
self.assertRaises(
exception.PciDeviceRequestFailed,
self.pci_stats.apply_requests,
pci_requests_multiple)
pci_requests_multiple,
{},
)
def test_support_requests(self):
self.assertTrue(self.pci_stats.support_requests(pci_requests))
self.assertTrue(self.pci_stats.support_requests(pci_requests, {}))
self.assertEqual(len(self.pci_stats.pools), 4)
self.assertEqual([d['count'] for d in self.pci_stats], [1, 1, 1, 1])
def test_support_requests_failed(self):
self.assertFalse(
self.pci_stats.support_requests(pci_requests_multiple))
self.pci_stats.support_requests(pci_requests_multiple, {}))
self.assertEqual(len(self.pci_stats.pools), 4)
self.assertEqual([d['count'] for d in self.pci_stats], [1, 1, 1, 1])
@ -184,14 +187,18 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
objects.InstanceNUMACell(
id=1, cpuset=set(), pcpuset=set(), memory=0),
]
self.assertTrue(self.pci_stats.support_requests(pci_requests, cells))
self.assertTrue(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
def test_support_requests_numa_failed(self):
cells = [
objects.InstanceNUMACell(
id=0, cpuset=set(), pcpuset=set(), memory=0),
]
self.assertFalse(self.pci_stats.support_requests(pci_requests, cells))
self.assertFalse(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
def test_support_requests_no_numa_info(self):
cells = [
@ -199,12 +206,16 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
id=0, cpuset=set(), pcpuset=set(), memory=0),
]
pci_requests = self._get_fake_requests(vendor_ids=['v3'])
self.assertTrue(self.pci_stats.support_requests(pci_requests, cells))
self.assertTrue(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
# 'legacy' is the default numa_policy so the result must be same
pci_requests = self._get_fake_requests(vendor_ids=['v3'],
numa_policy = fields.PCINUMAAffinityPolicy.LEGACY)
self.assertTrue(self.pci_stats.support_requests(pci_requests, cells))
self.assertTrue(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
def test_support_requests_numa_pci_numa_policy_preferred(self):
# numa node 0 has 2 devices with vendor_id 'v1'
@ -218,7 +229,9 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
pci_requests = self._get_fake_requests(
numa_policy=fields.PCINUMAAffinityPolicy.PREFERRED)
self.assertTrue(self.pci_stats.support_requests(pci_requests, cells))
self.assertTrue(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
def test_support_requests_no_numa_info_pci_numa_policy_required(self):
# pci device with vendor_id 'v3' has numa_node=None.
@ -230,7 +243,9 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
pci_requests = self._get_fake_requests(vendor_ids=['v3'],
numa_policy=fields.PCINUMAAffinityPolicy.REQUIRED)
self.assertFalse(self.pci_stats.support_requests(pci_requests, cells))
self.assertFalse(
self.pci_stats.support_requests(pci_requests, {}, cells)
)
def test_filter_pools_for_socket_affinity_no_socket(self):
self.pci_stats.numa_topology = objects.NUMATopology(
@ -1012,6 +1027,329 @@ class PciDeviceStatsPlacementSupportTestCase(test.NoDBTestCase):
)
class PciDeviceStatsProviderMappingTestCase(test.NoDBTestCase):
    """Test support_requests / apply_requests with provider mappings.

    The fixture holds five devices spread over four pools, each pool
    tagged with the uuid of a resource provider.
    """

    def setUp(self):
        super().setUp()
        # for simplicity accept any devices
        device_spec = [jsonutils.dumps({"address": "*:*:*.*"})]
        self.flags(device_spec=device_spec, group="pci")
        dev_filter = whitelist.Whitelist(device_spec)
        self.pci_stats = stats.PciDeviceStats(
            objects.NUMATopology(), dev_filter=dev_filter
        )

        # add devices represented by different RPs in placement
        # two VFs on the same PF, both reported on the pf1 RP
        self.vf1 = self._add_device(
            "0000:81:00.1", "type-VF", uuids.pf1,
            parent_addr="0000:81:00.0")
        self.vf2 = self._add_device(
            "0000:81:00.2", "type-VF", uuids.pf1,
            parent_addr="0000:81:00.0")
        # two PFs, pf2 and pf3 (pf1 is used for the parent of the above
        # VFs)
        self.pf2 = self._add_device("0000:82:00.0", "type-PF", uuids.pf2)
        self.pf3 = self._add_device("0000:83:00.0", "type-PF", uuids.pf3)
        # a PCI
        self.pci1 = self._add_device(
            "0000:84:00.0", "type-PCI", uuids.pci1)

        # populate the RP -> pool mapping from the devices to its pools
        self.pci_stats.populate_pools_metadata_from_assigned_devices()

        # we have 1 pool for the two VFs, then each remaining device has
        # its own pool
        self.num_pools = 4
        self.assertEqual(self.num_pools, len(self.pci_stats.pools))
        self.num_devs = 5
        self.assertEqual(self.num_devs, self._device_count())

    def _add_device(self, address, dev_type, rp_uuid, parent_addr=None):
        """Create a PciDevice, add it to the stats, then set its RP uuid."""
        dev = objects.PciDevice(
            compute_node_id=1,
            vendor_id="dead",
            product_id="beef",
            address=address,
            parent_addr=parent_addr,
            numa_node=0,
            dev_type=dev_type,
        )
        self.pci_stats.add_device(dev)
        dev.extra_info = {'rp_uuid': rp_uuid}
        return dev

    @staticmethod
    def _build_request(dev_type, count=1, **fields):
        """Build an InstancePCIRequest for a dead:beef device.

        Optional fields such as alias_name and request_id are only set
        on the request when they are passed in.
        """
        return objects.InstancePCIRequest(
            count=count,
            spec=[
                {
                    "vendor_id": "dead",
                    "product_id": "beef",
                    "dev_type": dev_type,
                }
            ],
            **fields,
        )

    def _device_count(self):
        """Return the number of devices summed over all the pools."""
        return sum(pool["count"] for pool in self.pci_stats.pools)

    def _assert_pools(self, num_pools, num_devs):
        """Assert the number of pools and the number of devices in them."""
        self.assertEqual(num_pools, len(self.pci_stats.pools))
        self.assertEqual(num_devs, self._device_count())

    def _pool_rp_uuids(self):
        """Return the set of RP uuids of the current pools."""
        return {pool['rp_uuid'] for pool in self.pci_stats.pools}

    def test_support_request_unrestricted(self):
        reqs = [
            self._build_request(dev_type, alias_name='a-dev')
            for dev_type in ["type-VF", "type-PF", "type-PCI"]
        ]

        # an empty mapping means unrestricted by any provider
        # we have devs for all type so each request should fit
        self.assertTrue(self.pci_stats.support_requests(reqs, {}))

        # the support_requests call is expected not to consume any device
        self._assert_pools(self.num_pools, self.num_devs)

        # now apply the same request to consume the pools
        self.pci_stats.apply_requests(reqs, {})
        # we have consumed 3 devs (a VF, a PF, and a PCI)
        self.assertEqual(self.num_devs - 3, self._device_count())
        # the empty pools are purged. We have one pool for the remaining VF
        # and the remaining PF
        self.assertEqual(2, len(self.pci_stats.pools))

    def test_support_request_restricted_by_provider_mapping(self):
        pf_req = self._build_request(
            "type-PF", alias_name='a-dev', request_id=uuids.req1)
        # simulate that placement restricted the possible RPs to pf3
        mapping = {f"{uuids.req1}-0": [uuids.pf3]}

        self.assertTrue(self.pci_stats.support_requests([pf_req], mapping))

        # the support_requests call is expected not to consume any device
        self._assert_pools(self.num_pools, self.num_devs)

        # now apply the request and see if the right device is consumed
        self.pci_stats.apply_requests([pf_req], mapping)

        self._assert_pools(self.num_pools - 1, self.num_devs - 1)
        # pf3 is not available in the pools any more
        self.assertEqual(
            {uuids.pf1, uuids.pf2, uuids.pci1}, self._pool_rp_uuids())

    def test_support_request_restricted_by_provider_mapping_does_not_fit(self):
        pf_req = self._build_request(
            "type-PF", alias_name='a-dev', request_id=uuids.req1)
        # Simulate that placement returned an allocation candidate with a
        # PF (pf4) that is not in the pools, e.g. because it was filtered
        # out by numa cell. We expect the request to fail.
        mapping = {f"{uuids.req1}-0": [uuids.pf4]}

        self.assertFalse(self.pci_stats.support_requests([pf_req], mapping))
        self.assertRaises(
            exception.PciDeviceRequestFailed,
            self.pci_stats.apply_requests,
            [pf_req],
            mapping,
        )

        # and the pools are not changed
        self._assert_pools(self.num_pools, self.num_devs)

    def test_support_request_neutron_port_based_request_ignore_mapping(self):
        # by not having the alias_name set this becomes a neutron port based
        # PCI request
        pf_req = self._build_request("type-PF", request_id=uuids.req1)
        # Simulate that placement returned an allocation candidate with a
        # PF (pf4) that is not in the pools, e.g. because it was filtered
        # out by numa cell. The placement selection is expected to be
        # ignored for neutron port based requests, so this request should
        # fit as we have PFs in the pools.
        mapping = {f"{uuids.req1}-0": [uuids.pf4]}

        self.assertTrue(self.pci_stats.support_requests([pf_req], mapping))
        self.pci_stats.apply_requests([pf_req], mapping)

        # and a PF is consumed
        self._assert_pools(self.num_pools - 1, self.num_devs - 1)

    def test_support_request_req_with_count_2(self):
        # now ask for two PFs in a single request
        pf_req = self._build_request(
            "type-PF", count=2, alias_name='a-dev', request_id=uuids.req1)
        # Simulate that placement returned a candidate mapping each of the
        # two requested PFs to its own RP
        mapping = {
            f"{uuids.req1}-0": [uuids.pf2],
            f"{uuids.req1}-1": [uuids.pf3],
        }

        # so the request fits
        self.assertTrue(self.pci_stats.support_requests([pf_req], mapping))
        self.pci_stats.apply_requests([pf_req], mapping)

        # and both PFs are consumed
        self._assert_pools(self.num_pools - 2, self.num_devs - 2)
        self.assertEqual({uuids.pf1, uuids.pci1}, self._pool_rp_uuids())

    def test_support_requests_multiple_reqs(self):
        # request both a VF and a PF
        vf_req = self._build_request(
            "type-VF", alias_name='a-dev', request_id=uuids.vf_req)
        pf_req = self._build_request(
            "type-PF", alias_name='a-dev', request_id=uuids.pf_req)
        reqs = [vf_req, pf_req]
        # Simulate that placement returned one candidate RP for each req
        mapping = {
            # the VF is represented by the parent PF RP
            f"{uuids.vf_req}-0": [uuids.pf1],
            f"{uuids.pf_req}-0": [uuids.pf3],
        }

        # so the request fits
        self.assertTrue(self.pci_stats.support_requests(reqs, mapping))
        self.pci_stats.apply_requests(reqs, mapping)

        # and the proper devices are consumed
        # Note that the VF pool still has a device so it remains
        self._assert_pools(self.num_pools - 1, self.num_devs - 2)
        self.assertEqual(
            {uuids.pf1, uuids.pf2, uuids.pci1}, self._pool_rp_uuids())
class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
def setUp(self):

View File

@ -11,6 +11,7 @@
# under the License.
import itertools
from unittest import mock
from oslo_utils.fixture import uuidsentinel as uuids
@ -53,7 +54,9 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None,
'cpu_allocation_ratio': 16.0,
'ram_allocation_ratio': 1.5})
'ram_allocation_ratio': 1.5,
'allocation_candidates': [{"mappings": {}}]
})
self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
def test_numa_topology_filter_numa_instance_no_numa_host_fail(self):
@ -132,7 +135,9 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None,
'cpu_allocation_ratio': 21,
'ram_allocation_ratio': 1.3})
'ram_allocation_ratio': 1.3,
'allocation_candidates': [{"mappings": {}}]
})
self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
limits = host.limits['numa_topology']
self.assertEqual(limits.cpu_allocation_ratio, 21)
@ -180,7 +185,9 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'numa_topology': numa_topology,
'pci_stats': None,
'cpu_allocation_ratio': 1,
'ram_allocation_ratio': 1.5})
'ram_allocation_ratio': 1.5,
'allocation_candidates': [{"mappings": {}}],
})
assertion = self.assertTrue if passes else self.assertFalse
# test combinations of image properties and extra specs
@ -237,7 +244,9 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
{'numa_topology': fakes.NUMA_TOPOLOGY,
'pci_stats': None,
'cpu_allocation_ratio': 16.0,
'ram_allocation_ratio': 1.5})
'ram_allocation_ratio': 1.5,
'allocation_candidates': [{"mappings": {}}]
})
self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
def test_numa_topology_filter_fail_mempages(self):
@ -287,7 +296,9 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
'numa_topology': host_topology,
'pci_stats': None,
'cpu_allocation_ratio': 16.0,
'ram_allocation_ratio': 1.5})
'ram_allocation_ratio': 1.5,
'allocation_candidates': [{"mappings": {}}],
})
def test_numa_topology_filter_pass_networks(self):
host = self._get_fake_host_state_with_networks()
@ -329,3 +340,79 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
network_metadata=network_metadata)
self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
@mock.patch("nova.virt.hardware.numa_fit_instance_to_host")
def test_filters_candidates(self, mock_numa_fit):
    """Only the candidates accepted by the NUMA fitting are kept."""
    cell = objects.InstanceNUMACell(
        id=0, cpuset=set([1]), pcpuset=set(), memory=512
    )
    instance_topology = objects.InstanceNUMATopology(cells=[cell])
    spec_obj = self._get_spec_obj(numa_topology=instance_topology)

    # simulate that placement returned 3 candidates for this host
    candidates = [
        {"mappings": {f"{uuids.req1}-0": [rp_name]}}
        for rp_name in (
            "candidate_rp_1", "candidate_rp_2", "candidate_rp_3")
    ]
    host_attributes = {
        "numa_topology": fakes.NUMA_TOPOLOGY,
        "pci_stats": None,
        "cpu_allocation_ratio": 16.0,
        "ram_allocation_ratio": 1.5,
        "allocation_candidates": candidates,
    }
    host = fakes.FakeHostState("host1", "node1", host_attributes)

    # and that from those candidates only the second matches the numa
    # logic
    mock_numa_fit.side_effect = [False, True, False]

    # the host passes as it has at least one viable candidate
    self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
    # the filter checked all three candidates
    self.assertEqual(3, len(mock_numa_fit.mock_calls))
    # and the candidates in the host state are reduced to the only
    # matching one
    self.assertEqual(1, len(host.allocation_candidates))
    self.assertEqual(
        {"mappings": {f"{uuids.req1}-0": ["candidate_rp_2"]}},
        host.allocation_candidates[0],
    )
@mock.patch("nova.virt.hardware.numa_fit_instance_to_host")
def test_filter_fails_if_no_matching_candidate_left(self, mock_numa_fit):
    """The host is rejected when every candidate fails the NUMA fitting."""
    cell = objects.InstanceNUMACell(
        id=0, cpuset=set([1]), pcpuset=set(), memory=512
    )
    instance_topology = objects.InstanceNUMATopology(cells=[cell])
    spec_obj = self._get_spec_obj(numa_topology=instance_topology)

    # simulate that placement returned 1 candidate for this host
    only_candidate = {"mappings": {f"{uuids.req1}-0": ["candidate_rp_1"]}}
    host_attributes = {
        "numa_topology": fakes.NUMA_TOPOLOGY,
        "pci_stats": None,
        "cpu_allocation_ratio": 16.0,
        "ram_allocation_ratio": 1.5,
        "allocation_candidates": [only_candidate],
    }
    host = fakes.FakeHostState("host1", "node1", host_attributes)

    # simulate that the only candidate we have does not match
    mock_numa_fit.side_effect = [False]

    # the filter fails the host as there is no viable candidate left
    self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
    self.assertEqual(1, len(mock_numa_fit.mock_calls))
    # and the candidates list in the host state is now empty
    self.assertEqual(0, len(host.allocation_candidates))

View File

@ -12,6 +12,8 @@
from unittest import mock
from oslo_utils.fixture import uuidsentinel as uuids
from nova import objects
from nova.pci import stats
from nova.scheduler.filters import pci_passthrough_filter
@ -33,11 +35,16 @@ class TestPCIPassthroughFilter(test.NoDBTestCase):
requests = objects.InstancePCIRequests(requests=[request])
spec_obj = objects.RequestSpec(pci_requests=requests)
host = fakes.FakeHostState(
'host1', 'node1',
attribute_dict={'pci_stats': pci_stats_mock})
"host1",
"node1",
attribute_dict={
"pci_stats": pci_stats_mock,
"allocation_candidates": [{"mappings": {}}],
},
)
self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
pci_stats_mock.support_requests.assert_called_once_with(
requests.requests)
requests.requests, provider_mapping={})
def test_pci_passthrough_fail(self):
pci_stats_mock = mock.MagicMock()
@ -47,11 +54,16 @@ class TestPCIPassthroughFilter(test.NoDBTestCase):
requests = objects.InstancePCIRequests(requests=[request])
spec_obj = objects.RequestSpec(pci_requests=requests)
host = fakes.FakeHostState(
'host1', 'node1',
attribute_dict={'pci_stats': pci_stats_mock})
"host1",
"node1",
attribute_dict={
"pci_stats": pci_stats_mock,
"allocation_candidates": [{"mappings": {}}],
},
)
self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
pci_stats_mock.support_requests.assert_called_once_with(
requests.requests)
requests.requests, provider_mapping={})
def test_pci_passthrough_no_pci_request(self):
spec_obj = objects.RequestSpec(pci_requests=None)
@ -82,3 +94,92 @@ class TestPCIPassthroughFilter(test.NoDBTestCase):
host = fakes.FakeHostState('host1', 'node1',
attribute_dict={'pci_stats': None})
self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
def test_filters_candidates(self):
    """Keep only the allocation candidate accepted by the PCI stats.

    The fake host carries three candidates and the mocked
    ``support_requests`` accepts only the second one. The filter is
    expected to pass the host, to have checked each candidate's mapping
    in order, and to leave just the accepted candidate on the host state.
    """

    def mapping_for(rp_name):
        # Return a new dict on every call so that expectations never
        # share objects with the candidates stored on the host state.
        return {f"{uuids.req1}-0": [rp_name]}

    pci_stats_mock = mock.MagicMock()
    # simulate that only the second allocation candidate fits
    pci_stats_mock.support_requests.side_effect = [False, True, False]

    request = objects.InstancePCIRequest(
        count=1,
        spec=[{"vendor_id": "8086"}],
        request_id=uuids.req1,
    )
    requests = objects.InstancePCIRequests(requests=[request])
    spec_obj = objects.RequestSpec(pci_requests=requests)

    # simulate that placement returned 3 possible candidates
    host_attributes = {
        "pci_stats": pci_stats_mock,
        "allocation_candidates": [
            {"mappings": mapping_for("candidate_rp_1")},
            {"mappings": mapping_for("candidate_rp_2")},
            {"mappings": mapping_for("candidate_rp_3")},
        ],
    }
    host = fakes.FakeHostState(
        "host1", "node1", attribute_dict=host_attributes
    )

    # the host passes because at least one candidate is viable
    self.assertTrue(self.filt_cls.host_passes(host, spec_obj))

    # every one of the three candidates was offered to the PCI stats
    expected_calls = [
        mock.call(requests.requests, provider_mapping=mapping_for(rp_name))
        for rp_name in (
            "candidate_rp_1",
            "candidate_rp_2",
            "candidate_rp_3",
        )
    ]
    pci_stats_mock.support_requests.assert_has_calls(expected_calls)

    # the host state now holds only the single matching candidate
    self.assertEqual(1, len(host.allocation_candidates))
    self.assertEqual(
        {"mappings": mapping_for("candidate_rp_2")},
        host.allocation_candidates[0],
    )
def test_filter_fails_if_no_matching_candidate_left(self):
    """Fail the host when its single allocation candidate is rejected.

    The fake host carries exactly one candidate and the mocked
    ``support_requests`` refuses it. The filter is expected to fail the
    host and to leave ``host.allocation_candidates`` empty.
    """
    pci_stats_mock = mock.MagicMock()
    # simulate that the only candidate we have does not match
    pci_stats_mock.support_requests.side_effect = [False]

    request = objects.InstancePCIRequest(
        count=1,
        spec=[{"vendor_id": "8086"}],
        request_id=uuids.req1,
    )
    requests = objects.InstancePCIRequests(requests=[request])
    spec_obj = objects.RequestSpec(pci_requests=requests)

    # simulate that placement returned a single candidate for this host
    host_attributes = {
        "pci_stats": pci_stats_mock,
        "allocation_candidates": [
            {"mappings": {f"{uuids.req1}-0": ["candidate_rp_1"]}},
        ],
    }
    host = fakes.FakeHostState(
        "host1", "node1", attribute_dict=host_attributes
    )

    # no viable candidate is left, so the host has to be filtered out
    passed = self.filt_cls.host_passes(host, spec_obj)
    self.assertFalse(passed)

    # the one candidate we had was offered to the PCI stats exactly once
    expected_mapping = {f"{uuids.req1}-0": ["candidate_rp_1"]}
    pci_stats_mock.support_requests.assert_called_once_with(
        requests.requests,
        provider_mapping=expected_mapping,
    )

    # and the rejected candidate is dropped from the host state
    self.assertEqual(0, len(host.allocation_candidates))

View File

@ -1565,7 +1565,9 @@ class HostStateTestCase(test.NoDBTestCase):
numa_fit_mock.assert_called_once_with(fake_host_numa_topology,
fake_numa_topology,
limits=None, pci_requests=None,
pci_stats=None)
pci_stats=None,
provider_mapping=None,
)
numa_usage_mock.assert_called_once_with(fake_host_numa_topology,
fake_numa_topology)
sync_mock.assert_called_once_with(("fakehost", "fakenode"))

View File

@ -2638,45 +2638,45 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
def test_get_fitting_success_no_limits(self):
fitted_instance1 = hw.numa_fit_instance_to_host(
self.host, self.instance1)
self.host, self.instance1, {})
self.assertIsInstance(fitted_instance1, objects.InstanceNUMATopology)
self.host = hw.numa_usage_from_instance_numa(
self.host, fitted_instance1)
fitted_instance2 = hw.numa_fit_instance_to_host(
self.host, self.instance3)
self.host, self.instance3, {})
self.assertIsInstance(fitted_instance2, objects.InstanceNUMATopology)
def test_get_fitting_success_limits(self):
fitted_instance = hw.numa_fit_instance_to_host(
self.host, self.instance3, self.limits)
self.host, self.instance3, {}, self.limits)
self.assertIsInstance(fitted_instance, objects.InstanceNUMATopology)
self.assertEqual(1, fitted_instance.cells[0].id)
def test_get_fitting_fails_no_limits(self):
fitted_instance = hw.numa_fit_instance_to_host(
self.host, self.instance2, self.limits)
self.host, self.instance2, {}, self.limits)
self.assertIsNone(fitted_instance)
def test_get_fitting_cumulative_fails_limits(self):
fitted_instance1 = hw.numa_fit_instance_to_host(
self.host, self.instance1, self.limits)
self.host, self.instance1, {}, self.limits)
self.assertIsInstance(fitted_instance1, objects.InstanceNUMATopology)
self.assertEqual(1, fitted_instance1.cells[0].id)
self.host = hw.numa_usage_from_instance_numa(
self.host, fitted_instance1)
fitted_instance2 = hw.numa_fit_instance_to_host(
self.host, self.instance2, self.limits)
self.host, self.instance2, {}, self.limits)
self.assertIsNone(fitted_instance2)
def test_get_fitting_cumulative_success_limits(self):
fitted_instance1 = hw.numa_fit_instance_to_host(
self.host, self.instance1, self.limits)
self.host, self.instance1, {}, self.limits)
self.assertIsInstance(fitted_instance1, objects.InstanceNUMATopology)
self.assertEqual(1, fitted_instance1.cells[0].id)
self.host = hw.numa_usage_from_instance_numa(
self.host, fitted_instance1)
fitted_instance2 = hw.numa_fit_instance_to_host(
self.host, self.instance3, self.limits)
self.host, self.instance3, {}, self.limits)
self.assertIsInstance(fitted_instance2, objects.InstanceNUMATopology)
self.assertEqual(2, fitted_instance2.cells[0].id)
@ -2691,7 +2691,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
network_metadata=network_metadata)
fitted_instance = hw.numa_fit_instance_to_host(
self.host, self.instance1, limits=limits)
self.host, self.instance1, {}, limits=limits)
self.assertIsInstance(fitted_instance, objects.InstanceNUMATopology)
mock_supports.assert_called_once_with(
@ -2708,7 +2708,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
network_metadata=network_metadata)
fitted_instance = hw.numa_fit_instance_to_host(
self.host, self.instance1, limits=limits)
self.host, self.instance1, {}, limits=limits)
self.assertIsNone(fitted_instance)
mock_supports.assert_has_calls([
@ -2725,6 +2725,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
'support_requests', return_value= True):
fitted_instance1 = hw.numa_fit_instance_to_host(self.host,
self.instance1,
{},
pci_requests=pci_reqs,
pci_stats=pci_stats)
self.assertIsInstance(fitted_instance1,
@ -2740,6 +2741,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
fitted_instance1 = hw.numa_fit_instance_to_host(
self.host,
self.instance1,
{},
pci_requests=pci_reqs,
pci_stats=pci_stats)
self.assertIsNone(fitted_instance1)
@ -2758,7 +2760,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
# ...therefore an instance without a PCI device should get host cell 2
instance_topology = hw.numa_fit_instance_to_host(
self.host, self.instance1, pci_stats=pci_stats)
self.host, self.instance1, {}, pci_stats=pci_stats)
self.assertIsInstance(instance_topology, objects.InstanceNUMATopology)
# TODO(sfinucan): We should be comparing this against the HOST cell
self.assertEqual(2, instance_topology.cells[0].id)
@ -2768,7 +2770,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
# ...therefore an instance without a PCI device should get host cell 1
instance_topology = hw.numa_fit_instance_to_host(
self.host, self.instance1, pci_stats=pci_stats)
self.host, self.instance1, {}, pci_stats=pci_stats)
self.assertIsInstance(instance_topology, objects.InstanceNUMATopology)
self.assertEqual(1, instance_topology.cells[0].id)
@ -3895,7 +3897,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
for cell in inst_topo.cells:
self.assertInstanceCellPinned(cell, cell_ids=(0, 1))
@ -3933,7 +3935,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
for cell in inst_topo.cells:
self.assertInstanceCellPinned(cell, cell_ids=(1,))
@ -3971,7 +3973,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
self.assertIsNone(inst_topo)
def test_host_numa_fit_instance_to_host_fit(self):
@ -4014,7 +4016,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
for cell in inst_topo.cells:
self.assertInstanceCellPinned(cell, cell_ids=(0, 1))
@ -4069,7 +4071,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
for cell in inst_topo.cells:
self.assertInstanceCellPinned(cell, cell_ids=(0, 2))
@ -4114,7 +4116,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
]
)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
self.assertIsNone(inst_topo)
def test_host_numa_fit_instance_to_host_fail_topology(self):
@ -4148,7 +4150,7 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
objects.InstanceNUMACell(
cpuset=set(), pcpuset=set([4, 5]), memory=1024,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo, {})
self.assertIsNone(inst_topo)
def test_cpu_pinning_usage_from_instances(self):
@ -4788,7 +4790,7 @@ class EmulatorThreadsTestCase(test.NoDBTestCase):
cpuset=set(), pcpuset=set([0]), memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)])
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo)
inst_topo = hw.numa_fit_instance_to_host(host_topo, inst_topo</