Merge "Support resource_class and traits in PCI alias"

This commit is contained in:
Zuul 2022-12-13 17:33:41 +00:00 committed by Gerrit Code Review
commit 035b1c6fbe
11 changed files with 449 additions and 48 deletions

View File

@ -442,6 +442,21 @@ removed and VFs from the same PF is configured (or vice versa) then
nova-compute will refuse to start as it would create a situation where both
the PF and its VFs are made available for consumption.
If a flavor requests multiple ``type-VF`` devices via
:nova:extra-spec:`pci_passthrough:alias` then it is important to consider the
value of :nova:extra-spec:`group_policy` as well. The value ``none``
allows nova to select VFs from the same parent PF to fulfill the request. The
value ``isolate`` restricts nova to select each VF from a different parent PF
to fulfill the request. If :nova:extra-spec:`group_policy` is not provided in
such a flavor then it will be defaulted to ``none``.
Symmetrically with the ``resource_class`` and ``traits`` fields of
:oslo.config:option:`pci.device_spec` the :oslo.config:option:`pci.alias`
configuration option supports requesting devices by Placement resource class
name via the ``resource_class`` field and also supports requesting traits to
be present on the selected devices via the ``traits`` field in the alias. If
the ``resource_class`` field is not specified in the alias then it is defaulted
by nova to ``CUSTOM_PCI_<vendor_id>_<product_id>``.
For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_

View File

@ -65,20 +65,50 @@ def _normalize_traits(traits: ty.List[str]) -> ty.List[str]:
return list(standard_traits) + custom_traits
def get_traits(traits_str: str) -> ty.Set[str]:
"""Return a normalized set of placement standard and custom traits from
a string of comma separated trait names.
"""
# traits is a comma separated list of placement trait names
if not traits_str:
return set()
return set(_normalize_traits(traits_str.split(',')))
def _get_traits_for_dev(
dev_spec_tags: ty.Dict[str, str],
) -> ty.Set[str]:
# traits is a comma separated list of placement trait names
traits_str = dev_spec_tags.get("traits")
if not traits_str:
return {os_traits.COMPUTE_MANAGED_PCI_DEVICE}
traits = traits_str.split(',')
return set(_normalize_traits(traits)) | {
return get_traits(dev_spec_tags.get("traits", "")) | {
os_traits.COMPUTE_MANAGED_PCI_DEVICE
}
def _normalize_resource_class(rc: str) -> str:
rc = rc.upper()
if (
rc not in os_resource_classes.STANDARDS and
not os_resource_classes.is_custom(rc)
):
rc = os_resource_classes.normalize_name(rc)
# mypy: normalize_name will return non None for non None input
assert rc
return rc
def get_resource_class(
requested_name: ty.Optional[str], vendor_id: str, product_id: str
) -> str:
"""Return the normalized resource class name based on what is requested
or if nothing is requested then generated from the vendor_id and product_id
"""
if requested_name:
rc = _normalize_resource_class(requested_name)
else:
rc = f"CUSTOM_PCI_{vendor_id}_{product_id}".upper()
return rc
def _get_rc_for_dev(
dev: pci_device.PciDevice,
dev_spec_tags: ty.Dict[str, str],
@ -91,23 +121,8 @@ def _get_rc_for_dev(
The user specified resource class is normalized if it is not already an
acceptable standard or custom resource class.
"""
# Either use the resource class from the config or the vendor_id and
# product_id of the device to generate the RC
rc = dev_spec_tags.get("resource_class")
if rc:
rc = rc.upper()
if (
rc not in os_resource_classes.STANDARDS and
not os_resource_classes.is_custom(rc)
):
rc = os_resource_classes.normalize_name(rc)
# mypy: normalize_name will return non None for non None input
assert rc
else:
rc = f"CUSTOM_PCI_{dev.vendor_id}_{dev.product_id}".upper()
return rc
return get_resource_class(rc, dev.vendor_id, dev.product_id)
class PciResourceProvider:

View File

@ -67,6 +67,32 @@ Possible Values:
Required NUMA affinity of device. Valid values are: ``legacy``,
``preferred`` and ``required``.
``resource_class``
The optional Placement resource class name that is used
to track the requested PCI devices in Placement. It can be a standard
resource class from the ``os-resource-classes`` lib, or it can be any string.
In that case Nova will normalize it to a proper Placement resource class by
making it upper case, replacing any consecutive character outside of
``[A-Z0-9_]`` with a single '_', and prefixing the name with ``CUSTOM_`` if
not yet prefixed. The maximum allowed length is 255 characters including the
prefix. If ``resource_class`` is not provided Nova will generate it from
``vendor_id`` and ``product_id`` values of the alias in the form of
``CUSTOM_PCI_{vendor_id}_{product_id}``. The ``resource_class`` requested
in the alias is matched against the ``resource_class`` defined in the
``[pci]device_spec``.
``traits``
An optional comma separated list of Placement trait names requested to be
present on the resource provider that fulfills this alias. Each trait can
be a standard trait from ``os-traits`` lib or can be any string. If it is
not a standard trait then Nova will normalize the trait name by making it
upper case, replacing any consecutive character outside of ``[A-Z0-9_]``
with a single '_', and prefixing the name with ``CUSTOM_`` if not yet
prefixed. The maximum allowed length of a trait name is 255 characters
including the prefix. Every trait in ``traits`` requested in the alias is
ensured to be in the list of traits provided in the ``traits`` field of
the ``[pci]device_spec`` when scheduling the request.
* Supports multiple aliases by repeating the option (not by specifying
a list value)::

View File

@ -14,12 +14,14 @@
import copy
import itertools
import typing as ty
import os_resource_classes as orc
from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_utils import versionutils
from nova.compute import pci_placement_translator
from nova.db.api import api as api_db_api
from nova.db.api import models as api_models
from nova import exception
@ -474,14 +476,16 @@ class RequestSpec(base.NovaObject):
return filt_props
@staticmethod
def _rc_from_request(pci_request: 'objects.InstancePCIRequest') -> str:
# FIXME(gibi): refactor this and the copy of the logic from the
# translator to a common function
# FIXME(gibi): handle directly requested resource_class
# ??? can there be more than one spec???
spec = pci_request.spec[0]
rc = f"CUSTOM_PCI_{spec['vendor_id']}_{spec['product_id']}".upper()
return rc
def _rc_from_request(spec: ty.Dict[str, ty.Any]) -> str:
return pci_placement_translator.get_resource_class(
spec.get("resource_class"),
spec.get("vendor_id"),
spec.get("product_id"),
)
@staticmethod
def _traits_from_request(spec: ty.Dict[str, ty.Any]) -> ty.Set[str]:
return pci_placement_translator.get_traits(spec.get("traits", ""))
# This is here temporarily until the PCI placement scheduling is under
# implementation. When that is done there will be a config option
@ -501,6 +505,34 @@ class RequestSpec(base.NovaObject):
# cycle.
continue
if len(pci_request.spec) != 1:
# We are instantiating InstancePCIRequest objects with spec in
# two cases:
# 1) when a neutron port is translated to InstancePCIRequest
# object in
# nova.network.neutron.API.create_resource_requests
# 2) when the pci_passthrough:alias flavor extra_spec is
# translated to InstancePCIRequest objects in
# nova.pci.request._get_alias_from_config which enforces the
# json schema defined in nova.pci.request.
#
# In both cases only a single dict is added to the spec list.
# If we ever want to add support for multiple specs per request
# then we have to solve the issue that each spec can request a
# different resource class from placement. The only place in
# nova that currently handles multiple specs per request is
# nova.pci.utils.pci_device_prop_match() and it considers them
# as alternatives. So specs with different resource classes
# would mean alternative resource_class requests. This cannot
# be expressed today in the allocation_candidate query towards
# placement.
raise ValueError(
"PCI tracking in placement does not support multiple "
"specs per PCI request"
)
spec = pci_request.spec[0]
# The goal is to translate InstancePCIRequest to RequestGroup. Each
# InstancePCIRequest can be fulfilled from the whole RP tree. And
# a flavor based InstancePCIRequest might request more than one
@ -533,9 +565,13 @@ class RequestSpec(base.NovaObject):
# per requested device. So for InstancePCIRequest(count=2) we need
# to generate two separate RequestGroup(RC:1) objects.
# FIXME(gibi): make sure that if we have count=2 requests then
# group_policy=none is in the request as group_policy=isolate
# would prevent allocating two VFs from the same PF.
# NOTE(gibi): If we have count=2 requests then the multiple
# RequestGroup split below only works if group_policy is set to
# none as group_policy=isolate would prevent allocating two VFs
# from the same PF. Fortunately
# nova.scheduler.utils.resources_from_request_spec() already
# defaults group_policy to none if it is not specified in the
# flavor and there are multiple RequestGroups in the RequestSpec.
for i in range(pci_request.count):
rg = objects.RequestGroup(
@ -546,8 +582,11 @@ class RequestSpec(base.NovaObject):
# as we split count >= 2 requests to independent groups
# each group will have a resource request of one
resources={
self._rc_from_request(pci_request): 1}
# FIXME(gibi): handle traits requested from alias
self._rc_from_request(spec): 1
},
required_traits=self._traits_from_request(spec),
# TODO(gibi): later we can add support for complex trait
# queries here including forbidden_traits.
)
self.requested_resources.append(rg)

View File

@ -106,6 +106,12 @@ _ALIAS_SCHEMA = {
"type": "string",
"enum": list(obj_fields.PCINUMAAffinityPolicy.ALL),
},
"resource_class": {
"type": "string",
},
"traits": {
"type": "string",
},
},
"required": ["name"],
}
@ -114,7 +120,7 @@ _ALIAS_SCHEMA = {
def _get_alias_from_config() -> Alias:
"""Parse and validate PCI aliases from the nova config.
:returns: A dictionary where the keys are device names and the values are
:returns: A dictionary where the keys are alias names and the values are
tuples of form ``(numa_policy, specs)``. ``numa_policy`` describes the
required NUMA affinity of the device(s), while ``specs`` is a list of
PCI device specs.

View File

@ -64,6 +64,11 @@ class PciDeviceStats(object):
"""
pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type']
# these can be specified in the [pci]device_spec and can be requested via
# the PCI alias, but they are matched by the placement
# allocation_candidates query, so we can ignore them during pool creation
# and during filtering here
ignored_tags = ['resource_class', 'traits']
def __init__(
self,
@ -135,7 +140,9 @@ class PciDeviceStats(object):
tags = devspec.get_tags()
pool = {k: getattr(dev, k) for k in self.pool_keys}
if tags:
pool.update(tags)
pool.update(
{k: v for k, v in tags.items() if k not in self.ignored_tags}
)
# NOTE(gibi): parent_ifname acts like a tag during pci claim but
# not provided as part of the whitelist spec as it is auto detected
# by the virt driver.
@ -313,7 +320,13 @@ class PciDeviceStats(object):
:returns: A list of pools that can be used to support the request if
this is possible.
"""
request_specs = request.spec
def ignore_keys(spec):
return {
k: v for k, v in spec.items() if k not in self.ignored_tags
}
request_specs = [ignore_keys(spec) for spec in request.spec]
return [
pool for pool in pools
if utils.pci_device_prop_match(pool, request_specs)

View File

@ -1618,3 +1618,127 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
def setUp(self):
super().setUp()
# TODO(gibi): replace this with setting the [scheduler]pci_prefilter
# confing to True once that config is added
self.mock_pci_in_placement_enabled = self.useFixture(
fixtures.MockPatch(
'nova.objects.request_spec.RequestSpec.'
'_pci_in_placement_enabled',
return_value=True
)
).mock
def test_boot_with_custom_rc_and_traits(self):
# The fake libvirt will emulate on the host:
# * one type-PCI in slot 0
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=1, num_pfs=0, num_vfs=0)
device_spec = self._to_device_spec_conf(
[
{
"address": "0000:81:00.0",
"resource_class": "gpu",
"traits": ",".join(
[
os_traits.HW_GPU_API_VULKAN,
"purple",
"round",
]
),
},
]
)
self.flags(group='pci', device_spec=device_spec)
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assertPCIDeviceCounts("compute1", total=1, free=1)
compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {"CUSTOM_GPU": 1},
},
"traits": {
"0000:81:00.0": [
"HW_GPU_API_VULKAN",
"CUSTOM_PURPLE",
"CUSTOM_ROUND",
],
},
"usages": {
"0000:81:00.0": {"CUSTOM_GPU": 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
pci_alias_wrong_rc = {
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.PCI_PROD_ID,
"name": "a-gpu-wrong-rc",
}
pci_alias_wrong_rc_2 = {
"resource_class": os_resource_classes.PGPU,
"name": "a-gpu-wrong-rc-2",
}
pci_alias_asking_for_missing_trait = {
"resource_class": "GPU",
# NOTE(gibi): "big" is missing from device spec
"traits": "purple,big",
"name": "a-gpu-missing-trait",
}
pci_alias_gpu = {
"resource_class": "GPU",
"traits": "HW_GPU_API_VULKAN,PURPLE",
"name": "a-gpu",
}
self.flags(
group="pci",
# FIXME(gibi): make _to_device_spec_conf a general util for both
# device spec and pci alias
alias=self._to_device_spec_conf(
[
pci_alias_wrong_rc,
pci_alias_wrong_rc_2,
pci_alias_asking_for_missing_trait,
pci_alias_gpu,
]
),
)
# try to boot with each alias that does not match
for alias in [
"a-gpu-wrong-rc",
"a-gpu-wrong-rc-2",
"a-gpu-missing-trait",
]:
extra_spec = {"pci_passthrough:alias": f"{alias}:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(
flavor_id=flavor_id, networks=[], expected_state='ERROR')
self.assertIn('fault', server)
self.assertIn('No valid host', server['fault']['message'])
self.assertPCIDeviceCounts("compute1", total=1, free=1)
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# then boot with the matching alias
extra_spec = {"pci_passthrough:alias": "a-gpu:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(
flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts("compute1", total=1, free=0)
compute1_expected_placement_view[
"usages"]["0000:81:00.0"]["CUSTOM_GPU"] = 1
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {"CUSTOM_GPU": 1}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")

View File

@ -88,8 +88,8 @@ class TestTranslator(test.NoDBTestCase):
)
def test_trait_normalization(self, trait_names, expected_traits):
self.assertEqual(
expected_traits | {"COMPUTE_MANAGED_PCI_DEVICE"},
ppt._get_traits_for_dev({"traits": trait_names})
expected_traits,
ppt.get_traits(trait_names)
)
@ddt.unpack
@ -110,7 +110,9 @@ class TestTranslator(test.NoDBTestCase):
def test_resource_class_normalization(self, pci_dev, rc_name, expected_rc):
self.assertEqual(
expected_rc,
ppt._get_rc_for_dev(pci_dev, {"resource_class": rc_name})
ppt.get_resource_class(
rc_name, pci_dev.vendor_id, pci_dev.product_id
),
)
def test_dependent_device_pf_then_vf(self):
@ -118,12 +120,16 @@ class TestTranslator(test.NoDBTestCase):
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
dev_type=fields.PciDeviceType.SRIOV_PF,
vendor_id="dead",
product_id="beef",
)
vf = pci_device.PciDevice(
address="0000:81:00.1",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id="dead",
product_id="beef",
)
pv._add_dev(pf, {"resource_class": "foo"})
@ -146,17 +152,23 @@ class TestTranslator(test.NoDBTestCase):
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
dev_type=fields.PciDeviceType.SRIOV_PF,
vendor_id="dead",
product_id="beef",
)
vf = pci_device.PciDevice(
address="0000:81:00.1",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id="dead",
product_id="beef",
)
vf2 = pci_device.PciDevice(
address="0000:81:00.2",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id="dead",
product_id="beef",
)
pv._add_dev(vf, {"resource_class": "foo"})
@ -182,7 +194,10 @@ class TestTranslator(test.NoDBTestCase):
pci_device.PciDevice(
address="0000:81:00.%d" % f,
parent_addr="0000:71:00.0",
dev_type=fields.PciDeviceType.SRIOV_VF)
dev_type=fields.PciDeviceType.SRIOV_VF,
vendor_id="dead",
product_id="beef",
)
for f in range(0, 4)
]

View File

@ -1244,6 +1244,62 @@ class TestInstancePCIRequestToRequestGroups(test.NoDBTestCase):
spec.requested_resources[1].obj_to_primitive(),
)
def test_with_rc_and_traits_from_the_pci_req_spec(self):
spec = request_spec.RequestSpec(
requested_resources=[],
pci_requests=objects.InstancePCIRequests(
requests=[
objects.InstancePCIRequest(
count=1,
request_id=uuids.req1,
spec=[
{
"vendor_id": "de12",
"product_id": "1234",
"resource_class": "gpu",
}
],
alias_name="a-dev",
),
objects.InstancePCIRequest(
count=1,
request_id=uuids.req2,
spec=[
{
"vendor_id": "fff",
"product_id": "dead",
"traits": "foo,bar,CUSTOM_BLUE",
}
],
alias_name="a-dev",
),
]
),
)
spec._generate_request_groups_from_pci_requests()
self.assertEqual(2, len(spec.requested_resources))
self.assertEqual(
request_spec.RequestGroup(
requester_id=f"{uuids.req1}-0",
resources={"CUSTOM_GPU": 1},
use_same_provider=True,
).obj_to_primitive(),
spec.requested_resources[0].obj_to_primitive(),
)
# Note that sets would be serialized to tuples by obj_to_primitive in
# random order, so we need to match this spec field by field
expected = request_spec.RequestGroup(
requester_id=f"{uuids.req2}-0",
resources={"CUSTOM_PCI_FFF_DEAD": 1},
required_traits={"CUSTOM_FOO", "CUSTOM_BAR", "CUSTOM_BLUE"},
use_same_provider=True,
)
actual = spec.requested_resources[1]
for field in request_spec.RequestGroup.fields.keys():
self.assertEqual(getattr(expected, field), getattr(actual, field))
class TestRequestGroupObject(test.NoDBTestCase):
def setUp(self):

View File

@ -187,6 +187,21 @@ class PciRequestTestCase(test.NoDBTestCase):
self.assertIn("xxx", aliases)
self.assertEqual(policy, aliases["xxx"][0])
def test_get_alias_from_config_valid_rc_and_traits(self):
fake_alias = jsonutils.dumps({
"name": "xxx",
"resource_class": "foo",
"traits": "bar,baz",
})
self.flags(alias=[fake_alias], group='pci')
aliases = request._get_alias_from_config()
self.assertIsNotNone(aliases)
self.assertIn("xxx", aliases)
self.assertEqual(
("legacy", [{"resource_class": "foo", "traits": "bar,baz"}]),
aliases["xxx"],
)
def test_get_alias_from_config_conflicting_device_type(self):
"""Check behavior when device_type conflicts occur."""
fake_alias_a = jsonutils.dumps({

View File

@ -732,6 +732,83 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
self.pci_stats.pools[5]['devices'][0])
class PciDeviceStatsPlacementSupportTestCase(test.NoDBTestCase):
def test_device_spec_rc_and_traits_ignored_during_pooling(self):
"""Assert that resource_class and traits from the device spec are not
used as discriminator for pool creation.
"""
device_spec = [
jsonutils.dumps(
{
"resource_class": "foo",
"address": "*:81:00.*",
"traits": "gold",
}
),
jsonutils.dumps(
{
"resource_class": "baar",
"address": "*:81:01.*",
"traits": "silver",
}
),
]
self.flags(device_spec=device_spec, group="pci")
dev_filter = whitelist.Whitelist(device_spec)
pci_stats = stats.PciDeviceStats(
objects.NUMATopology(),
dev_filter=dev_filter)
pci_dev1 = objects.PciDevice(
vendor_id="dead",
product_id="beef",
address="0000:81:00.0",
parent_addr=None,
numa_node=0,
dev_type="type-PF",
)
pci_dev2 = objects.PciDevice(
vendor_id="dead",
product_id="beef",
address="0000:81:01.0",
parent_addr=None,
numa_node=0,
dev_type="type-PF",
)
# the two device matched by different device_specs with different
# resource_class and traits fields
pci_stats.add_device(pci_dev1)
pci_stats.add_device(pci_dev2)
# but they are put in the same pool as all the other fields are
# matching
self.assertEqual(1, len(pci_stats.pools))
self.assertEqual(2, pci_stats.pools[0]["count"])
def test_filter_pools_for_spec_ignores_rc_and_traits_in_spec(self):
"""Assert that resource_class and traits are ignored in the pci
request spec during matching the request to pools.
"""
pci_stats = stats.PciDeviceStats(objects.NUMATopology())
pools = [{"vendor_id": "dead", "product_id": "beef"}]
matching_pools = pci_stats._filter_pools_for_spec(
pools=pools,
request=objects.InstancePCIRequest(
spec=[
{
"vendor_id": "dead",
"product_id": "beef",
"resource_class": "foo",
"traits": "blue",
}
]
),
)
self.assertEqual(pools, matching_pools)
class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
def setUp(self):