nova/nova/compute/pci_placement_translator.py

360 lines
13 KiB
Python

# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import typing as ty
import os_resource_classes
import os_traits
from oslo_log import log as logging
from nova.compute import provider_tree
from nova import exception
from nova.i18n import _
from nova.objects import fields
from nova.objects import pci_device
from nova.pci import devspec
from nova.pci import manager as pci_manager
LOG = logging.getLogger(__name__)

# Devices of these types are in a one to one mapping with a resource provider
# (RP) in placement.
PARENT_TYPES = (
    fields.PciDeviceType.STANDARD,
    fields.PciDeviceType.SRIOV_PF,
)

# Devices of these types need to have a parent device, and that parent is the
# one that is mapped to a placement RP.
CHILD_TYPES = (
    fields.PciDeviceType.SRIOV_VF,
    fields.PciDeviceType.VDPA,
)
def _is_placement_tracking_enabled() -> bool:
# This is false to act as a feature flag while we develop the feature
# step by step. It will be replaced with a config check when the feature is
# ready for production.
#
# return CONF.pci.report_in_placement
# Test code will mock this function to enable the feature in the test env
return False
def _normalize_traits(traits: ty.List[str]) -> ty.List[str]:
    """Make the trait names acceptable for placement.

    Names that ``os_traits.check_traits`` returns in its first result are
    kept as they are. Every other name is upper-cased; if the upper-cased
    name is already a custom trait name it is kept, otherwise it is passed
    through ``os_traits.normalize_name``.

    :param traits: the trait names to process
    :returns: the first result of ``check_traits`` followed by the processed
        remaining names
    """
    standard, remaining = os_traits.check_traits(traits)
    upper_cased = (trait.upper() for trait in remaining)
    custom = [
        trait if os_traits.is_custom(trait)
        else os_traits.normalize_name(trait)
        for trait in upper_cased
    ]
    return [*standard, *custom]
def _get_traits_for_dev(
dev_spec_tags: ty.Dict[str, str],
) -> ty.Set[str]:
# traits is a comma separated list of placement trait names
traits_str = dev_spec_tags.get("traits")
if not traits_str:
return set()
traits = traits_str.split(',')
return set(_normalize_traits(traits))
def _get_rc_for_dev(
    dev: pci_device.PciDevice,
    dev_spec_tags: ty.Dict[str, str],
) -> str:
    """Return the resource class to represent the device.

    If the "resource_class" tag is set then its upper-cased value is used.
    When that value is neither in os_resource_classes.STANDARDS nor already a
    custom resource class name it is passed through
    os_resource_classes.normalize_name first. Without the tag the resource
    class is generated from the vendor_id and the product_id of the device.

    :param dev: the PCI device to get the resource class for
    :param dev_spec_tags: the tags of the device spec matching the device
    :returns: the name of the resource class
    """
    configured_rc = dev_spec_tags.get("resource_class")
    if not configured_rc:
        # Nothing is configured so generate the RC from the device IDs
        return f"CUSTOM_PCI_{dev.vendor_id}_{dev.product_id}".upper()

    rc = configured_rc.upper()
    already_valid = (
        rc in os_resource_classes.STANDARDS or
        os_resource_classes.is_custom(rc)
    )
    if already_valid:
        return rc

    rc = os_resource_classes.normalize_name(rc)
    # mypy: normalize_name will return non None for non None input
    assert rc
    return rc
class PciResourceProvider:
    """A PCI Resource Provider

    Describes one resource provider (RP) to be reported to placement. The RP
    is backed either by a single parent device or by a list of child devices,
    never by both. All the devices of an RP have to share the same resource
    class and the same set of traits.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        # Set when the RP is backed by a parent type device
        self.parent_dev: ty.Optional[pci_device.PciDevice] = None
        # Filled when the RP is backed by child type devices
        self.children_devs: ty.List[pci_device.PciDevice] = []
        # Both stay None until the first device is added
        self.resource_class: ty.Optional[str] = None
        self.traits: ty.Optional[ty.Set[str]] = None

    @property
    def devs(self) -> ty.List[pci_device.PciDevice]:
        """The devices backing this RP: the parent if set, else the children
        """
        return [self.parent_dev] if self.parent_dev else self.children_devs

    def add_child(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
        """Add a child device to the RP.

        :param dev: the child PCI device to add
        :param dev_spec_tags: the tags of the device spec matching the device
        :raises PlacementPciDependentDeviceException: if the RP is already
            backed by a parent device
        :raises PlacementPciMixedResourceClassException: if the resource
            class of the device differs from the one of the RP
        :raises PlacementPciMixedTraitsException: if the traits of the device
            differ from the traits of the RP
        """
        if self.parent_dev:
            # The RP already tracks the parent, so the device being added is
            # the dependent child. Report each device in its real role.
            raise exception.PlacementPciDependentDeviceException(
                parent_dev=self.parent_dev.address,
                children_devs=dev.address,
            )

        rc = _get_rc_for_dev(dev, dev_spec_tags)
        if self.resource_class and rc != self.resource_class:
            raise exception.PlacementPciMixedResourceClassException(
                new_rc=rc,
                new_dev=dev.address,
                current_rc=self.resource_class,
                current_devs=",".join(
                    child.address for child in self.children_devs)
            )

        traits = _get_traits_for_dev(dev_spec_tags)
        # NOTE: None means that no device has been added yet while an empty
        # set is a valid set of traits to compare with.
        if self.traits is not None and self.traits != traits:
            raise exception.PlacementPciMixedTraitsException(
                new_traits=",".join(sorted(traits)),
                new_dev=dev.address,
                current_traits=",".join(sorted(self.traits)),
                current_devs=",".join(
                    child.address for child in self.children_devs),
            )

        self.children_devs.append(dev)
        self.resource_class = rc
        self.traits = traits

    def add_parent(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
        """Make a parent device the only device backing the RP.

        :param dev: the parent PCI device to add
        :param dev_spec_tags: the tags of the device spec matching the device
        :raises PlacementPciDependentDeviceException: if the RP already has a
            parent device or any child device
        """
        if self.parent_dev or self.children_devs:
            raise exception.PlacementPciDependentDeviceException(
                parent_dev=dev.address,
                children_devs=",".join(
                    tracked.address for tracked in self.devs)
            )

        self.parent_dev = dev
        self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
        self.traits = _get_traits_for_dev(dev_spec_tags)

    def update_provider_tree(
        self, provider_tree: provider_tree.ProviderTree
    ) -> None:
        """Set the inventory and the traits of this RP in the provider tree.

        :param provider_tree: the provider tree to update
        """
        provider_tree.update_inventory(
            self.name,
            # NOTE(gibi): The rest of the inventory fields (reserved,
            # allocation_ratio, etc.) are defaulted by placement and the
            # default values make sense for PCI devices, i.e. no
            # overallocation and PCI can be allocated one by one.
            # Also, this way if the operator sets reserved value in placement
            # for the PCI inventories directly then nova will not override
            # that value periodically.
            {
                self.resource_class: {
                    "total": len(self.devs),
                    "max_unit": len(self.devs),
                }
            },
        )
        provider_tree.update_traits(self.name, self.traits)

    def __str__(self) -> str:
        # Sort the traits so that the output is stable between calls
        return (
            f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
            f"traits={','.join(sorted(self.traits or set()))})"
        )
class PlacementView:
    """The PCI Placement view

    Groups PCI devices into PciResourceProvider objects keyed by the name of
    the RP and applies the result to a provider tree.
    """

    def __init__(self, hypervisor_hostname: str) -> None:
        # Maps the name of the RP to the PciResourceProvider
        self.rps: ty.Dict[str, PciResourceProvider] = {}
        # Missing PCI RPs are created in the provider tree with this name
        # passed as the second argument of new_child()
        self.root_rp_name = hypervisor_hostname

    def _get_rp_name_for_address(self, addr: str) -> str:
        """Return the RP name used for the device with the given address"""
        return f"{self.root_rp_name}_{addr.upper()}"

    def _ensure_rp(self, rp_name: str) -> PciResourceProvider:
        """Return the RP with the given name, creating it if it is missing"""
        rp = self.rps.get(rp_name)
        if rp is None:
            # Only build a new object when the RP is really missing
            rp = self.rps[rp_name] = PciResourceProvider(rp_name)
        return rp

    def _add_child(
        self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
    ) -> None:
        """Add a child type device to the RP named after its parent address.

        :raises PlacementPciException: if the device has no parent address
        """
        if not dev.parent_addr:
            msg = _(
                "Missing parent address for PCI device %(dev)s with "
                "type %(type)s"
            ) % {
                "dev": dev.address,
                "type": dev.dev_type,
            }
            raise exception.PlacementPciException(error=msg)

        rp_name = self._get_rp_name_for_address(dev.parent_addr)
        self._ensure_rp(rp_name).add_child(dev, dev_spec_tags)

    def _add_parent(
        self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
    ) -> None:
        """Add a parent type device to the RP named after its own address"""
        rp_name = self._get_rp_name_for_address(dev.address)
        self._ensure_rp(rp_name).add_parent(dev, dev_spec_tags)

    def add_dev(
        self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
    ) -> None:
        """Add a PCI device to the view.

        :param dev: the PCI device to add
        :param dev_spec_tags: the tags of the device spec matching the device
        :raises PlacementPciException: if the type of the device is neither in
            PARENT_TYPES nor in CHILD_TYPES
        """
        if dev_spec_tags.get("physical_network"):
            # NOTE(gibi): We ignore devices that have physnet configured as
            # those are there for Neutron based SRIOV and that is out of scope
            # for now. Later these devices will be tracked as PCI_NETDEV
            # devices in placement.
            return

        if dev.dev_type in PARENT_TYPES:
            self._add_parent(dev, dev_spec_tags)
        elif dev.dev_type in CHILD_TYPES:
            self._add_child(dev, dev_spec_tags)
        else:
            msg = _(
                "Unhandled PCI device type %(type)s for %(dev)s. Please "
                "report a bug."
            ) % {
                "type": dev.dev_type,
                "dev": dev.address,
            }
            raise exception.PlacementPciException(error=msg)

        if 'instance_uuid' in dev and dev.instance_uuid:
            # The device is allocated to an instance, so we need to make sure
            # the device will be allocated to the instance in placement too
            # FIXME(gibi): During migration the source host allocation should
            # be tied to the migration_uuid as consumer in placement. But
            # the PciDevice.instance_uuid is still pointing to the
            # instance_uuid both on the source and the dest. So we need to
            # check for running migrations.
            pass

    def __str__(self) -> str:
        return (
            f"Placement PCI view on {self.root_rp_name}: "
            f"{', '.join(str(rp) for rp in self.rps.values())}"
        )

    def update_provider_tree(
        self, provider_tree: provider_tree.ProviderTree
    ) -> None:
        """Apply every RP of the view to the provider tree.

        RPs that do not exist in the tree yet are created first.

        :param provider_tree: the provider tree to update
        """
        for rp_name, rp in self.rps.items():
            if not provider_tree.exists(rp_name):
                provider_tree.new_child(rp_name, self.root_rp_name)

            rp.update_provider_tree(provider_tree)
def ensure_no_dev_spec_with_devname(dev_specs: ty.List[devspec.PciDeviceSpec]):
    """Reject the device specs that are based on 'devname'.

    :param dev_specs: the device specs to check
    :raises PlacementPciException: for the first device spec whose
        configuration has a non empty "devname" key
    """
    for spec in dev_specs:
        spec_conf = spec.dev_spec_conf
        if not spec_conf.get("devname"):
            continue

        msg = _(
            "Invalid [pci]device_spec configuration. PCI Placement "
            "reporting does not support 'devname' based device "
            "specification but we got %(dev_spec)s. "
            "Please use PCI address in the configuration instead."
        ) % {"dev_spec": spec_conf}
        raise exception.PlacementPciException(error=msg)
def update_provider_tree_for_pci(
    provider_tree: provider_tree.ProviderTree,
    nodename: str,
    pci_tracker: pci_manager.PciDevTracker,
    allocations: dict,
) -> bool:
    """Based on the PciDevice objects in the pci_tracker it calculates what
    inventories and allocations need to exist in placement and creates the
    missing pieces.

    It returns True if not just the provider_tree but also the allocations
    needed to be changed. Allocations are not handled yet, so currently it
    always returns False.

    :param provider_tree: the provider tree to update with the PCI RPs
    :param nodename: the name of the RP under which the PCI RPs are created
    :param pci_tracker: the tracker providing the PCI devices and the device
        specs
    :param allocations:
        Dict of allocation data of the form:
          { $CONSUMER_UUID: {
                # The shape of each "allocations" dict below is identical
                # to the return from GET /allocations/{consumer_uuid}
                "allocations": {
                    $RP_UUID: {
                        "generation": $RP_GEN,
                        "resources": {
                            $RESOURCE_CLASS: $AMOUNT,
                            ...
                        },
                    },
                    ...
                },
                "project_id": $PROJ_ID,
                "user_id": $USER_ID,
                "consumer_generation": $CONSUMER_GEN,
            },
            ...
          }
    :raises PlacementPciException: if a device spec uses 'devname'
    """
    if not _is_placement_tracking_enabled():
        # If tracking is not enabled we just return without touching anything
        return False

    dev_filter = pci_tracker.dev_filter
    ensure_no_dev_spec_with_devname(dev_filter.specs)

    LOG.debug(
        'Collecting PCI inventories and allocations to track them in Placement'
    )

    view = PlacementView(nodename)
    for pci_dev in pci_tracker.pci_devs:
        # match the PCI device with the [pci]device_spec config to access
        # the configuration metadata tags
        matching_spec = pci_tracker.dev_filter.get_devspec(pci_dev)
        if matching_spec:
            view.add_dev(pci_dev, matching_spec.get_tags())
        else:
            LOG.warning(
                "Device spec is not found for device %s in [pci]device_spec. "
                "Ignoring device in Placement resource view. "
                "This should not happen. Please file a bug.", pci_dev.address)

    LOG.info("Placement PCI resource view: %s", view)

    view.update_provider_tree(provider_tree)

    # FIXME(gibi): Check allocations too based on pci_dev.instance_uuid and
    # if there was any update then we have to return True to trigger a
    # reshape.
    return False