Allow enabling PCI tracking in Placement

This patch introduces the [pci]report_in_placement config option that is
False by default but if set to True will enable reporting of the PCI
passthrough inventories to Placement.

blueprint: pci-device-tracking-in-placement
Change-Id: I49a3dbf4c5708d2d92dedd29a9dc3ef25b6cd66c
This commit is contained in:
Balazs Gibizer 2022-07-20 10:54:16 +02:00
parent 9268bc36a3
commit 06389f8d84
6 changed files with 49 additions and 35 deletions

View File

@ -351,6 +351,14 @@ information, refer to :oslo.config:option:`the documentation <pci.alias>`.
PCI tracking in Placement
-------------------------
.. note::
The features described below are optional and disabled by default in nova
26.0.0 (Zed). The legacy PCI tracker code path is still supported and
enabled. The Placement PCI tracking can be enabled via the
:oslo.config:option:`pci.report_in_placement` configuration. But please note
that once it is enabled on a given compute host it cannot be disabled there
any more.
Since nova 26.0.0 (Zed) PCI passthrough device inventories are tracked in
Placement. If a PCI device exists on the hypervisor and
matches one of the device specifications configured via

View File

@ -21,6 +21,7 @@ from oslo_log import log as logging
from oslo_utils import uuidutils
from nova.compute import provider_tree
import nova.conf
from nova import exception
from nova.i18n import _
from nova.objects import fields
@ -29,6 +30,7 @@ from nova.pci import devspec
from nova.pci import manager as pci_manager
CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)
@ -42,14 +44,7 @@ CHILD_TYPES = (
def _is_placement_tracking_enabled() -> bool:
# This is false to act as a feature flag while we develop the feature
# step by step. It will be replaced with a config check when the feature is
# ready for production.
#
# return CONF.pci.report_in_placement
# Test code will mock this function to enable the feature in the test env
return False
return CONF.pci.report_in_placement
def _normalize_traits(traits: ty.List[str]) -> ty.List[str]:

View File

@ -225,7 +225,21 @@ Possible values:
device_spec = [{"product_id":"0001", "vendor_id":"8086"},
{"product_id":"0002", "vendor_id":"8086"}]
""")
"""),
cfg.BoolOpt('report_in_placement',
default=False,
help="""
Enable PCI resource inventory reporting to Placement. If it is enabled then the
nova-compute service will report PCI resource inventories to Placement
according to the [pci]device_spec configuration and the PCI devices reported
by the hypervisor. Once it is enabled it cannot be disabled any more. In a
future release the default of this config will be changed to True.
Related options:
* [pci]device_spec: to define which PCI devices nova is allowed to track and
assign to guests.
"""),
]

View File

@ -61,13 +61,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
def setUp(self):
super().setUp()
patcher = mock.patch(
"nova.compute.pci_placement_translator."
"_is_placement_tracking_enabled",
return_value=True
)
self.addCleanup(patcher.stop)
self.mock_pci_report_in_placement = patcher.start()
self.flags(group="pci", report_in_placement=True)
# These tests should not depend on the host's sysfs
self.useFixture(
@ -718,7 +712,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
]
)
self.flags(group='pci', device_spec=device_spec)
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assertPCIDeviceCounts("compute1", total=1, free=1)
@ -893,7 +887,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
# Disable placement reporting so even if there are PCI devices on the
# hypervisor matching the [pci]device_spec config they are not reported
# to Placement
self.mock_pci_report_in_placement.return_value = False
self.flags(group="pci", report_in_placement=False)
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assert_placement_pci_view(
@ -931,7 +925,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
# Try to disable placement reporting. The compute will refuse to start
# as there are already PCI device RPs in placement.
self.mock_pci_report_in_placement.return_value = False
self.flags(group="pci", report_in_placement=False)
ex = self.assertRaises(
exception.PlacementPciException,
self.restart_compute_service,
@ -988,7 +982,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=device_spec)
# Start a compute *without* PCI tracking in placement
self.mock_pci_report_in_placement.return_value = False
self.flags(group="pci", report_in_placement=False)
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assertPCIDeviceCounts("compute1", total=1, free=1)
@ -999,7 +993,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.assertPCIDeviceCounts("compute1", total=1, free=0)
# Restart the compute but now with PCI tracking enabled
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.restart_compute_service("compute1")
# Assert that the PCI allocation is healed in placement
self.assertPCIDeviceCounts("compute1", total=1, free=0)
@ -1057,7 +1051,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=device_spec)
# Start a compute *without* PCI tracking in placement
self.mock_pci_report_in_placement.return_value = False
self.flags(group="pci", report_in_placement=False)
self.start_compute(hostname="compute1", pci_info=pci_info)
# 2 PCI + 1 PF + 4 VFs
self.assertPCIDeviceCounts("compute1", total=7, free=7)
@ -1082,7 +1076,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.assertPCIDeviceCounts("compute1", total=7, free=1)
# Restart the compute but now with PCI tracking enabled
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.restart_compute_service("compute1")
# Assert that the PCI allocation is healed in placement
self.assertPCIDeviceCounts("compute1", total=7, free=1)
@ -1157,7 +1151,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute1", pci_info=pci_info)
# 2 PCI + 1 PF + 4 VFs
self.assertPCIDeviceCounts("compute1", total=7, free=7)
@ -1234,7 +1228,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=compute1_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
self.assertPCIDeviceCounts("compute1", total=2, free=2)
compute1_expected_placement_view = {
@ -1395,7 +1389,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=compute1_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
self.assertPCIDeviceCounts("compute1", total=1, free=1)
compute1_expected_placement_view = {
@ -1458,7 +1452,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
self.flags(group='pci', device_spec=compute2_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute2", pci_info=compute2_pci_info)
self.assertPCIDeviceCounts("compute2", total=3, free=3)
compute2_expected_placement_view = {
@ -1533,7 +1527,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
)
self.flags(group='pci', device_spec=compute1_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.flags(group="pci", report_in_placement=True)
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
self.assertPCIDeviceCounts("compute1", total=3, free=3)
compute1_expected_placement_view = {

View File

@ -1742,13 +1742,7 @@ class PCIServersTest(_PCIServersTestBase):
def setUp(self):
super().setUp()
patcher = mock.patch(
"nova.compute.pci_placement_translator."
"_is_placement_tracking_enabled",
return_value=True
)
self.addCleanup(patcher.stop)
patcher.start()
self.flags(group="pci", report_in_placement=True)
def test_create_server_with_pci_dev_and_numa(self):
"""Verifies that an instance can be booted with cpu pinning and with an

View File

@ -0,0 +1,9 @@
---
features:
- |
Nova started tracking PCI devices in Placement. This is an optional feature
disabled by default while we are implementing inventory tracking and
scheduling support for both PCI passthrough devices and SR-IOV devices
consumed via Neutron ports. Please read our
`documentation <https://docs.openstack.org/nova/latest/admin/pci-passthrough.html#pci-tracking-in-placement>`_
for more details on what is supported and how this feature can be enabled.