From 06389f8d849c6380a5f3763f03e84f804c4d29f2 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 20 Jul 2022 10:54:16 +0200 Subject: [PATCH] Allow enabling PCI tracking in Placement This patch introduces the [pci]report_in_placement config option that is False by default but if set to True will enable reporting of the PCI passthrough inventories to Placement. blueprint: pci-device-tracking-in-placement Change-Id: I49a3dbf4c5708d2d92dedd29a9dc3ef25b6cd66c --- doc/source/admin/pci-passthrough.rst | 8 +++++ nova/compute/pci_placement_translator.py | 11 ++----- nova/conf/pci.py | 16 +++++++++- .../libvirt/test_pci_in_placement.py | 32 ++++++++----------- .../libvirt/test_pci_sriov_servers.py | 8 +---- ...racking-in-placement-75ee1d20a57662f2.yaml | 9 ++++++ 6 files changed, 49 insertions(+), 35 deletions(-) create mode 100644 releasenotes/notes/bp-pci-device-tracking-in-placement-75ee1d20a57662f2.yaml diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 3fd5f7826ee3..6017d9551564 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -351,6 +351,14 @@ information, refer to :oslo.config:option:`the documentation `. PCI tracking in Placement ------------------------- +.. note:: + The feature described below is optional and disabled by default in nova + 26.0.0 (Zed). The legacy PCI tracker code path is still supported and + enabled. The Placement PCI tracking can be enabled via the + :oslo.config:option:`pci.report_in_placement` configuration. But please note + that once it is enabled on a given compute host it cannot be disabled there + any more. + Since nova 26.0.0 (Zed) PCI passthrough device inventories are tracked in Placement.
If a PCI device exists on the hypervisor and matches one of the device specifications configured via diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index c7669f520392..5a1a060d7664 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -21,6 +21,7 @@ from oslo_log import log as logging from oslo_utils import uuidutils from nova.compute import provider_tree +import nova.conf from nova import exception from nova.i18n import _ from nova.objects import fields @@ -29,6 +30,7 @@ from nova.pci import devspec from nova.pci import manager as pci_manager +CONF = nova.conf.CONF LOG = logging.getLogger(__name__) @@ -42,14 +44,7 @@ CHILD_TYPES = ( def _is_placement_tracking_enabled() -> bool: - # This is false to act as a feature flag while we develop the feature - # step by step. It will be replaced with a config check when the feature is - # ready for production. - # - # return CONF.pci.report_in_placement - - # Test code will mock this function to enable the feature in the test env - return False + return CONF.pci.report_in_placement def _normalize_traits(traits: ty.List[str]) -> ty.List[str]: diff --git a/nova/conf/pci.py b/nova/conf/pci.py index 278fe4c0c022..673185391b2e 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -225,7 +225,21 @@ Possible values: device_spec = [{"product_id":"0001", "vendor_id":"8086"}, {"product_id":"0002", "vendor_id":"8086"}] -""") +"""), + cfg.BoolOpt('report_in_placement', + default=False, + help=""" +Enable PCI resource inventory reporting to Placement. If it is enabled then the +nova-compute service will report PCI resource inventories to Placement +according to the [pci]device_spec configuration and the PCI devices reported +by the hypervisor. Once it is enabled it cannot be disabled any more. In a +future release the default of this config will be change to True. 
+ +Related options: + +* [pci]device_spec: to define which PCI devices nova are allowed to track and + assign to guests. +"""), ] diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index c61ebda1ebab..32f6cfeca7ba 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -61,13 +61,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): def setUp(self): super().setUp() - patcher = mock.patch( - "nova.compute.pci_placement_translator." - "_is_placement_tracking_enabled", - return_value=True - ) - self.addCleanup(patcher.stop) - self.mock_pci_report_in_placement = patcher.start() + self.flags(group="pci", report_in_placement=True) # These tests should not depend on the host's sysfs self.useFixture( @@ -718,7 +712,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests): ] ) self.flags(group='pci', device_spec=device_spec) - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute1", pci_info=pci_info) self.assertPCIDeviceCounts("compute1", total=1, free=1) @@ -893,7 +887,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests): # Disable placement reporting so even if there are PCI devices on the # hypervisor matching the [pci]device_spec config they are not reported # to Placement - self.mock_pci_report_in_placement.return_value = False + self.flags(group="pci", report_in_placement=False) self.start_compute(hostname="compute1", pci_info=pci_info) self.assert_placement_pci_view( @@ -931,7 +925,7 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests): # Try to disable placement reporting. The compute will refuse to start # as there are already PCI device RPs in placement. 
- self.mock_pci_report_in_placement.return_value = False + self.flags(group="pci", report_in_placement=False) ex = self.assertRaises( exception.PlacementPciException, self.restart_compute_service, @@ -988,7 +982,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=device_spec) # Start a compute *without* PCI tracking in placement - self.mock_pci_report_in_placement.return_value = False + self.flags(group="pci", report_in_placement=False) self.start_compute(hostname="compute1", pci_info=pci_info) self.assertPCIDeviceCounts("compute1", total=1, free=1) @@ -999,7 +993,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.assertPCIDeviceCounts("compute1", total=1, free=0) # Restart the compute but now with PCI tracking enabled - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.restart_compute_service("compute1") # Assert that the PCI allocation is healed in placement self.assertPCIDeviceCounts("compute1", total=1, free=0) @@ -1057,7 +1051,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=device_spec) # Start a compute *without* PCI tracking in placement - self.mock_pci_report_in_placement.return_value = False + self.flags(group="pci", report_in_placement=False) self.start_compute(hostname="compute1", pci_info=pci_info) # 2 PCI + 1 PF + 4 VFs self.assertPCIDeviceCounts("compute1", total=7, free=7) @@ -1082,7 +1076,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.assertPCIDeviceCounts("compute1", total=7, free=1) # Restart the compute but now with PCI tracking enabled - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.restart_compute_service("compute1") # Assert that the PCI allocation is healed in placement self.assertPCIDeviceCounts("compute1", total=7, free=1) @@ -1157,7 
+1151,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=device_spec) # Start a compute with PCI tracking in placement - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute1", pci_info=pci_info) # 2 PCI + 1 PF + 4 VFs self.assertPCIDeviceCounts("compute1", total=7, free=7) @@ -1234,7 +1228,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=compute1_device_spec) # Start a compute with PCI tracking in placement - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute1", pci_info=compute1_pci_info) self.assertPCIDeviceCounts("compute1", total=2, free=2) compute1_expected_placement_view = { @@ -1395,7 +1389,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=compute1_device_spec) # Start a compute with PCI tracking in placement - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute1", pci_info=compute1_pci_info) self.assertPCIDeviceCounts("compute1", total=1, free=1) compute1_expected_placement_view = { @@ -1458,7 +1452,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): self.flags(group='pci', device_spec=compute2_device_spec) # Start a compute with PCI tracking in placement - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute2", pci_info=compute2_pci_info) self.assertPCIDeviceCounts("compute2", total=3, free=3) compute2_expected_placement_view = { @@ -1533,7 +1527,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): ) self.flags(group='pci', device_spec=compute1_device_spec) # Start a compute 
with PCI tracking in placement - self.mock_pci_report_in_placement.return_value = True + self.flags(group="pci", report_in_placement=True) self.start_compute(hostname="compute1", pci_info=compute1_pci_info) self.assertPCIDeviceCounts("compute1", total=3, free=3) compute1_expected_placement_view = { diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 20d3014eb04b..c45621efd85c 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1742,13 +1742,7 @@ class PCIServersTest(_PCIServersTestBase): def setUp(self): super().setUp() - patcher = mock.patch( - "nova.compute.pci_placement_translator." - "_is_placement_tracking_enabled", - return_value=True - ) - self.addCleanup(patcher.stop) - patcher.start() + self.flags(group="pci", report_in_placement=True) def test_create_server_with_pci_dev_and_numa(self): """Verifies that an instance can be booted with cpu pinning and with an diff --git a/releasenotes/notes/bp-pci-device-tracking-in-placement-75ee1d20a57662f2.yaml b/releasenotes/notes/bp-pci-device-tracking-in-placement-75ee1d20a57662f2.yaml new file mode 100644 index 000000000000..48a387f1bd08 --- /dev/null +++ b/releasenotes/notes/bp-pci-device-tracking-in-placement-75ee1d20a57662f2.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Nova started tracking PCI devices in Placement. This is an optional feature + disabled by default while we are implementing inventory tracking and + scheduling support for both PCI passthrough devices and SR-IOV devices + consumed via Neutron ports. Please read our + `documentation <https://docs.openstack.org/nova/latest/admin/pci-passthrough.html#pci-tracking-in-placement>`_ + for more details on what is supported and how this feature can be enabled.