From 2cb1eedeafbfe0d99f5a7dcdf9c9ad02e735c6df Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Sun, 28 Aug 2022 19:20:05 +0200 Subject: [PATCH] Allow enabling PCI scheduling in Placement A new configuration option [filter_scheduler]pci_in_placement is added that allows enabling the scheduler logic for PCI device handling in Placement for flavor based PCI requests. blueprint: pci-device-tracking-in-placement Change-Id: I5ddf6d3cdc7e05cc4914b9b1e762fa02a5c7c550 --- doc/source/admin/pci-passthrough.rst | 22 ++++++++++++++++++ nova/compute/pci_placement_translator.py | 2 +- nova/conf/pci.py | 6 +++-- nova/conf/scheduler.py | 21 ++++++++++++++++- nova/objects/request_spec.py | 12 +++------- nova/pci/stats.py | 12 +++++----- .../libvirt/test_pci_in_placement.py | 23 +++++++------------ .../libvirt/test_pci_sriov_servers.py | 20 ++-------------- nova/tests/unit/objects/test_request_spec.py | 20 +++------------- ...n-placement-antelope-082310a2b0337e0e.yaml | 8 +++++++ 10 files changed, 77 insertions(+), 69 deletions(-) create mode 100644 releasenotes/notes/bp-pci-device-tracking-in-placement-antelope-082310a2b0337e0e.yaml diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 46d16042147d..09a963603daa 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -65,6 +65,10 @@ capabilities. :oslo.config:option:`pci.device_spec` configuration that uses the ``devname`` field. +.. versionchanged:: 27.0.0 (2023.1 Antelope): + Nova provides Placement based scheduling support for servers with flavor + based PCI requests. This support is disabled by default. + Enabling PCI passthrough ------------------------ @@ -442,6 +446,24 @@ removed and VFs from the same PF is configured (or vice versa) then nova-compute will refuse to start as it would create a situation where both the PF and its VFs are made available for consumption. 
+Since nova 27.0.0 (2023.1 Antelope) scheduling and allocation of PCI devices +in Placement can also be enabled via +:oslo.config:option:`filter_scheduler.pci_in_placement`. Please note that this +should only be enabled after all the computes in the system are configured to +report PCI inventory in Placement via +enabling :oslo.config:option:`pci.report_in_placement`. In Antelope flavor +based PCI requests are supported but Neutron port based PCI requests are not +handled in Placement. + +If you are upgrading from an earlier version with already existing servers with +PCI usage then you must enable :oslo.config:option:`pci.report_in_placement` +first on all your computes having PCI allocations and then restart the +nova-compute service, before you enable +:oslo.config:option:`filter_scheduler.pci_in_placement`. The compute service +will heal the missing PCI allocation in placement during startup and will +continue healing missing allocations for future servers until the scheduling +support is enabled. + If a flavor requests multiple ``type-VF`` devices via :nova:extra-spec:`pci_passthrough:alias` then it is important to consider the value of :nova:extra-spec:`group_policy` as well. The value ``none`` diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index 3ee52e303cae..016efd9122cc 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -614,7 +614,7 @@ def update_provider_tree_for_pci( if updated: LOG.debug( "Placement PCI view needs allocation healing. This should only " - "happen if [scheduler]pci_in_placement is still disabled. " + "happen if [filter_scheduler]pci_in_placement is still disabled. 
" "Original allocations: %s New allocations: %s", old_alloc, allocations, diff --git a/nova/conf/pci.py b/nova/conf/pci.py index 468ae9a3bd74..4a2f1aef32cb 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -79,7 +79,8 @@ Possible Values: ``vendor_id`` and ``product_id`` values of the alias in the form of ``CUSTOM_PCI_{vendor_id}_{product_id}``. The ``resource_class`` requested in the alias is matched against the ``resource_class`` defined in the - ``[pci]device_spec``. + ``[pci]device_spec``. This field can only be used if + ``[filter_scheduler]pci_in_placement`` is enabled. ``traits`` An optional comma separated list of Placement trait names requested to be @@ -91,7 +92,8 @@ Possible Values: prefixed. The maximum allowed length of a trait name is 255 character including the prefix. Every trait in ``traits`` requested in the alias ensured to be in the list of traits provided in the ``traits`` field of - the ``[pci]device_spec`` when scheduling the request. + the ``[pci]device_spec`` when scheduling the request. This field can only + be used if ``[filter_scheduler]pci_in_placement`` is enabled. * Supports multiple aliases by repeating the option (not by specifying a list value):: diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py index 03e78fe70172..c75bd07c5ba0 100644 --- a/nova/conf/scheduler.py +++ b/nova/conf/scheduler.py @@ -745,7 +745,26 @@ Possible values: Related options: * ``[filter_scheduler] aggregate_image_properties_isolation_namespace`` -""")] +"""), + cfg.BoolOpt( + "pci_in_placement", + default=False, + help=""" +Enable scheduling and claiming PCI devices in Placement. + +This can be enabled after ``[pci]report_in_placement`` is enabled on all +compute hosts. + +When enabled the scheduler queries Placement about the PCI device +availability to select destination for a server with PCI request. The scheduler +also allocates the selected PCI devices in Placement. 
Note that this logic +does not replace the PCIPassthroughFilter but extends it. + +* ``[pci] report_in_placement`` +* ``[pci] alias`` +* ``[pci] device_spec`` +"""), +] metrics_group = cfg.OptGroup( name="metrics", diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py index 6443cff8ca4f..a4ca77edf67f 100644 --- a/nova/objects/request_spec.py +++ b/nova/objects/request_spec.py @@ -22,6 +22,7 @@ from oslo_serialization import jsonutils from oslo_utils import versionutils from nova.compute import pci_placement_translator +import nova.conf from nova.db.api import api as api_db_api from nova.db.api import models as api_models from nova import exception @@ -30,6 +31,7 @@ from nova.objects import base from nova.objects import fields from nova.objects import instance as obj_instance +CONF = nova.conf.CONF LOG = logging.getLogger(__name__) REQUEST_SPEC_OPTIONAL_ATTRS = ['requested_destination', @@ -487,16 +489,8 @@ class RequestSpec(base.NovaObject): def _traits_from_request(spec: ty.Dict[str, ty.Any]) -> ty.Set[str]: return pci_placement_translator.get_traits(spec.get("traits", "")) - # This is here temporarily until the PCI placement scheduling is under - # implementation. When that is done there will be a config option - # [scheduler]pci_in_placement to configure this. Now we add this as a - # function to allow tests to selectively enable the WIP feature - @staticmethod - def _pci_in_placement_enabled(): - return False - def generate_request_groups_from_pci_requests(self): - if not self._pci_in_placement_enabled(): + if not CONF.filter_scheduler.pci_in_placement: return False for pci_request in self.pci_requests.requests: diff --git a/nova/pci/stats.py b/nova/pci/stats.py index fab91c055ace..ac97501a0746 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -552,7 +552,7 @@ class PciDeviceStats(object): # by it. 
This could happen if the instance only has neutron port # based InstancePCIRequest as that is currently not having # placement allocation (except for QoS ports, but that handled in a - # separate codepath) or if the [scheduler]pci_in_placement + # separate codepath) or if the [filter_scheduler]pci_in_placement # configuration option is not enabled in the scheduler. return pools @@ -563,15 +563,15 @@ class PciDeviceStats(object): # NOTE(gibi): There can be pools without rp_uuid field if the # [pci]report_in_placement is not enabled for a compute with # viable PCI devices. We have a non-empty rp_uuids, so we know - # that the [scheduler]pci_in_placement is enabled. This is a - # configuration error. + # that the [filter_scheduler]pci_in_placement is enabled. This + # is a configuration error. LOG.warning( "The PCI pool %s isn't mapped to an RP UUID but the " "scheduler is configured to create PCI allocations in " "placement. This should not happen. Please enable " "[pci]report_in_placement on all compute hosts before " - "enabling [scheduler]pci_in_placement in the scheduler. " - "This pool is ignored now.", pool) + "enabling [filter_scheduler]pci_in_placement in the " + "scheduler. 
This pool is ignored now.", pool) continue if rp_uuid in rp_uuids: @@ -809,7 +809,7 @@ class PciDeviceStats(object): # but the object is hard to change retroactively rp_uuids = request.spec[0].get('rp_uuids') if not rp_uuids: - # This can happen if [scheduler]pci_in_placement is not + # This can happen if [filter_scheduler]pci_in_placement is not # enabled yet # set() will signal that any PCI pool can be used for this # request diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index 989ebc85c7ef..23e8f84e4b47 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -1616,15 +1616,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): def setUp(self): super().setUp() - # TODO(gibi): replace this with setting the [scheduler]pci_in_placement - # confing to True once that config is added - self.mock_pci_in_placement_enabled = self.useFixture( - fixtures.MockPatch( - 'nova.objects.request_spec.RequestSpec.' - '_pci_in_placement_enabled', - return_value=True - ) - ).mock + self.flags(group='filter_scheduler', pci_in_placement=True) def test_boot_with_custom_rc_and_traits(self): # The fake libvirt will emulate on the host: @@ -1737,12 +1729,13 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): self.assert_no_pci_healing("compute1") def test_device_claim_consistent_with_placement_allocation(self): - """As soon as [scheduler]pci_in_placement is enabled the nova-scheduler - will allocate PCI devices in placement. Then on the nova-compute side - the PCI claim will also allocate PCI devices in the nova DB. This test - will create a situation where the two allocation could contradict and - observes that in a contradicting situation the PCI claim will fail - instead of allocating a device that is not allocated in placement. 
+ """As soon as [filter_scheduler]pci_in_placement is enabled the + nova-scheduler will allocate PCI devices in placement. Then on the + nova-compute side the PCI claim will also allocate PCI devices in the + nova DB. This test will create a situation where the two allocation + could contradict and observes that in a contradicting situation the PCI + claim will fail instead of allocating a device that is not allocated in + placement. For the contradiction to happen we need two PCI devices that looks different from placement perspective than from the nova DB perspective. diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index e470ba2707ad..5a7b0ec06e3b 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1952,15 +1952,7 @@ class PCIServersTest(_PCIServersTestBase): def setUp(self): super().setUp() self.flags(group="pci", report_in_placement=True) - # TODO(gibi): replace this with setting the [scheduler]pci_prefilter - # confing to True once that config is added - self.mock_pci_in_placement_enabled = self.useFixture( - fixtures.MockPatch( - 'nova.objects.request_spec.RequestSpec.' - '_pci_in_placement_enabled', - return_value=True - ) - ).mock + self.flags(group='filter_scheduler', pci_in_placement=True) def test_create_server_with_pci_dev_and_numa(self): """Verifies that an instance can be booted with cpu pinning and with an @@ -3026,15 +3018,7 @@ class PCIServersWithPreferredNUMATest(_PCIServersTestBase): def setUp(self): super().setUp() self.flags(group="pci", report_in_placement=True) - # TODO(gibi): replace this with setting the [scheduler]pci_in_placement - # confing to True once that config is added - self.mock_pci_in_placement_enabled = self.useFixture( - fixtures.MockPatch( - 'nova.objects.request_spec.RequestSpec.' 
- '_pci_in_placement_enabled', - return_value=True - ) - ).mock + self.flags(group='filter_scheduler', pci_in_placement=True) def test_create_server_with_pci_dev_and_numa(self): """Validate behavior of 'preferred' PCI NUMA policy. diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py index 271c94302539..58b985923400 100644 --- a/nova/tests/unit/objects/test_request_spec.py +++ b/nova/tests/unit/objects/test_request_spec.py @@ -14,7 +14,6 @@ import collections from unittest import mock -import fixtures from oslo_serialization import jsonutils from oslo_utils.fixture import uuidsentinel as uuids from oslo_utils import uuidutils @@ -431,13 +430,8 @@ class _TestRequestSpecObject(object): self.assertListEqual([rg], spec.requested_resources) self.assertEqual(req_lvl_params, spec.request_level_params) - # TODO(gibi): replace this with setting the config - # [scheduler]pci_in_placement=True once that flag is available - @mock.patch( - 'nova.objects.request_spec.RequestSpec._pci_in_placement_enabled', - new=mock.Mock(return_value=True), - ) def test_from_components_flavor_based_pci_requests(self): + self.flags(group='filter_scheduler', pci_in_placement=True) ctxt = context.RequestContext( fakes.FAKE_USER_ID, fakes.FAKE_PROJECT_ID ) @@ -1119,18 +1113,10 @@ class TestRemoteRequestSpecObject(test_objects._RemoteTest, class TestInstancePCIRequestToRequestGroups(test.NoDBTestCase): def setUp(self): super().setUp() - # TODO(gibi): replace this with setting the config - # [scheduler]pci_in_placement=True once that flag is available - self.mock_pci_in_placement_enabled = self.useFixture( - fixtures.MockPatch( - "nova.objects.request_spec.RequestSpec." 
- "_pci_in_placement_enabled", - return_value=True, - ) - ).mock + self.flags(group='filter_scheduler', pci_in_placement=True) def test_pci_reqs_ignored_if_disabled(self): - self.mock_pci_in_placement_enabled.return_value = False + self.flags(group='filter_scheduler', pci_in_placement=False) spec = request_spec.RequestSpec( requested_resources=[], diff --git a/releasenotes/notes/bp-pci-device-tracking-in-placement-antelope-082310a2b0337e0e.yaml b/releasenotes/notes/bp-pci-device-tracking-in-placement-antelope-082310a2b0337e0e.yaml new file mode 100644 index 000000000000..7a9e53ed2695 --- /dev/null +++ b/releasenotes/notes/bp-pci-device-tracking-in-placement-antelope-082310a2b0337e0e.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Since 26.0.0 (Zed) Nova supports tracking PCI devices in Placement. Now + Nova also supports scheduling flavor based PCI device requests via + Placement. This support is disabled by default. Please read + `documentation <https://docs.openstack.org/nova/latest/admin/pci-passthrough.html>`_ + for more details on what is supported and how this feature can be enabled.