diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index df4aa87e64ba..aecf0c9e3f4a 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -386,4 +386,12 @@ be added to the resource provider representing the matching PCI devices. enforce the same rule for the configuration as well and will refuse to start if both the parent PF and its VFs are configured. +.. important:: + While nova supported configuring PCI devices by device name via the + ``devname`` parameter in :oslo.config:option:`pci.device_spec` in the past, + this proved to be problematic as the netdev name of a PCI device could + change for multiple reasons during hypervisor reboot. So since nova 26.0.0 + (Zed) the nova-compute service will refuse to start with such configuration. + It is suggested to use the PCI address of the device instead. + For deeper technical details please read the `nova specification. `_ diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index b0ad6ba4ae53..77414b5d189b 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -22,6 +22,7 @@ from nova import exception from nova.i18n import _ from nova.objects import fields from nova.objects import pci_device +from nova.pci import devspec from nova.pci import manager as pci_manager @@ -278,6 +279,18 @@ class PlacementView: rp.update_provider_tree(provider_tree) +def ensure_no_dev_spec_with_devname(dev_specs: ty.List[devspec.PciDeviceSpec]): + for dev_spec in dev_specs: + if dev_spec.dev_spec_conf.get("devname"): + msg = _( + "Invalid [pci]device_spec configuration. PCI Placement " + "reporting does not support 'devname' based device " + "specification but we got %(dev_spec)s. " + "Please use PCI address in the configuration instead." + ) % {"dev_spec": dev_spec.dev_spec_conf} + raise exception.PlacementPciException(error=msg) + + def update_provider_tree_for_pci( provider_tree: provider_tree.ProviderTree, nodename: str, @@ -317,6 +330,8 @@ def update_provider_tree_for_pci( # If tracking is not enabled we just return without touching anything return False + ensure_no_dev_spec_with_devname(pci_tracker.dev_filter.specs) + LOG.debug( 'Collecting PCI inventories and allocations to track them in Placement' ) diff --git a/nova/pci/devspec.py b/nova/pci/devspec.py index d0c2edd1a5c3..386005c8eb0b 100644 --- a/nova/pci/devspec.py +++ b/nova/pci/devspec.py @@ -12,6 +12,7 @@ # under the License. import abc +import copy import re import string import typing as ty @@ -267,6 +268,10 @@ class WhitelistPciAddress(object): class PciDeviceSpec(PciAddressSpec): def __init__(self, dev_spec: ty.Dict[str, str]) -> None: + # stored for better error reporting + self.dev_spec_conf = copy.deepcopy(dev_spec) + # the non tag fields (i.e. address, devname) will be removed by + # _init_dev_details self.tags = dev_spec self._init_dev_details() diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index c62f433a2b56..dce30ec44c0c 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -51,6 +51,12 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): # These tests should not depend on the host's sysfs self.useFixture( fixtures.MockPatch('nova.pci.utils.is_physical_function')) + self.useFixture( + fixtures.MockPatch( + 'nova.pci.utils.get_function_by_ifname', + return_value=(None, False) + ) + ) @staticmethod def _to_device_spec_conf(spec_list): @@ -309,3 +315,26 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): inventories={}, traits={}, ) + + def test_devname_based_dev_spec_rejected(self): + device_spec = self._to_device_spec_conf( + [ + { + "devname": "eth0", + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + + ex = self.assertRaises( + exception.PlacementPciException, + self.start_compute, + hostname="compute1", + ) + self.assertIn( + " Invalid [pci]device_spec configuration. PCI Placement reporting " + "does not support 'devname' based device specification but we got " + "{'devname': 'eth0'}. Please use PCI address in the configuration " + "instead.", + str(ex) + ) diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py index 9499e64bb0c0..edaf5b68709a 100644 --- a/nova/tests/unit/compute/test_pci_placement_translator.py +++ b/nova/tests/unit/compute/test_pci_placement_translator.py @@ -50,6 +50,7 @@ class TestTranslator(test.NoDBTestCase): ) # So we have a device but there is no spec for it pci_tracker.dev_filter.get_devspec = mock.Mock(return_value=None) + pci_tracker.dev_filter.specs = [] # we expect that the provider_tree is not touched as the device without # spec is skipped, we assert that with the NonCallableMock provider_tree = mock.NonCallableMock()