Merge "Reject PCI dependent device config"

This commit is contained in:
Zuul
2022-08-26 04:42:58 +00:00
committed by Gerrit Code Review
7 changed files with 179 additions and 0 deletions

View File

@@ -367,4 +367,11 @@ resource class can be customized via the ``resource_class`` tag in the
tag in that configuration that allows specifying a list of placement traits to
be added to the resource provider representing the matching PCI devices.
.. important::
While nova supported configuring both the PF and its children VFs for PCI
passthrough in the past, it only allowed consuming either the parent PF or
its children VFs. Since 26.0.0. (Zed) the nova-compute service will
enforce the same rule for the configuration as well and will refuse to
start if both the parent PF and its VFs are configured.
For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_

View File

@@ -10153,6 +10153,19 @@ class ComputeManager(manager.Manager):
# (e.g. disable the service).
with excutils.save_and_reraise_exception():
LOG.exception("ReshapeNeeded exception is unexpected here!")
except exception.PlacementPciException:
# If we are at startup and the Placement PCI inventory handling
# failed then probably there is a configuration error. Propagate
# the error up to kill the service.
if startup:
raise
# If we are not at startup then we can assume that the
# configuration was correct at startup so the error is probably
# transient. Anyhow we cannot kill the service any more so just
# log the error and continue.
LOG.exception(
"Error updating PCI resources for node %(node)s.",
{'node': nodename})
except Exception:
LOG.exception("Error updating resources for node %(node)s.",
{'node': nodename})

View File

@@ -124,6 +124,12 @@ class PciResourceProvider:
return [self.parent_dev] if self.parent_dev else self.children_devs
def add_child(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
if self.parent_dev:
raise exception.PlacementPciDependentDeviceException(
parent_dev=dev.address,
children_devs=",".join(dev.address for dev in self.devs)
)
rc = _get_rc_for_dev(dev, dev_spec_tags)
traits = _get_traits_for_dev(dev_spec_tags)
self.children_devs.append(dev)
@@ -131,6 +137,12 @@ class PciResourceProvider:
self.traits = traits
def add_parent(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
if self.parent_dev or self.children_devs:
raise exception.PlacementPciDependentDeviceException(
parent_dev=dev.address,
children_devs=",".join(dev.address for dev in self.devs)
)
self.parent_dev = dev
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
self.traits = _get_traits_for_dev(dev_spec_tags)

View File

@@ -2432,3 +2432,11 @@ class ProviderConfigException(NovaException):
class PlacementPciException(NovaException):
msg_fmt = _(
"Failed to gather or report PCI resources to Placement: %(error)s")
class PlacementPciDependentDeviceException(PlacementPciException):
msg_fmt = _(
"Configuring both %(parent_dev)s and %(children_devs)s in "
"[pci]device_spec is not supported. Either the parent PF or its "
"children VFs can be configured."
)

View File

@@ -20,6 +20,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import jsonutils
from nova import exception
from nova.tests.fixtures import libvirt as fakelibvirt
from nova.tests.functional.libvirt import test_pci_sriov_servers
@@ -154,6 +155,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
},
# slot 1 func 0 is the type-PF dev. The child VF is ignored
{
"product_id": fakelibvirt.PF_PROD_ID,
"address": "0000:81:01.0",
"resource_class": "crypto",
"traits": "to-the-moon,hodl"
@@ -179,3 +181,38 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
],
},
)
def test_dependent_device_config_is_rejected(self):
"""Configuring both the PF and its children VFs is not supported.
Only either of them can be given to nova.
"""
# The fake libvirt will emulate on the host:
# * one type-PF dev in slot 0 with a single type-VF under it
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=1, num_vfs=1)
# both device will be matched by our config
device_spec = self._to_device_spec_conf(
[
# PF
{
"address": "0000:81:00.0"
},
# Its child VF
{
"address": "0000:81:00.1"
},
]
)
self.flags(group='pci', device_spec=device_spec)
ex = self.assertRaises(
exception.PlacementPciException,
self.start_compute,
hostname="compute1",
pci_info=pci_info
)
self.assertIn(
"Configuring both 0000:81:00.1 and 0000:81:00.0 in "
"[pci]device_spec is not supported",
str(ex)
)

View File

@@ -348,6 +348,46 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.context, mock.sentinel.node, startup=True)
log_mock.exception.assert_called_once()
def test_update_available_resource_for_node_pci_placement_failed_startup(
self
):
"""If the PCI placement translation failed during startup then the
exception is raised up to kill the service
"""
rt = self._mock_rt(spec_set=['update_available_resource'])
rt.update_available_resource.side_effect = (
exception.PlacementPciException(error='error'))
self.assertRaises(
exception.PlacementPciException,
self.compute._update_available_resource_for_node,
self.context,
mock.sentinel.node,
startup=True,
)
rt.update_available_resource.assert_called_once_with(
self.context, mock.sentinel.node, startup=True)
@mock.patch('nova.compute.manager.LOG')
def test_update_available_resource_for_node_pci_placement_failed_later(
self, mock_log
):
"""If the PCI placement translation failed later (not at startup)
during a periodic then the exception is just logged
"""
rt = self._mock_rt(spec_set=['update_available_resource'])
rt.update_available_resource.side_effect = (
exception.PlacementPciException(error='error'))
self.compute._update_available_resource_for_node(
self.context, mock.sentinel.node, startup=False)
rt.update_available_resource.assert_called_once_with(
self.context, mock.sentinel.node, startup=False)
mock_log.exception.assert_called_once_with(
'Error updating PCI resources for node %(node)s.',
{'node': mock.sentinel.node}
)
@mock.patch.object(manager, 'LOG')
@mock.patch.object(manager.ComputeManager,
'_update_available_resource_for_node')

View File

@@ -15,6 +15,8 @@ import ddt
from unittest import mock
from nova.compute import pci_placement_translator as ppt
from nova import exception
from nova.objects import fields
from nova.objects import pci_device
from nova import test
@@ -101,3 +103,63 @@ class TestTranslator(test.NoDBTestCase):
expected_rc,
ppt._get_rc_for_dev(pci_dev, {"resource_class": rc_name})
)
def test_dependent_device_pf_then_vf(self):
pv = ppt.PlacementView("fake-node")
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
)
vf = pci_device.PciDevice(
address="0000:81:00.1",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
)
pv.add_dev(pf, {"resource_class": "foo"})
ex = self.assertRaises(
exception.PlacementPciDependentDeviceException,
pv.add_dev,
vf,
{"resource_class": "bar"}
)
self.assertEqual(
"Configuring both 0000:81:00.1 and 0000:81:00.0 in "
"[pci]device_spec is not supported. Either the parent PF or its "
"children VFs can be configured.",
str(ex),
)
def test_dependent_device_vf_then_pf(self):
pv = ppt.PlacementView("fake-node")
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
)
vf = pci_device.PciDevice(
address="0000:81:00.1",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
)
vf2 = pci_device.PciDevice(
address="0000:81:00.2",
parent_addr=pf.address,
dev_type=fields.PciDeviceType.SRIOV_VF
)
pv.add_dev(vf, {"resource_class": "foo"})
pv.add_dev(vf2, {"resource_class": "foo"})
ex = self.assertRaises(
exception.PlacementPciDependentDeviceException,
pv.add_dev,
pf,
{"resource_class": "bar"}
)
self.assertEqual(
"Configuring both 0000:81:00.0 and 0000:81:00.1,0000:81:00.2 in "
"[pci]device_spec is not supported. Either the parent PF or its "
"children VFs can be configured.",
str(ex),
)