Merge "Reject PCI dependent device config"
This commit is contained in:
@@ -367,4 +367,11 @@ resource class can be customized via the ``resource_class`` tag in the
|
||||
tag in that configuration that allows specifying a list of placement traits to
|
||||
be added to the resource provider representing the matching PCI devices.
|
||||
|
||||
.. important::
|
||||
While nova supported configuring both the PF and its children VFs for PCI
|
||||
passthrough in the past, it only allowed consuming either the parent PF or
|
||||
its children VFs. Since 26.0.0. (Zed) the nova-compute service will
|
||||
enforce the same rule for the configuration as well and will refuse to
|
||||
start if both the parent PF and its VFs are configured.
|
||||
|
||||
For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_
|
||||
|
||||
@@ -10153,6 +10153,19 @@ class ComputeManager(manager.Manager):
|
||||
# (e.g. disable the service).
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.exception("ReshapeNeeded exception is unexpected here!")
|
||||
except exception.PlacementPciException:
|
||||
# If we are at startup and the Placement PCI inventory handling
|
||||
# failed then probably there is a configuration error. Propagate
|
||||
# the error up to kill the service.
|
||||
if startup:
|
||||
raise
|
||||
# If we are not at startup then we can assume that the
|
||||
# configuration was correct at startup so the error is probably
|
||||
# transient. Anyhow we cannot kill the service any more so just
|
||||
# log the error and continue.
|
||||
LOG.exception(
|
||||
"Error updating PCI resources for node %(node)s.",
|
||||
{'node': nodename})
|
||||
except Exception:
|
||||
LOG.exception("Error updating resources for node %(node)s.",
|
||||
{'node': nodename})
|
||||
|
||||
@@ -124,6 +124,12 @@ class PciResourceProvider:
|
||||
return [self.parent_dev] if self.parent_dev else self.children_devs
|
||||
|
||||
def add_child(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
|
||||
if self.parent_dev:
|
||||
raise exception.PlacementPciDependentDeviceException(
|
||||
parent_dev=dev.address,
|
||||
children_devs=",".join(dev.address for dev in self.devs)
|
||||
)
|
||||
|
||||
rc = _get_rc_for_dev(dev, dev_spec_tags)
|
||||
traits = _get_traits_for_dev(dev_spec_tags)
|
||||
self.children_devs.append(dev)
|
||||
@@ -131,6 +137,12 @@ class PciResourceProvider:
|
||||
self.traits = traits
|
||||
|
||||
def add_parent(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
|
||||
if self.parent_dev or self.children_devs:
|
||||
raise exception.PlacementPciDependentDeviceException(
|
||||
parent_dev=dev.address,
|
||||
children_devs=",".join(dev.address for dev in self.devs)
|
||||
)
|
||||
|
||||
self.parent_dev = dev
|
||||
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
|
||||
self.traits = _get_traits_for_dev(dev_spec_tags)
|
||||
|
||||
@@ -2432,3 +2432,11 @@ class ProviderConfigException(NovaException):
|
||||
class PlacementPciException(NovaException):
|
||||
msg_fmt = _(
|
||||
"Failed to gather or report PCI resources to Placement: %(error)s")
|
||||
|
||||
|
||||
class PlacementPciDependentDeviceException(PlacementPciException):
|
||||
msg_fmt = _(
|
||||
"Configuring both %(parent_dev)s and %(children_devs)s in "
|
||||
"[pci]device_spec is not supported. Either the parent PF or its "
|
||||
"children VFs can be configured."
|
||||
)
|
||||
|
||||
@@ -20,6 +20,7 @@ from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
from nova import exception
|
||||
from nova.tests.fixtures import libvirt as fakelibvirt
|
||||
from nova.tests.functional.libvirt import test_pci_sriov_servers
|
||||
|
||||
@@ -154,6 +155,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
|
||||
},
|
||||
# slot 1 func 0 is the type-PF dev. The child VF is ignored
|
||||
{
|
||||
"product_id": fakelibvirt.PF_PROD_ID,
|
||||
"address": "0000:81:01.0",
|
||||
"resource_class": "crypto",
|
||||
"traits": "to-the-moon,hodl"
|
||||
@@ -179,3 +181,38 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
def test_dependent_device_config_is_rejected(self):
|
||||
"""Configuring both the PF and its children VFs is not supported.
|
||||
Only either of them can be given to nova.
|
||||
"""
|
||||
# The fake libvirt will emulate on the host:
|
||||
# * one type-PF dev in slot 0 with a single type-VF under it
|
||||
pci_info = fakelibvirt.HostPCIDevicesInfo(
|
||||
num_pci=0, num_pfs=1, num_vfs=1)
|
||||
# both device will be matched by our config
|
||||
device_spec = self._to_device_spec_conf(
|
||||
[
|
||||
# PF
|
||||
{
|
||||
"address": "0000:81:00.0"
|
||||
},
|
||||
# Its child VF
|
||||
{
|
||||
"address": "0000:81:00.1"
|
||||
},
|
||||
]
|
||||
)
|
||||
self.flags(group='pci', device_spec=device_spec)
|
||||
|
||||
ex = self.assertRaises(
|
||||
exception.PlacementPciException,
|
||||
self.start_compute,
|
||||
hostname="compute1",
|
||||
pci_info=pci_info
|
||||
)
|
||||
self.assertIn(
|
||||
"Configuring both 0000:81:00.1 and 0000:81:00.0 in "
|
||||
"[pci]device_spec is not supported",
|
||||
str(ex)
|
||||
)
|
||||
|
||||
@@ -348,6 +348,46 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
self.context, mock.sentinel.node, startup=True)
|
||||
log_mock.exception.assert_called_once()
|
||||
|
||||
def test_update_available_resource_for_node_pci_placement_failed_startup(
|
||||
self
|
||||
):
|
||||
"""If the PCI placement translation failed during startup then the
|
||||
exception is raised up to kill the service
|
||||
"""
|
||||
rt = self._mock_rt(spec_set=['update_available_resource'])
|
||||
rt.update_available_resource.side_effect = (
|
||||
exception.PlacementPciException(error='error'))
|
||||
|
||||
self.assertRaises(
|
||||
exception.PlacementPciException,
|
||||
self.compute._update_available_resource_for_node,
|
||||
self.context,
|
||||
mock.sentinel.node,
|
||||
startup=True,
|
||||
)
|
||||
rt.update_available_resource.assert_called_once_with(
|
||||
self.context, mock.sentinel.node, startup=True)
|
||||
|
||||
@mock.patch('nova.compute.manager.LOG')
|
||||
def test_update_available_resource_for_node_pci_placement_failed_later(
|
||||
self, mock_log
|
||||
):
|
||||
"""If the PCI placement translation failed later (not at startup)
|
||||
during a periodic then the exception is just logged
|
||||
"""
|
||||
rt = self._mock_rt(spec_set=['update_available_resource'])
|
||||
rt.update_available_resource.side_effect = (
|
||||
exception.PlacementPciException(error='error'))
|
||||
|
||||
self.compute._update_available_resource_for_node(
|
||||
self.context, mock.sentinel.node, startup=False)
|
||||
rt.update_available_resource.assert_called_once_with(
|
||||
self.context, mock.sentinel.node, startup=False)
|
||||
mock_log.exception.assert_called_once_with(
|
||||
'Error updating PCI resources for node %(node)s.',
|
||||
{'node': mock.sentinel.node}
|
||||
)
|
||||
|
||||
@mock.patch.object(manager, 'LOG')
|
||||
@mock.patch.object(manager.ComputeManager,
|
||||
'_update_available_resource_for_node')
|
||||
|
||||
@@ -15,6 +15,8 @@ import ddt
|
||||
from unittest import mock
|
||||
|
||||
from nova.compute import pci_placement_translator as ppt
|
||||
from nova import exception
|
||||
from nova.objects import fields
|
||||
from nova.objects import pci_device
|
||||
from nova import test
|
||||
|
||||
@@ -101,3 +103,63 @@ class TestTranslator(test.NoDBTestCase):
|
||||
expected_rc,
|
||||
ppt._get_rc_for_dev(pci_dev, {"resource_class": rc_name})
|
||||
)
|
||||
|
||||
def test_dependent_device_pf_then_vf(self):
|
||||
pv = ppt.PlacementView("fake-node")
|
||||
pf = pci_device.PciDevice(
|
||||
address="0000:81:00.0",
|
||||
dev_type=fields.PciDeviceType.SRIOV_PF
|
||||
)
|
||||
vf = pci_device.PciDevice(
|
||||
address="0000:81:00.1",
|
||||
parent_addr=pf.address,
|
||||
dev_type=fields.PciDeviceType.SRIOV_VF
|
||||
)
|
||||
|
||||
pv.add_dev(pf, {"resource_class": "foo"})
|
||||
ex = self.assertRaises(
|
||||
exception.PlacementPciDependentDeviceException,
|
||||
pv.add_dev,
|
||||
vf,
|
||||
{"resource_class": "bar"}
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
"Configuring both 0000:81:00.1 and 0000:81:00.0 in "
|
||||
"[pci]device_spec is not supported. Either the parent PF or its "
|
||||
"children VFs can be configured.",
|
||||
str(ex),
|
||||
)
|
||||
|
||||
def test_dependent_device_vf_then_pf(self):
|
||||
pv = ppt.PlacementView("fake-node")
|
||||
pf = pci_device.PciDevice(
|
||||
address="0000:81:00.0",
|
||||
dev_type=fields.PciDeviceType.SRIOV_PF
|
||||
)
|
||||
vf = pci_device.PciDevice(
|
||||
address="0000:81:00.1",
|
||||
parent_addr=pf.address,
|
||||
dev_type=fields.PciDeviceType.SRIOV_VF
|
||||
)
|
||||
vf2 = pci_device.PciDevice(
|
||||
address="0000:81:00.2",
|
||||
parent_addr=pf.address,
|
||||
dev_type=fields.PciDeviceType.SRIOV_VF
|
||||
)
|
||||
|
||||
pv.add_dev(vf, {"resource_class": "foo"})
|
||||
pv.add_dev(vf2, {"resource_class": "foo"})
|
||||
ex = self.assertRaises(
|
||||
exception.PlacementPciDependentDeviceException,
|
||||
pv.add_dev,
|
||||
pf,
|
||||
{"resource_class": "bar"}
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
"Configuring both 0000:81:00.0 and 0000:81:00.1,0000:81:00.2 in "
|
||||
"[pci]device_spec is not supported. Either the parent PF or its "
|
||||
"children VFs can be configured.",
|
||||
str(ex),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user