Handle PCI dev reconf with allocations

PCI devices which are allocated to instances can be removed from the
[pci]device_spec configuration or can be removed from the hypervisor
directly. The existing PciTracker code handles these cases by keeping the
PciDevice in the nova DB as existing and allocated, and by issuing a warning in the
logs during the compute service startup that nova is in an inconsistent
state. Similar behavior is now added to the PCI placement tracking code
so the PCI inventories and allocations in placement are kept in such
situations.

There is one case where we cannot simply accept the PCI device
reconfiguration by keeping the existing allocations and applying the new
config. It is when a PF that is configured and allocated is removed and
VFs from this PF are now configured in the [pci]device_spec, and vice
versa when VFs are removed and their parent PF is configured. In this case
keeping the existing inventory and allocations and adding the new inventory
to placement would result in placement model where a single PCI device
would provide both PF and VF inventories. This dependent device
configuration is not supported as it could lead to double consumption.
In such situation the compute service will refuse to start.

blueprint: pci-device-tracking-in-placement
Change-Id: Id130893de650cc2d38953cea7cf9f53af71ced93
This commit is contained in:
Balazs Gibizer 2022-08-08 12:42:56 +02:00
parent ab439dadb1
commit 9268bc36a3
7 changed files with 281 additions and 42 deletions

View File

@ -404,4 +404,20 @@ is upgraded. If there is an in-progress migration, then the PCI allocation on
the source host of the migration will not be healed. The placement view will be
consistent after such migration is completed or reverted.
Reconfiguring the PCI devices on the hypervisor or changing the
:oslo.config:option:`pci.device_spec` configuration option and restarting the
nova-compute service is supported in the following cases:
* new devices are added
* devices without allocations are removed
Removing a device that has allocations is not supported. If a device with any
allocation is removed then the nova-compute service will keep the device and
the allocation in the nova DB and in placement, and will log a warning. If
a device with any allocation is reconfigured in a way that an allocated PF is
removed and VFs from the same PF are configured (or vice versa) then
nova-compute will refuse to start as it would create a situation where both
the PF and its VFs are made available for consumption.
For deeper technical details please read the `nova specification <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_.

View File

@ -443,15 +443,33 @@ class PlacementView:
fields.PciDeviceStatus.DELETED,
fields.PciDeviceStatus.REMOVED,
):
# If the PCI tracker marked the device DELETED or REMOVED then
# such device is not allocated, so we are free to drop it from
# placement too.
self._remove_dev(dev)
else:
if not dev_spec:
LOG.warning(
"Device spec is not found for device %s in "
"[pci]device_spec. Ignoring device in Placement resource "
"view. This should not happen. Please file a bug.",
dev.address
)
if dev.instance_uuid:
LOG.warning(
"Device spec is not found for device %s in "
"[pci]device_spec. We are skipping this devices "
"during Placement update. The device is allocated by "
"%s. You should not remove an allocated device from "
"the configuration. Please restore the configuration "
"or cold migrate the instance to resolve the "
"inconsistency.",
dev.address,
dev.instance_uuid
)
else:
LOG.warning(
"Device spec is not found for device %s in "
"[pci]device_spec. Ignoring device in Placement "
"resource view. This should not happen. Please file a "
"bug.",
dev.address
)
return
self._add_dev(dev, dev_spec.get_tags())

View File

@ -1295,14 +1295,22 @@ class ResourceTracker(object):
# This merges in changes from the provider config files loaded in init
self._merge_provider_configs(self.provider_configs, prov_tree)
# Flush any changes. If we either processed ReshapeNeeded above or
# update_provider_tree_for_pci did reshape, then we need to pass allocs
# to update_from_provider_tree to hit placement's POST /reshaper route.
self.reportclient.update_from_provider_tree(
context,
prov_tree,
allocations=allocs if driver_reshaped or pci_reshaped else None
)
try:
# Flush any changes. If we either processed ReshapeNeeded above or
# update_provider_tree_for_pci did reshape, then we need to pass
# allocs to update_from_provider_tree to hit placement's POST
# /reshaper route.
self.reportclient.update_from_provider_tree(
context,
prov_tree,
allocations=allocs if driver_reshaped or pci_reshaped else None
)
except exception.InventoryInUse as e:
# This means an inventory reconfiguration (e.g.: removing a parent
# PF and adding a VF under that parent) was not possible due to
# existing allocations. Translate the exception to prevent the
# compute service to start
raise exception.PlacementPciException(error=str(e))
def _update(self, context, compute_node, startup=False):
"""Update partial stats locally and populate them to Scheduler."""

View File

@ -1373,7 +1373,6 @@ class SchedulerReportClient(object):
# can inherit.
helper_exceptions = (
exception.InvalidResourceClass,
exception.InventoryInUse,
exception.ResourceProviderAggregateRetrievalFailed,
exception.ResourceProviderDeletionFailed,
exception.ResourceProviderInUse,

View File

@ -36,7 +36,28 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
# Just placeholders to satisfy the base class. The real value will be
# redefined by the tests
PCI_DEVICE_SPEC = []
PCI_ALIAS = None
# PCI aliases matching the PCI, PF and VF devices emulated by fakelibvirt
# so that tests can request such devices via flavor extra_specs, e.g.
# "pci_passthrough:alias": "a-pf:1".
PCI_ALIAS = [
    jsonutils.dumps(x)
    for x in (
        {
            "vendor_id": fakelibvirt.PCI_VEND_ID,
            "product_id": fakelibvirt.PCI_PROD_ID,
            "name": "a-pci-dev",
        },
        {
            "vendor_id": fakelibvirt.PCI_VEND_ID,
            "product_id": fakelibvirt.PF_PROD_ID,
            "device_type": "type-PF",
            "name": "a-pf",
        },
        {
            "vendor_id": fakelibvirt.PCI_VEND_ID,
            "product_id": fakelibvirt.VF_PROD_ID,
            "device_type": "type-VF",
            "name": "a-vf",
        },
    )
]
def setUp(self):
super().setUp()
@ -681,6 +702,179 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
},
)
def _create_one_compute_with_a_pf_consumed_by_an_instance(self):
    """Start a compute with a single whitelisted PF and boot a server
    consuming that PF.

    :returns: a tuple of the created server and the expected placement
        PCI view dict of the compute, so callers can assert against it
        after reconfiguring the compute.
    """
    # The fake libvirt will emulate on the host:
    # * one type-PF in slot 0, with one type-VF under it
    #   (NOTE: only the PF is whitelisted below, the VF is ignored)
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=1)
    # we match the PF only and ignore the VF
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.PF_PROD_ID,
                "address": "0000:81:00.0",
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.mock_pci_report_in_placement.return_value = True
    self.start_compute(hostname="compute1", pci_info=pci_info)

    self.assertPCIDeviceCounts("compute1", total=1, free=1)
    compute1_expected_placement_view = {
        "inventories": {
            "0000:81:00.0": {self.PF_RC: 1},
        },
        "traits": {
            "0000:81:00.0": [],
        },
        "usages": {
            "0000:81:00.0": {self.PF_RC: 0},
        },
        "allocations": {},
    }
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)

    # Create an instance consuming the PF
    extra_spec = {"pci_passthrough:alias": "a-pf:1"}
    flavor_id = self._create_flavor(extra_spec=extra_spec)
    server = self._create_server(flavor_id=flavor_id, networks=[])
    self.assertPCIDeviceCounts("compute1", total=1, free=0)
    compute1_expected_placement_view["usages"] = {
        "0000:81:00.0": {self.PF_RC: 1},
    }
    compute1_expected_placement_view["allocations"][server["id"]] = {
        "0000:81:00.0": {self.PF_RC: 1},
    }
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)
    self._run_periodics()
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)

    return server, compute1_expected_placement_view
def test_device_reconfiguration_with_allocations_config_change_warn(self):
    """Removing an allocated PF from [pci]device_spec is accepted with
    warnings; the device, its placement RP, and the allocation are kept
    intact.
    """
    server, compute1_expected_placement_view = (
        self._create_one_compute_with_a_pf_consumed_by_an_instance())

    # remove 0000:81:00.0 from the device spec and restart the compute
    device_spec = self._to_device_spec_conf([])
    self.flags(group='pci', device_spec=device_spec)
    # The PF is used but removed from the config. The PciTracker warns
    # but keeps the device, so the placement logic mimics this and only
    # warns but keeps the RP and the allocation in placement intact.
    self.restart_compute_service(hostname="compute1")
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)
    self._run_periodics()
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)

    # the warning from the PciTracker
    self.assertIn(
        "WARNING [nova.pci.manager] Unable to remove device with status "
        "'allocated' and ownership %s because of PCI device "
        "1:0000:81:00.0 is allocated instead of ['available', "
        "'unavailable', 'unclaimable']. Check your [pci]device_spec "
        "configuration to make sure this allocated device is whitelisted. "
        "If you have removed the device from the whitelist intentionally "
        "or the device is no longer available on the host you will need "
        "to delete the server or migrate it to another host to silence "
        "this warning."
        % server['id'],
        self.stdlog.logger.output,
    )
    # the warning from the placement PCI tracking logic
    self.assertIn(
        "WARNING [nova.compute.pci_placement_translator] Device spec is "
        "not found for device 0000:81:00.0 in [pci]device_spec. We are "
        "skipping this devices during Placement update. The device is "
        "allocated by %s. You should not remove an allocated device from "
        "the configuration. Please restore the configuration or cold "
        "migrate the instance to resolve the inconsistency."
        % server['id'],
        self.stdlog.logger.output,
    )
def test_device_reconfiguration_with_allocations_config_change_stop(self):
    """Switching the whitelist from an allocated PF to one of its VFs
    makes the compute service refuse to start.
    """
    self._create_one_compute_with_a_pf_consumed_by_an_instance()

    # switch 0000:81:00.0 PF to 0000:81:00.1 VF
    # in the config, then restart the compute service

    # only match the VF now
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
                "address": "0000:81:00.1",
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)

    # The compute fails to start as the new config would mean that the
    # PF inventory is removed from the 0000:81:00.0 RP and the VF
    # inventory is added there instead, but the PF inventory has
    # allocations. Keeping the old inventory as in
    # test_device_reconfiguration_with_allocations_config_change_warn is
    # not an option as it would result in two resource classes on the
    # same RP, one for the PF and one for the VF. That would allow
    # consuming the same physical device twice. Such dependent device
    # configuration is intentionally not supported so we are stopping
    # the compute service.
    ex = self.assertRaises(
        exception.PlacementPciException,
        self.restart_compute_service,
        hostname="compute1"
    )
    self.assertRegex(
        str(ex),
        "Failed to gather or report PCI resources to Placement: There was "
        "a conflict when trying to complete your request.\n\n "
        "update conflict: Inventory for 'CUSTOM_PCI_8086_1528' on "
        "resource provider '.*' in use.",
    )
def test_device_reconfiguration_with_allocations_hyp_change(self):
    """Removing an allocated PF from the hypervisor while the device
    spec is left intact only warns; the placement view and allocation
    are kept.
    """
    server, compute1_expected_placement_view = (
        self._create_one_compute_with_a_pf_consumed_by_an_instance())

    # restart the compute but simulate that the device 0000:81:00.0 is
    # removed from the hypervisor while the device spec config left
    # intact. The PciTracker will notice this and log a warning. The
    # placement tracking logic simply keeps the allocation intact in
    # placement as both the PciDevice and the DeviceSpec is available.
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=0, num_vfs=0)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)
    self._run_periodics()
    self.assert_placement_pci_view(
        "compute1", **compute1_expected_placement_view)

    # the warning from the PciTracker
    self.assertIn(
        "WARNING [nova.pci.manager] Unable to remove device with status "
        "'allocated' and ownership %s because of PCI device "
        "1:0000:81:00.0 is allocated instead of ['available', "
        "'unavailable', 'unclaimable']. Check your [pci]device_spec "
        "configuration to make sure this allocated device is whitelisted. "
        "If you have removed the device from the whitelist intentionally "
        "or the device is no longer available on the host you will need "
        "to delete the server or migrate it to another host to silence "
        "this warning."
        % server['id'],
        self.stdlog.logger.output,
    )
def test_reporting_disabled_nothing_is_reported(self):
# The fake libvirt will emulate on the host:
# * one type-PCI in slot 0
@ -765,32 +959,6 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
)
)
# Pre-configure a PCI alias to consume our devs
alias_pci = {
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.PCI_PROD_ID,
"name": "a-pci-dev",
}
alias_pf = {
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.PF_PROD_ID,
"device_type": "type-PF",
"name": "a-pf",
}
alias_vf = {
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.VF_PROD_ID,
"device_type": "type-VF",
"name": "a-vf",
}
self.flags(
group='pci',
alias=self._to_pci_alias_conf([alias_pci, alias_pf, alias_vf]))
@staticmethod
def _to_pci_alias_conf(alias_list):
return [jsonutils.dumps(x) for x in alias_list]
@staticmethod
def _move_allocation(allocations, from_uuid, to_uuid):
allocations[to_uuid] = allocations[from_uuid]

View File

@ -50,6 +50,7 @@ class TestTranslator(test.NoDBTestCase):
pci_device.PciDevice(
address="0000:81:00.0",
status=fields.PciDeviceStatus.AVAILABLE,
instance_uuid=None,
)
]
)

View File

@ -1960,6 +1960,35 @@ class TestUpdateComputeNode(BaseTestCase):
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@mock.patch(
    'nova.compute.resource_tracker.ResourceTracker.'
    '_sync_compute_service_disabled_trait',
    new=mock.Mock()
)
@mock.patch(
    'nova.compute.resource_tracker.ResourceTracker._resource_change',
    new=mock.Mock(return_value=False)
)
def test_update_pci_reporting_allocation_in_use_error_propagated(self):
    """Ensure an InventoryInUse error from placement is translated.

    If the PCI placement reporting code tries to remove inventory that
    still has allocations (e.g. due to an invalid hypervisor or
    [pci]device_spec reconfiguration) then the InventoryInUse error is
    surfaced as PlacementPciException and the compute startup fails.
    """
    self._setup_rt()
    node = _COMPUTE_NODE_FIXTURES[0].obj_clone()
    # Simulate placement refusing the inventory update because the
    # inventory to be removed is still allocated.
    in_use = exc.InventoryInUse(
        resource_class="FOO", resource_provider="bar")
    self.rt.reportclient.update_from_provider_tree.side_effect = in_use
    self.assertRaises(
        exc.PlacementPciException,
        self.rt._update,
        mock.sentinel.ctx,
        node,
        startup=True,
    )
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):