diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 44e53623032b..3fd5f7826ee3 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -404,4 +404,20 @@ is upgraded. If there is an in-progress migration, then the PCI allocation on the source host of the migration will not be healed. The placement view will be consistent after such migration is completed or reverted. +Reconfiguring the PCI devices on the hypervisor or changing the +:oslo.config:option:`pci.device_spec` configuration option and restarting the +nova-compute service is supported in the following cases: + +* new devices are added +* devices without allocation is removed + +Removing a device that has allocations is not supported. If a device having any +allocation is removed then the nova-compute service will keep the device and +the allocation exists in the nova DB and in placement and logs a warning. If +a device with any allocation is reconfigured in a way that an allocated PF is +removed and VFs from the same PF is configured (or vice versa) then +nova-compute will refuse to start as it would create a situation where both +the PF and its VFs are made available for consumption. + + For deeper technical details please read the `nova specification. `_ diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index c6ccbf27ae5f..c7669f520392 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -443,15 +443,33 @@ class PlacementView: fields.PciDeviceStatus.DELETED, fields.PciDeviceStatus.REMOVED, ): + # If the PCI tracker marked the device DELETED or REMOVED then + # such device is not allocated, so we are free to drop it from + # placement too. self._remove_dev(dev) else: if not dev_spec: - LOG.warning( - "Device spec is not found for device %s in " - "[pci]device_spec. Ignoring device in Placement resource " - "view. This should not happen. Please file a bug.", - dev.address - ) + if dev.instance_uuid: + LOG.warning( + "Device spec is not found for device %s in " + "[pci]device_spec. We are skipping this devices " + "during Placement update. The device is allocated by " + "%s. You should not remove an allocated device from " + "the configuration. Please restore the configuration " + "or cold migrate the instance to resolve the " + "inconsistency.", + dev.address, + dev.instance_uuid + ) + else: + LOG.warning( + "Device spec is not found for device %s in " + "[pci]device_spec. Ignoring device in Placement " + "resource view. This should not happen. Please file a " + "bug.", + dev.address + ) + return self._add_dev(dev, dev_spec.get_tags()) diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index f311b11442ff..ffbc7ed03fab 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1295,14 +1295,22 @@ class ResourceTracker(object): # This merges in changes from the provider config files loaded in init self._merge_provider_configs(self.provider_configs, prov_tree) - # Flush any changes. If we either processed ReshapeNeeded above or - # update_provider_tree_for_pci did reshape, then we need to pass allocs - # to update_from_provider_tree to hit placement's POST /reshaper route. - self.reportclient.update_from_provider_tree( - context, - prov_tree, - allocations=allocs if driver_reshaped or pci_reshaped else None - ) + try: + # Flush any changes. If we either processed ReshapeNeeded above or + # update_provider_tree_for_pci did reshape, then we need to pass + # allocs to update_from_provider_tree to hit placement's POST + # /reshaper route. + self.reportclient.update_from_provider_tree( + context, + prov_tree, + allocations=allocs if driver_reshaped or pci_reshaped else None + ) + except exception.InventoryInUse as e: + # This means an inventory reconfiguration (e.g.: removing a parent + # PF and adding a VF under that parent) was not possible due to + # existing allocations. Translate the exception to prevent the + # compute service to start + raise exception.PlacementPciException(error=str(e)) def _update(self, context, compute_node, startup=False): """Update partial stats locally and populate them to Scheduler.""" diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index cb04711bd0b1..2896a07f1352 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -1373,7 +1373,6 @@ class SchedulerReportClient(object): # can inherit. helper_exceptions = ( exception.InvalidResourceClass, - exception.InventoryInUse, exception.ResourceProviderAggregateRetrievalFailed, exception.ResourceProviderDeletionFailed, exception.ResourceProviderInUse, diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index 233941cd5619..c61ebda1ebab 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -36,7 +36,28 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): # Just placeholders to satisfy the base class. The real value will be # redefined by the tests PCI_DEVICE_SPEC = [] - PCI_ALIAS = None + PCI_ALIAS = [ + jsonutils.dumps(x) + for x in ( + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PCI_PROD_ID, + "name": "a-pci-dev", + }, + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PF_PROD_ID, + "device_type": "type-PF", + "name": "a-pf", + }, + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + "device_type": "type-VF", + "name": "a-vf", + }, + ) + ] def setUp(self): super().setUp() @@ -681,6 +702,179 @@ class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests): }, ) + def _create_one_compute_with_a_pf_consumed_by_an_instance(self): + # The fake libvirt will emulate on the host: + # * two type-PFs in slot 0, with one type-VF + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=1) + # we match the PF only and ignore the VF + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PF_PROD_ID, + "address": "0000:81:00.0", + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.mock_pci_report_in_placement.return_value = True + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assertPCIDeviceCounts("compute1", total=1, free=1) + compute1_expected_placement_view = { + "inventories": { + "0000:81:00.0": {self.PF_RC: 1}, + }, + "traits": { + "0000:81:00.0": [], + }, + "usages": { + "0000:81:00.0": {self.PF_RC: 0}, + }, + "allocations": {}, + } + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + + # Create an instance consuming the PF + extra_spec = {"pci_passthrough:alias": "a-pf:1"} + flavor_id = self._create_flavor(extra_spec=extra_spec) + server = self._create_server(flavor_id=flavor_id, networks=[]) + + self.assertPCIDeviceCounts("compute1", total=1, free=0) + compute1_expected_placement_view["usages"] = { + "0000:81:00.0": {self.PF_RC: 1}, + } + compute1_expected_placement_view["allocations"][server["id"]] = { + "0000:81:00.0": {self.PF_RC: 1}, + } + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + self._run_periodics() + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + + return server, compute1_expected_placement_view + + def test_device_reconfiguration_with_allocations_config_change_warn(self): + server, compute1_expected_placement_view = ( + self._create_one_compute_with_a_pf_consumed_by_an_instance()) + + # remove 0000:81:00.0 from the device spec and restart the compute + device_spec = self._to_device_spec_conf([]) + self.flags(group='pci', device_spec=device_spec) + # The PF is used but removed from the config. The PciTracker warns + # but keeps the device so the placement logic mimic this and only warns + # but keeps the RP and the allocation in placement intact. + self.restart_compute_service(hostname="compute1") + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + self._run_periodics() + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + # the warning from the PciTracker + self.assertIn( + "WARNING [nova.pci.manager] Unable to remove device with status " + "'allocated' and ownership %s because of PCI device " + "1:0000:81:00.0 is allocated instead of ['available', " + "'unavailable', 'unclaimable']. Check your [pci]device_spec " + "configuration to make sure this allocated device is whitelisted. " + "If you have removed the device from the whitelist intentionally " + "or the device is no longer available on the host you will need " + "to delete the server or migrate it to another host to silence " + "this warning." + % server['id'], + self.stdlog.logger.output, + ) + # the warning from the placement PCI tracking logic + self.assertIn( + "WARNING [nova.compute.pci_placement_translator] Device spec is " + "not found for device 0000:81:00.0 in [pci]device_spec. We are " + "skipping this devices during Placement update. The device is " + "allocated by %s. You should not remove an allocated device from " + "the configuration. Please restore the configuration or cold " + "migrate the instance to resolve the inconsistency." + % server['id'], + self.stdlog.logger.output, + ) + + def test_device_reconfiguration_with_allocations_config_change_stop(self): + self._create_one_compute_with_a_pf_consumed_by_an_instance() + + # switch 0000:81:00.0 PF to 0000:81:00.1 VF + # in the config, then restart the compute service + + # only match the VF now + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + "address": "0000:81:00.1", + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + # The compute fails to start as the new config would mean that the PF + # inventory is removed from the 0000:81:00.0 RP and the PF inventory is + # added instead there, but the VF inventory has allocations. Keeping + # the old inventory as in + # test_device_reconfiguration_with_allocations_config_change_warn is + # not an option as it would result in two resource class on the same RP + # one for the PF and one for the VF. That would allow consuming + # the same physical device twice. Such dependent device configuration + # is intentionally not supported so we are stopping the compute + # service. + ex = self.assertRaises( + exception.PlacementPciException, + self.restart_compute_service, + hostname="compute1" + ) + self.assertRegex( + str(ex), + "Failed to gather or report PCI resources to Placement: There was " + "a conflict when trying to complete your request.\n\n " + "update conflict: Inventory for 'CUSTOM_PCI_8086_1528' on " + "resource provider '.*' in use.", + ) + + def test_device_reconfiguration_with_allocations_hyp_change(self): + server, compute1_expected_placement_view = ( + self._create_one_compute_with_a_pf_consumed_by_an_instance()) + + # restart the compute but simulate that the device 0000:81:00.0 is + # removed from the hypervisor while the device spec config left + # intact. The PciTracker will notice this and log a warning. The + # placement tracking logic simply keeps the allocation intact in + # placement as both the PciDevice and the DeviceSpec is available. + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=0, num_vfs=0) + self.restart_compute_service( + hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + self._run_periodics() + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + # the warning from the PciTracker + self.assertIn( + "WARNING [nova.pci.manager] Unable to remove device with status " + "'allocated' and ownership %s because of PCI device " + "1:0000:81:00.0 is allocated instead of ['available', " + "'unavailable', 'unclaimable']. Check your [pci]device_spec " + "configuration to make sure this allocated device is whitelisted. " + "If you have removed the device from the whitelist intentionally " + "or the device is no longer available on the host you will need " + "to delete the server or migrate it to another host to silence " + "this warning." + % server['id'], + self.stdlog.logger.output, + ) + def test_reporting_disabled_nothing_is_reported(self): # The fake libvirt will emulate on the host: # * one type-PCI in slot 0 @@ -765,32 +959,6 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): ) ) - # Pre-configure a PCI alias to consume our devs - alias_pci = { - "vendor_id": fakelibvirt.PCI_VEND_ID, - "product_id": fakelibvirt.PCI_PROD_ID, - "name": "a-pci-dev", - } - alias_pf = { - "vendor_id": fakelibvirt.PCI_VEND_ID, - "product_id": fakelibvirt.PF_PROD_ID, - "device_type": "type-PF", - "name": "a-pf", - } - alias_vf = { - "vendor_id": fakelibvirt.PCI_VEND_ID, - "product_id": fakelibvirt.VF_PROD_ID, - "device_type": "type-VF", - "name": "a-vf", - } - self.flags( - group='pci', - alias=self._to_pci_alias_conf([alias_pci, alias_pf, alias_vf])) - - @staticmethod - def _to_pci_alias_conf(alias_list): - return [jsonutils.dumps(x) for x in alias_list] - @staticmethod def _move_allocation(allocations, from_uuid, to_uuid): allocations[to_uuid] = allocations[from_uuid] diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py index 23fa7e1b7eda..ee6a0469ac8a 100644 --- a/nova/tests/unit/compute/test_pci_placement_translator.py +++ b/nova/tests/unit/compute/test_pci_placement_translator.py @@ -50,6 +50,7 @@ class TestTranslator(test.NoDBTestCase): pci_device.PciDevice( address="0000:81:00.0", status=fields.PciDeviceStatus.AVAILABLE, + instance_uuid=None, ) ] ) diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index d31ec97f339e..a3b005b80f90 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -1960,6 +1960,35 @@ class TestUpdateComputeNode(BaseTestCase): upt = self.rt.reportclient.update_from_provider_tree upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None) + @mock.patch( + 'nova.compute.resource_tracker.ResourceTracker.' + '_sync_compute_service_disabled_trait', + new=mock.Mock() + ) + @mock.patch( + 'nova.compute.resource_tracker.ResourceTracker._resource_change', + new=mock.Mock(return_value=False) + ) + def test_update_pci_reporting_allocation_in_use_error_propagated(self): + """Assert that if the pci placement reporting code tries to remove + inventory with allocation from placement due to invalid hypervisor + or [pci]device_spec reconfiguration then the InventoryInUse error from + placement is propagated and makes the compute startup to fail. + """ + compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone() + self._setup_rt() + self.rt.reportclient.update_from_provider_tree.side_effect = ( + exc.InventoryInUse( + resource_class="FOO", resource_provider="bar")) + + self.assertRaises( + exc.PlacementPciException, + self.rt._update, + mock.sentinel.ctx, + compute_obj, + startup=True, + ) + @mock.patch('nova.objects.Service.get_by_compute_host', return_value=objects.Service(disabled=True)) def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):