diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 33417f5058aa..174b7139530b 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2369,61 +2369,6 @@ class ComputeManager(manager.Manager): else: return None - def _update_pci_request_spec_with_allocated_interface_name( - self, context, instance, request_group_resource_providers_mapping): - if not instance.pci_requests: - return - - def needs_update(pci_request, mapping): - return (pci_request.requester_id and - pci_request.requester_id in mapping) - - modified = False - for pci_request in instance.pci_requests.requests: - if needs_update( - pci_request, request_group_resource_providers_mapping): - - provider_uuids = request_group_resource_providers_mapping[ - pci_request.requester_id] - - if len(provider_uuids) != 1: - reason = ( - 'Allocating resources from more than one resource ' - 'providers %(providers)s for a single pci request ' - '%(requester)s is not supported.' % - {'providers': provider_uuids, - 'requester': pci_request.requester_id}) - raise exception.BuildAbortException( - instance_uuid=instance.uuid, - reason=reason) - - dev_rp_name = self.reportclient.get_resource_provider_name( - context, - provider_uuids[0]) - - # NOTE(gibi): the device RP name reported by neutron is - # structured like :: - rp_name_pieces = dev_rp_name.split(':') - if len(rp_name_pieces) != 3: - reason = ( - 'Resource provider %(provider)s used to allocate ' - 'resources for the pci request %(requester)s does not ' - 'have properly formatted name. Expected name format ' - 'is ::, but got ' - '%(provider_name)s' % - {'provider': provider_uuids[0], - 'requester': pci_request.requester_id, - 'provider_name': dev_rp_name}) - raise exception.BuildAbortException( - instance_uuid=instance.uuid, - reason=reason) - - for spec in pci_request.spec: - spec['parent_ifname'] = rp_name_pieces[2] - modified = True - if modified: - instance.save() - def _build_and_run_instance(self, context, instance, image, injected_files, admin_password, requested_networks, security_groups, block_device_mapping, node, limits, filter_properties, @@ -2449,8 +2394,16 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, request_group_resource_providers_mapping) + try: + compute_utils\ + .update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) + except (exception.AmbiguousResourceProviderForPCIRequest, + exception.UnexpectedResourceProviderNameForPCIRequest + ) as e: + raise exception.BuildAbortException( + reason=six.text_type(e), instance_uuid=instance.uuid) # TODO(Luyao) cut over to get_allocs_for_consumer allocs = self.reportclient.get_allocations_for_consumer( @@ -3475,9 +3428,10 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, - request_group_resource_providers_mapping) + compute_utils.\ + update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) claim_context = rebuild_claim( context, instance, scheduled_node, allocations, @@ -4889,8 +4843,16 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, request_group_resource_providers_mapping) + try: + compute_utils.\ + update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) + except (exception.AmbiguousResourceProviderForPCIRequest, + exception.UnexpectedResourceProviderNameForPCIRequest + ) as e: + raise exception.BuildAbortException( + reason=six.text_type(e), instance_uuid=instance.uuid) limits = filter_properties.get('limits', {}) allocs = self.reportclient.get_allocations_for_consumer( @@ -4973,7 +4935,7 @@ class ComputeManager(manager.Manager): clean_shutdown) except exception.BuildAbortException: # NOTE(gibi): We failed - # _update_pci_request_spec_with_allocated_interface_name so + # update_pci_request_spec_with_allocated_interface_name so # there is no reason to re-schedule. Just revert the allocation # and fail the migration. with excutils.save_and_reraise_exception(): @@ -5103,7 +5065,7 @@ class ComputeManager(manager.Manager): 'host (%s).', self.host, instance=instance) self._send_prep_resize_notifications( ctxt, instance, fields.NotificationPhase.START, flavor) - # TODO(mriedem): _update_pci_request_spec_with_allocated_interface_name + # TODO(mriedem): update_pci_request_spec_with_allocated_interface_name # should be called here if the request spec has request group mappings, # e.g. for things like QoS ports with resource requests. Do it outside # the try/except so if it raises BuildAbortException we do not attempt diff --git a/nova/compute/utils.py b/nova/compute/utils.py index aac657ff9b7a..5a51d84ee409 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -1460,3 +1460,59 @@ def notify_about_instance_delete(notifier, context, instance, source=source, action=delete_type, phase=fields.NotificationPhase.END) + + +def update_pci_request_spec_with_allocated_interface_name( + context, report_client, instance, provider_mapping): + """Update the instance's PCI request based on the request group - + resource provider mapping and the device RP name from placement. + + :param context: the request context + :param report_client: a SchedulerReportClient instance + :param instance: an Instance object to be updated + :param provider_mapping: the request group - resource provider mapping + in the form returned by the RequestSpec.get_request_group_mapping() + call. + :raises AmbigousResourceProviderForPCIRequest: if more than one + resource provider provides resource for the given PCI request. + :raises UnexpectResourceProviderNameForPCIRequest: if the resource + provider, which provides resource for the pci request, does not + have a well formatted name so we cannot parse the parent interface + name out of it. + """ + if not instance.pci_requests: + return + + def needs_update(pci_request, mapping): + return (pci_request.requester_id and + pci_request.requester_id in mapping) + + modified = False + for pci_request in instance.pci_requests.requests: + if needs_update(pci_request, provider_mapping): + + provider_uuids = provider_mapping[pci_request.requester_id] + if len(provider_uuids) != 1: + raise exception.AmbiguousResourceProviderForPCIRequest( + providers=provider_uuids, + requester=pci_request.requester_id) + + dev_rp_name = report_client.get_resource_provider_name( + context, + provider_uuids[0]) + + # NOTE(gibi): the device RP name reported by neutron is + # structured like :: + rp_name_pieces = dev_rp_name.split(':') + if len(rp_name_pieces) != 3: + ex = exception.UnexpectedResourceProviderNameForPCIRequest + raise ex( + provider=provider_uuids[0], + requester=pci_request.requester_id, + provider_name=dev_rp_name) + + for spec in pci_request.spec: + spec['parent_ifname'] = rp_name_pieces[2] + modified = True + if modified: + instance.save() diff --git a/nova/exception.py b/nova/exception.py index c4f368dde99e..df97218dcc7e 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2492,3 +2492,17 @@ class VPMEMCleanupFailed(NovaException): class RequestGroupSuffixConflict(NovaException): msg_fmt = _("Duplicate request group suffix %(suffix)s!") + + +class AmbiguousResourceProviderForPCIRequest(NovaException): + msg_fmt = _("Allocating resources from multiple resource providers " + "%(providers)s for a single pci request %(requester)s is not " + "supported.") + + +class UnexpectedResourceProviderNameForPCIRequest(NovaException): + msg_fmt = _("Resource provider %(provider)s used to allocate resources " + "for the pci request %(requester)s does not have a properly " + "formatted name. Expected name format is " + "::, but got " + "%(provider_name)s") diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 4f7717e4c6c9..f46477864a58 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -6836,7 +6836,7 @@ class ServerMoveWithPortResourceRequestTest( def test_migrate_server_with_qos_port_pci_update_fail_not_reschedule(self): # Update the name of the network device RP of PF2 on host2 to something # unexpected. This will cause - # _update_pci_request_spec_with_allocated_interface_name() to raise + # update_pci_request_spec_with_allocated_interface_name() to raise # when the instance is migrated to the host2. rsp = self.placement_api.put( '/resource_providers/%s' @@ -6861,7 +6861,7 @@ class ServerMoveWithPortResourceRequestTest( qos_sriov_port, self.flavor_with_group_policy) # The compute manager on host2 will raise from - # _update_pci_request_spec_with_allocated_interface_name which will + # update_pci_request_spec_with_allocated_interface_name which will # intentionally not trigger a re-schedule even if there is host3 as an # alternate. self.api.post_server_action(server['id'], {'migrate': None}) @@ -7186,7 +7186,7 @@ class ServerMoveWithPortResourceRequestTest( def test_evacuate_with_qos_port_pci_update_fail(self): # Update the name of the network device RP of PF2 on host2 to something # unexpected. This will cause - # _update_pci_request_spec_with_allocated_interface_name() to raise + # update_pci_request_spec_with_allocated_interface_name() to raise # when the instance is evacuated to the host2. rsp = self.placement_api.put( '/resource_providers/%s' @@ -7212,7 +7212,7 @@ class ServerMoveWithPortResourceRequestTest( self.compute1_service_id, {'forced_down': 'true'}) # The compute manager on host2 will raise from - # _update_pci_request_spec_with_allocated_interface_name + # update_pci_request_spec_with_allocated_interface_name self.api.post_server_action(server['id'], {'evacuate': {}}) server = self._wait_for_server_parameter(server, {'OS-EXT-SRV-ATTR:host': 'host1', @@ -7221,7 +7221,7 @@ class ServerMoveWithPortResourceRequestTest( self._wait_for_migration_status(server, ['failed']) self.assertIn( - 'Build of instance %s aborted' % server['id'], + 'does not have a properly formatted name', server['fault']['message']) self._wait_for_action_fail_completion( diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 1006efc106d8..fab17e92ded2 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -12749,8 +12749,8 @@ class EvacuateHostTestCase(BaseTestCase): 'nova.compute.manager.ComputeManager._get_request_group_mapping', return_value=mock.sentinel.mapping) @mock.patch( - 'nova.compute.manager.ComputeManager' - '._update_pci_request_spec_with_allocated_interface_name') + 'nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_about_instance_action') @mock.patch('nova.compute.utils.notify_about_instance_rebuild') @mock.patch.object(network_api, 'setup_networks_on_host') @@ -12797,7 +12797,8 @@ class EvacuateHostTestCase(BaseTestCase): self.mock_get_allocs.assert_called_once_with(ctxt, self.inst.uuid) mock_update_pci_req.assert_called_once_with( - ctxt, self.inst, mock.sentinel.mapping) + ctxt, self.compute.reportclient, self.inst, + mock.sentinel.mapping) _test_rebuild(vm_is_stopped=vm_states_is_stopped) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index f9e96136df41..c6db03d28753 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -10022,8 +10022,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance') @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim') @mock.patch('nova.objects.Instance.save') - @mock.patch('nova.compute.manager.ComputeManager.' - '_update_pci_request_spec_with_allocated_interface_name') + @mock.patch('nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_usage_exists') @mock.patch('nova.compute.manager.ComputeManager.' '_notify_about_instance_usage') @@ -10047,7 +10047,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, clean_shutdown=True, migration=self.migration, host_list=[]) mock_update_pci.assert_called_once_with( - self.context, instance, {uuids.port_id: [uuids.rp_uuid]}) + self.context, self.compute.reportclient, instance, + {uuids.port_id: [uuids.rp_uuid]}) mock_save.assert_called_once_with() @mock.patch('nova.compute.manager.ComputeManager._revert_allocation') @@ -10055,8 +10056,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance') @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim') @mock.patch('nova.objects.Instance.save') - @mock.patch('nova.compute.manager.ComputeManager.' - '_update_pci_request_spec_with_allocated_interface_name') + @mock.patch('nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_usage_exists') @mock.patch('nova.compute.manager.ComputeManager.' '_notify_about_instance_usage')