From deada11da1954efd443b66905765aa77f343bc23 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Thu, 28 Nov 2019 14:40:37 +0100 Subject: [PATCH] Move _update_pci_request_spec_with_allocated_interface_name This patch moves _update_pci_request_spec_with_allocated_interface_name from the ComputeManager to nova.compute.utils as a later patch needs to call it from outside of the ComputeManager. This patch also changes the exception types raised from update_pci_request_spec_with_allocated_interface_name as raising BuildAbortException from a util method does not make too much sense. This means that some of the callers also needed to be adjusted to handle the new exception types. In the _do_rebuild_instance_with_claim code path the exception type is not translated back to BuildAbortException as the old and the new exceptions are both handled by the except Exception branch in rebuild_instance [1]. [1] https://github.com/openstack/nova/blob/ed503ce3e1a8598c6e8ee9d8b345c1d92df01539/nova/compute/manager.py#L3448 Change-Id: I5853a0f6e133044ffc1861d21008e568ad62ffc7 blueprint: support-move-ops-with-qos-ports-ussuri --- nova/compute/manager.py | 90 ++++++--------------- nova/compute/utils.py | 56 +++++++++++++ nova/exception.py | 14 ++++ nova/tests/functional/test_servers.py | 10 +-- nova/tests/unit/compute/test_compute.py | 7 +- nova/tests/unit/compute/test_compute_mgr.py | 11 +-- 6 files changed, 111 insertions(+), 77 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 33417f5058aa..174b7139530b 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2369,61 +2369,6 @@ class ComputeManager(manager.Manager): else: return None - def _update_pci_request_spec_with_allocated_interface_name( - self, context, instance, request_group_resource_providers_mapping): - if not instance.pci_requests: - return - - def needs_update(pci_request, mapping): - return (pci_request.requester_id and - pci_request.requester_id in mapping) - - modified = False - for pci_request in instance.pci_requests.requests: - if needs_update( - pci_request, request_group_resource_providers_mapping): - - provider_uuids = request_group_resource_providers_mapping[ - pci_request.requester_id] - - if len(provider_uuids) != 1: - reason = ( - 'Allocating resources from more than one resource ' - 'providers %(providers)s for a single pci request ' - '%(requester)s is not supported.' % - {'providers': provider_uuids, - 'requester': pci_request.requester_id}) - raise exception.BuildAbortException( - instance_uuid=instance.uuid, - reason=reason) - - dev_rp_name = self.reportclient.get_resource_provider_name( - context, - provider_uuids[0]) - - # NOTE(gibi): the device RP name reported by neutron is - # structured like :: - rp_name_pieces = dev_rp_name.split(':') - if len(rp_name_pieces) != 3: - reason = ( - 'Resource provider %(provider)s used to allocate ' - 'resources for the pci request %(requester)s does not ' - 'have properly formatted name. Expected name format ' - 'is ::, but got ' - '%(provider_name)s' % - {'provider': provider_uuids[0], - 'requester': pci_request.requester_id, - 'provider_name': dev_rp_name}) - raise exception.BuildAbortException( - instance_uuid=instance.uuid, - reason=reason) - - for spec in pci_request.spec: - spec['parent_ifname'] = rp_name_pieces[2] - modified = True - if modified: - instance.save() - def _build_and_run_instance(self, context, instance, image, injected_files, admin_password, requested_networks, security_groups, block_device_mapping, node, limits, filter_properties, @@ -2449,8 +2394,16 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, request_group_resource_providers_mapping) + try: + compute_utils\ + .update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) + except (exception.AmbiguousResourceProviderForPCIRequest, + exception.UnexpectedResourceProviderNameForPCIRequest + ) as e: + raise exception.BuildAbortException( + reason=six.text_type(e), instance_uuid=instance.uuid) # TODO(Luyao) cut over to get_allocs_for_consumer allocs = self.reportclient.get_allocations_for_consumer( @@ -3475,9 +3428,10 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, - request_group_resource_providers_mapping) + compute_utils.\ + update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) claim_context = rebuild_claim( context, instance, scheduled_node, allocations, @@ -4889,8 +4843,16 @@ class ComputeManager(manager.Manager): self._get_request_group_mapping(request_spec) if request_group_resource_providers_mapping: - self._update_pci_request_spec_with_allocated_interface_name( - context, instance, request_group_resource_providers_mapping) + try: + compute_utils.\ + update_pci_request_spec_with_allocated_interface_name( + context, self.reportclient, instance, + request_group_resource_providers_mapping) + except (exception.AmbiguousResourceProviderForPCIRequest, + exception.UnexpectedResourceProviderNameForPCIRequest + ) as e: + raise exception.BuildAbortException( + reason=six.text_type(e), instance_uuid=instance.uuid) limits = filter_properties.get('limits', {}) allocs = self.reportclient.get_allocations_for_consumer( @@ -4973,7 +4935,7 @@ class ComputeManager(manager.Manager): clean_shutdown) except exception.BuildAbortException: # NOTE(gibi): We failed - # _update_pci_request_spec_with_allocated_interface_name so + # update_pci_request_spec_with_allocated_interface_name so # there is no reason to re-schedule. Just revert the allocation # and fail the migration. with excutils.save_and_reraise_exception(): @@ -5103,7 +5065,7 @@ class ComputeManager(manager.Manager): 'host (%s).', self.host, instance=instance) self._send_prep_resize_notifications( ctxt, instance, fields.NotificationPhase.START, flavor) - # TODO(mriedem): _update_pci_request_spec_with_allocated_interface_name + # TODO(mriedem): update_pci_request_spec_with_allocated_interface_name # should be called here if the request spec has request group mappings, # e.g. for things like QoS ports with resource requests. Do it outside # the try/except so if it raises BuildAbortException we do not attempt diff --git a/nova/compute/utils.py b/nova/compute/utils.py index aac657ff9b7a..5a51d84ee409 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -1460,3 +1460,59 @@ def notify_about_instance_delete(notifier, context, instance, source=source, action=delete_type, phase=fields.NotificationPhase.END) + + +def update_pci_request_spec_with_allocated_interface_name( + context, report_client, instance, provider_mapping): + """Update the instance's PCI request based on the request group - + resource provider mapping and the device RP name from placement. + + :param context: the request context + :param report_client: a SchedulerReportClient instance + :param instance: an Instance object to be updated + :param provider_mapping: the request group - resource provider mapping + in the form returned by the RequestSpec.get_request_group_mapping() + call. + :raises AmbigousResourceProviderForPCIRequest: if more than one + resource provider provides resource for the given PCI request. + :raises UnexpectResourceProviderNameForPCIRequest: if the resource + provider, which provides resource for the pci request, does not + have a well formatted name so we cannot parse the parent interface + name out of it. + """ + if not instance.pci_requests: + return + + def needs_update(pci_request, mapping): + return (pci_request.requester_id and + pci_request.requester_id in mapping) + + modified = False + for pci_request in instance.pci_requests.requests: + if needs_update(pci_request, provider_mapping): + + provider_uuids = provider_mapping[pci_request.requester_id] + if len(provider_uuids) != 1: + raise exception.AmbiguousResourceProviderForPCIRequest( + providers=provider_uuids, + requester=pci_request.requester_id) + + dev_rp_name = report_client.get_resource_provider_name( + context, + provider_uuids[0]) + + # NOTE(gibi): the device RP name reported by neutron is + # structured like :: + rp_name_pieces = dev_rp_name.split(':') + if len(rp_name_pieces) != 3: + ex = exception.UnexpectedResourceProviderNameForPCIRequest + raise ex( + provider=provider_uuids[0], + requester=pci_request.requester_id, + provider_name=dev_rp_name) + + for spec in pci_request.spec: + spec['parent_ifname'] = rp_name_pieces[2] + modified = True + if modified: + instance.save() diff --git a/nova/exception.py b/nova/exception.py index ae936445bb63..562da5cfe4d6 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2482,3 +2482,17 @@ class VPMEMCleanupFailed(NovaException): class RequestGroupSuffixConflict(NovaException): msg_fmt = _("Duplicate request group suffix %(suffix)s!") + + +class AmbiguousResourceProviderForPCIRequest(NovaException): + msg_fmt = _("Allocating resources from multiple resource providers " + "%(providers)s for a single pci request %(requester)s is not " + "supported.") + + +class UnexpectedResourceProviderNameForPCIRequest(NovaException): + msg_fmt = _("Resource provider %(provider)s used to allocate resources " + "for the pci request %(requester)s does not have a properly " + "formatted name. Expected name format is " + "::, but got " + "%(provider_name)s") diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index ec8849a2642d..24bdbbca9100 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -6854,7 +6854,7 @@ class ServerMoveWithPortResourceRequestTest( def test_migrate_server_with_qos_port_pci_update_fail_not_reschedule(self): # Update the name of the network device RP of PF2 on host2 to something # unexpected. This will cause - # _update_pci_request_spec_with_allocated_interface_name() to raise + # update_pci_request_spec_with_allocated_interface_name() to raise # when the instance is migrated to the host2. rsp = self.placement_api.put( '/resource_providers/%s' @@ -6879,7 +6879,7 @@ class ServerMoveWithPortResourceRequestTest( qos_sriov_port, self.flavor_with_group_policy) # The compute manager on host2 will raise from - # _update_pci_request_spec_with_allocated_interface_name which will + # update_pci_request_spec_with_allocated_interface_name which will # intentionally not trigger a re-schedule even if there is host3 as an # alternate. self.api.post_server_action(server['id'], {'migrate': None}) @@ -7208,7 +7208,7 @@ class ServerMoveWithPortResourceRequestTest( def test_evacuate_with_qos_port_pci_update_fail(self): # Update the name of the network device RP of PF2 on host2 to something # unexpected. This will cause - # _update_pci_request_spec_with_allocated_interface_name() to raise + # update_pci_request_spec_with_allocated_interface_name() to raise # when the instance is evacuated to the host2. rsp = self.placement_api.put( '/resource_providers/%s' @@ -7234,7 +7234,7 @@ class ServerMoveWithPortResourceRequestTest( self.compute1_service_id, {'forced_down': 'true'}) # The compute manager on host2 will raise from - # _update_pci_request_spec_with_allocated_interface_name + # update_pci_request_spec_with_allocated_interface_name self.api.post_server_action(server['id'], {'evacuate': {}}) server = self._wait_for_server_parameter( self.api, server, @@ -7244,7 +7244,7 @@ class ServerMoveWithPortResourceRequestTest( self._wait_for_migration_status(server, ['failed']) self.assertIn( - 'Build of instance %s aborted' % server['id'], + 'does not have a properly formatted name', server['fault']['message']) self._wait_for_action_fail_completion( diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 1006efc106d8..fab17e92ded2 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -12749,8 +12749,8 @@ class EvacuateHostTestCase(BaseTestCase): 'nova.compute.manager.ComputeManager._get_request_group_mapping', return_value=mock.sentinel.mapping) @mock.patch( - 'nova.compute.manager.ComputeManager' - '._update_pci_request_spec_with_allocated_interface_name') + 'nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_about_instance_action') @mock.patch('nova.compute.utils.notify_about_instance_rebuild') @mock.patch.object(network_api, 'setup_networks_on_host') @@ -12797,7 +12797,8 @@ class EvacuateHostTestCase(BaseTestCase): self.mock_get_allocs.assert_called_once_with(ctxt, self.inst.uuid) mock_update_pci_req.assert_called_once_with( - ctxt, self.inst, mock.sentinel.mapping) + ctxt, self.compute.reportclient, self.inst, + mock.sentinel.mapping) _test_rebuild(vm_is_stopped=vm_states_is_stopped) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index f9e96136df41..c6db03d28753 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -10022,8 +10022,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance') @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim') @mock.patch('nova.objects.Instance.save') - @mock.patch('nova.compute.manager.ComputeManager.' - '_update_pci_request_spec_with_allocated_interface_name') + @mock.patch('nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_usage_exists') @mock.patch('nova.compute.manager.ComputeManager.' '_notify_about_instance_usage') @@ -10047,7 +10047,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, clean_shutdown=True, migration=self.migration, host_list=[]) mock_update_pci.assert_called_once_with( - self.context, instance, {uuids.port_id: [uuids.rp_uuid]}) + self.context, self.compute.reportclient, instance, + {uuids.port_id: [uuids.rp_uuid]}) mock_save.assert_called_once_with() @mock.patch('nova.compute.manager.ComputeManager._revert_allocation') @@ -10055,8 +10056,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance') @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim') @mock.patch('nova.objects.Instance.save') - @mock.patch('nova.compute.manager.ComputeManager.' - '_update_pci_request_spec_with_allocated_interface_name') + @mock.patch('nova.compute.utils.' + 'update_pci_request_spec_with_allocated_interface_name') @mock.patch('nova.compute.utils.notify_usage_exists') @mock.patch('nova.compute.manager.ComputeManager.' '_notify_about_instance_usage')