From deada11da1954efd443b66905765aa77f343bc23 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <balazs.gibizer@est.tech>
Date: Thu, 28 Nov 2019 14:40:37 +0100
Subject: [PATCH] Move _update_pci_request_spec_with_allocated_interface_name

This patch moves _update_pci_request_spec_with_allocated_interface_name
from the ComputeManager to nova.compute.utils as a later patch needs to
call it from outside of the ComputeManager.

This patch also changes the exception types raised from
update_pci_request_spec_with_allocated_interface_name as raising
BuildAbortException from a util method does not make too much
sense. This means that some of the callers also needed to be adjusted to
handle the new exception types.

In the _do_rebuild_instance_with_claim code path the exception type is
not translated back to BuildAbortException as the old and the new
exceptions are both handled by the except Exception branch in
rebuild_instance [1].

[1] https://github.com/openstack/nova/blob/ed503ce3e1a8598c6e8ee9d8b345c1d92df01539/nova/compute/manager.py#L3448

Change-Id: I5853a0f6e133044ffc1861d21008e568ad62ffc7
blueprint: support-move-ops-with-qos-ports-ussuri
---
 nova/compute/manager.py                     | 90 ++++++---------------
 nova/compute/utils.py                       | 56 +++++++++++++
 nova/exception.py                           | 14 ++++
 nova/tests/functional/test_servers.py       | 10 +--
 nova/tests/unit/compute/test_compute.py     |  7 +-
 nova/tests/unit/compute/test_compute_mgr.py | 11 +--
 6 files changed, 111 insertions(+), 77 deletions(-)
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 33417f5058aa..174b7139530b 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2369,61 +2369,6 @@ class ComputeManager(manager.Manager):
         else:
             return None
 
-    def _update_pci_request_spec_with_allocated_interface_name(
-            self, context, instance, request_group_resource_providers_mapping):
-        if not instance.pci_requests:
-            return
-
-        def needs_update(pci_request, mapping):
-            return (pci_request.requester_id and
-                    pci_request.requester_id in mapping)
-
-        modified = False
-        for pci_request in instance.pci_requests.requests:
-            if needs_update(
-                    pci_request, request_group_resource_providers_mapping):
-
-                provider_uuids = request_group_resource_providers_mapping[
-                    pci_request.requester_id]
-
-                if len(provider_uuids) != 1:
-                    reason = (
-                        'Allocating resources from more than one resource '
-                        'providers %(providers)s for a single pci request '
-                        '%(requester)s is not supported.' %
-                        {'providers': provider_uuids,
-                         'requester': pci_request.requester_id})
-                    raise exception.BuildAbortException(
-                        instance_uuid=instance.uuid,
-                        reason=reason)
-
-                dev_rp_name = self.reportclient.get_resource_provider_name(
-                    context,
-                    provider_uuids[0])
-
-                # NOTE(gibi): the device RP name reported by neutron is
-                # structured like <hostname>:<agentname>:<interfacename>
-                rp_name_pieces = dev_rp_name.split(':')
-                if len(rp_name_pieces) != 3:
-                    reason = (
-                        'Resource provider %(provider)s used to allocate '
-                        'resources for the pci request %(requester)s does not '
-                        'have properly formatted name. Expected name format '
-                        'is <hostname>:<agentname>:<interfacename>, but got '
-                        '%(provider_name)s' %
-                        {'provider': provider_uuids[0],
-                         'requester': pci_request.requester_id,
-                         'provider_name': dev_rp_name})
-                    raise exception.BuildAbortException(
-                        instance_uuid=instance.uuid,
-                        reason=reason)
-
-                for spec in pci_request.spec:
-                    spec['parent_ifname'] = rp_name_pieces[2]
-                    modified = True
-        if modified:
-            instance.save()
-
     def _build_and_run_instance(self, context, instance, image, injected_files,
             admin_password, requested_networks, security_groups,
             block_device_mapping, node, limits, filter_properties,
@@ -2449,8 +2394,16 @@ class ComputeManager(manager.Manager):
             self._get_request_group_mapping(request_spec)
 
         if request_group_resource_providers_mapping:
-            self._update_pci_request_spec_with_allocated_interface_name(
-                context, instance, request_group_resource_providers_mapping)
+            try:
+                compute_utils\
+                    .update_pci_request_spec_with_allocated_interface_name(
+                        context, self.reportclient, instance,
+                        request_group_resource_providers_mapping)
+            except (exception.AmbiguousResourceProviderForPCIRequest,
+                    exception.UnexpectedResourceProviderNameForPCIRequest
+                    ) as e:
+                raise exception.BuildAbortException(
+                    reason=six.text_type(e), instance_uuid=instance.uuid)
 
         # TODO(Luyao) cut over to get_allocs_for_consumer
         allocs = self.reportclient.get_allocations_for_consumer(
@@ -3475,9 +3428,10 @@ class ComputeManager(manager.Manager):
                 self._get_request_group_mapping(request_spec)
 
             if request_group_resource_providers_mapping:
-                self._update_pci_request_spec_with_allocated_interface_name(
-                    context, instance,
-                    request_group_resource_providers_mapping)
+                compute_utils.\
+                    update_pci_request_spec_with_allocated_interface_name(
+                        context, self.reportclient, instance,
+                        request_group_resource_providers_mapping)
 
         claim_context = rebuild_claim(
             context, instance, scheduled_node, allocations,
@@ -4889,8 +4843,16 @@ class ComputeManager(manager.Manager):
             self._get_request_group_mapping(request_spec)
 
         if request_group_resource_providers_mapping:
-            self._update_pci_request_spec_with_allocated_interface_name(
-                context, instance, request_group_resource_providers_mapping)
+            try:
+                compute_utils.\
+                    update_pci_request_spec_with_allocated_interface_name(
+                        context, self.reportclient, instance,
+                        request_group_resource_providers_mapping)
+            except (exception.AmbiguousResourceProviderForPCIRequest,
+                    exception.UnexpectedResourceProviderNameForPCIRequest
+                    ) as e:
+                raise exception.BuildAbortException(
+                    reason=six.text_type(e), instance_uuid=instance.uuid)
 
         limits = filter_properties.get('limits', {})
         allocs = self.reportclient.get_allocations_for_consumer(
@@ -4973,7 +4935,7 @@ class ComputeManager(manager.Manager):
                                   clean_shutdown)
             except exception.BuildAbortException:
                 # NOTE(gibi): We failed
-                # _update_pci_request_spec_with_allocated_interface_name so
+                # update_pci_request_spec_with_allocated_interface_name so
                 # there is no reason to re-schedule. Just revert the allocation
                 # and fail the migration.
                 with excutils.save_and_reraise_exception():
@@ -5103,7 +5065,7 @@ class ComputeManager(manager.Manager):
                   'host (%s).', self.host, instance=instance)
         self._send_prep_resize_notifications(
             ctxt, instance, fields.NotificationPhase.START, flavor)
-        # TODO(mriedem): _update_pci_request_spec_with_allocated_interface_name
+        # TODO(mriedem): update_pci_request_spec_with_allocated_interface_name
         # should be called here if the request spec has request group mappings,
         # e.g. for things like QoS ports with resource requests. Do it outside
         # the try/except so if it raises BuildAbortException we do not attempt
diff --git a/nova/compute/utils.py b/nova/compute/utils.py
index aac657ff9b7a..5a51d84ee409 100644
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@@ -1460,3 +1460,59 @@ def notify_about_instance_delete(notifier, context, instance,
                 source=source,
                 action=delete_type,
                 phase=fields.NotificationPhase.END)
+
+
+def update_pci_request_spec_with_allocated_interface_name(
+        context, report_client, instance, provider_mapping):
+    """Update the instance's PCI request based on the request group -
+    resource provider mapping and the device RP name from placement.
+
+    :param context: the request context
+    :param report_client: a SchedulerReportClient instance
+    :param instance: an Instance object to be updated
+    :param provider_mapping: the request group - resource provider mapping
+        in the form returned by the RequestSpec.get_request_group_mapping()
+        call.
+    :raises AmbigousResourceProviderForPCIRequest: if more than one
+        resource provider provides resource for the given PCI request.
+    :raises UnexpectResourceProviderNameForPCIRequest: if the resource
+        provider, which provides resource for the pci request, does not
+        have a well formatted name so we cannot parse the parent interface
+        name out of it.
+    """
+    if not instance.pci_requests:
+        return
+
+    def needs_update(pci_request, mapping):
+        return (pci_request.requester_id and
+                pci_request.requester_id in mapping)
+
+    modified = False
+    for pci_request in instance.pci_requests.requests:
+        if needs_update(pci_request, provider_mapping):
+
+            provider_uuids = provider_mapping[pci_request.requester_id]
+            if len(provider_uuids) != 1:
+                raise exception.AmbiguousResourceProviderForPCIRequest(
+                    providers=provider_uuids,
+                    requester=pci_request.requester_id)
+
+            dev_rp_name = report_client.get_resource_provider_name(
+                context,
+                provider_uuids[0])
+
+            # NOTE(gibi): the device RP name reported by neutron is
+            # structured like <hostname>:<agentname>:<interfacename>
+            rp_name_pieces = dev_rp_name.split(':')
+            if len(rp_name_pieces) != 3:
+                ex = exception.UnexpectedResourceProviderNameForPCIRequest
+                raise ex(
+                    provider=provider_uuids[0],
+                    requester=pci_request.requester_id,
+                    provider_name=dev_rp_name)
+
+            for spec in pci_request.spec:
+                spec['parent_ifname'] = rp_name_pieces[2]
+                modified = True
+    if modified:
+        instance.save()
diff --git a/nova/exception.py b/nova/exception.py
index ae936445bb63..562da5cfe4d6 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2482,3 +2482,17 @@ class VPMEMCleanupFailed(NovaException):
 
 class RequestGroupSuffixConflict(NovaException):
     msg_fmt = _("Duplicate request group suffix %(suffix)s!")
+
+
+class AmbiguousResourceProviderForPCIRequest(NovaException):
+    msg_fmt = _("Allocating resources from multiple resource providers "
+                "%(providers)s for a single pci request %(requester)s is not "
+                "supported.")
+
+
+class UnexpectedResourceProviderNameForPCIRequest(NovaException):
+    msg_fmt = _("Resource provider %(provider)s used to allocate resources "
+                "for the pci request %(requester)s does not have a properly "
+                "formatted name. Expected name format is "
+                "<hostname>:<agentname>:<interfacename>, but got "
+                "%(provider_name)s")
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index ec8849a2642d..24bdbbca9100 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -6854,7 +6854,7 @@ class ServerMoveWithPortResourceRequestTest(
     def test_migrate_server_with_qos_port_pci_update_fail_not_reschedule(self):
         # Update the name of the network device RP of PF2 on host2 to something
         # unexpected. This will cause
-        # _update_pci_request_spec_with_allocated_interface_name() to raise
+        # update_pci_request_spec_with_allocated_interface_name() to raise
         # when the instance is migrated to the host2.
         rsp = self.placement_api.put(
             '/resource_providers/%s'
@@ -6879,7 +6879,7 @@ class ServerMoveWithPortResourceRequestTest(
             qos_sriov_port, self.flavor_with_group_policy)
 
         # The compute manager on host2 will raise from
-        # _update_pci_request_spec_with_allocated_interface_name which will
+        # update_pci_request_spec_with_allocated_interface_name which will
         # intentionally not trigger a re-schedule even if there is host3 as an
         # alternate.
         self.api.post_server_action(server['id'], {'migrate': None})
@@ -7208,7 +7208,7 @@ class ServerMoveWithPortResourceRequestTest(
     def test_evacuate_with_qos_port_pci_update_fail(self):
         # Update the name of the network device RP of PF2 on host2 to something
         # unexpected. This will cause
-        # _update_pci_request_spec_with_allocated_interface_name() to raise
+        # update_pci_request_spec_with_allocated_interface_name() to raise
         # when the instance is evacuated to the host2.
         rsp = self.placement_api.put(
             '/resource_providers/%s'
@@ -7234,7 +7234,7 @@ class ServerMoveWithPortResourceRequestTest(
             self.compute1_service_id, {'forced_down': 'true'})
 
         # The compute manager on host2 will raise from
-        # _update_pci_request_spec_with_allocated_interface_name
+        # update_pci_request_spec_with_allocated_interface_name
         self.api.post_server_action(server['id'], {'evacuate': {}})
         server = self._wait_for_server_parameter(
             self.api, server,
@@ -7244,7 +7244,7 @@ class ServerMoveWithPortResourceRequestTest(
         self._wait_for_migration_status(server, ['failed'])
 
         self.assertIn(
-            'Build of instance %s aborted' % server['id'],
+            'does not have a properly formatted name',
             server['fault']['message'])
 
         self._wait_for_action_fail_completion(
diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py
index 1006efc106d8..fab17e92ded2 100644
--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -12749,8 +12749,8 @@ class EvacuateHostTestCase(BaseTestCase):
             'nova.compute.manager.ComputeManager._get_request_group_mapping',
             return_value=mock.sentinel.mapping)
         @mock.patch(
-            'nova.compute.manager.ComputeManager'
-            '._update_pci_request_spec_with_allocated_interface_name')
+            'nova.compute.utils.'
+            'update_pci_request_spec_with_allocated_interface_name')
         @mock.patch('nova.compute.utils.notify_about_instance_action')
         @mock.patch('nova.compute.utils.notify_about_instance_rebuild')
         @mock.patch.object(network_api, 'setup_networks_on_host')
@@ -12797,7 +12797,8 @@ class EvacuateHostTestCase(BaseTestCase):
             self.mock_get_allocs.assert_called_once_with(ctxt, self.inst.uuid)
 
             mock_update_pci_req.assert_called_once_with(
-                ctxt, self.inst, mock.sentinel.mapping)
+                ctxt, self.compute.reportclient, self.inst,
+                mock.sentinel.mapping)
 
         _test_rebuild(vm_is_stopped=vm_states_is_stopped)
 
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index f9e96136df41..c6db03d28753 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -10022,8 +10022,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
     @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance')
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim')
     @mock.patch('nova.objects.Instance.save')
-    @mock.patch('nova.compute.manager.ComputeManager.'
-                '_update_pci_request_spec_with_allocated_interface_name')
+    @mock.patch('nova.compute.utils.'
+                'update_pci_request_spec_with_allocated_interface_name')
     @mock.patch('nova.compute.utils.notify_usage_exists')
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_notify_about_instance_usage')
@@ -10047,7 +10047,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
             clean_shutdown=True, migration=self.migration, host_list=[])
 
         mock_update_pci.assert_called_once_with(
-            self.context, instance, {uuids.port_id: [uuids.rp_uuid]})
+            self.context, self.compute.reportclient, instance,
+            {uuids.port_id: [uuids.rp_uuid]})
         mock_save.assert_called_once_with()
 
     @mock.patch('nova.compute.manager.ComputeManager._revert_allocation')
@@ -10055,8 +10056,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
     @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance')
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim')
     @mock.patch('nova.objects.Instance.save')
-    @mock.patch('nova.compute.manager.ComputeManager.'
-                '_update_pci_request_spec_with_allocated_interface_name')
+    @mock.patch('nova.compute.utils.'
+                'update_pci_request_spec_with_allocated_interface_name')
     @mock.patch('nova.compute.utils.notify_usage_exists')
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_notify_about_instance_usage')