From 7daa3f59e29849ccd43a1275e38917842852ab75 Mon Sep 17 00:00:00 2001 From: Eric Fried Date: Mon, 2 Dec 2019 18:01:25 -0600 Subject: [PATCH] Use provider mappings from Placement (mostly) fill_provider_mapping is used from *most* code paths where it's necessary to associate RequestSpec.request_groups with the resource providers that are satisfying them. (Specifically, all the code paths where we have a Selection object available. More about that below.) Prior to Placement microversion 1.34, the only way to do this mapping was by reproducing much of the logic from GET /allocation_candidates locally to reverse engineer the associations. This was incomplete, imperfect, inefficient, and ugly. That workaround was nested in the call from fill_provider_mapping to fill_provider_mapping_based_on_allocation. Placement microversion 1.34 enhanced GET /allocation_candidates to return these mappings [1], and Nova started using 1.34 as of [2], so this commit makes fill_provider_mapping bypass fill_provider_mapping_based_on_allocation completely. We would love to get rid of the entire hack, but fill_provider_mapping_based_on_allocation is still used from finish_revert_resize to restore port bindings on a reverted migration. And when reverting a migration, we don't have allocation candidates with mappings, only the original source allocations. It is left to a future patch to figure out how to get around this, conceivably by saving the original mappings in the migration context. [1] https://docs.openstack.org/placement/train/specs/train/implemented/placement-resource-provider-request-group-mapping-in-allocation-candidates.html [2] I52499ff6639c1a5815a8557b22dd33106dcc386b Related to blueprint: placement-resource-provider-request-group-mapping-in-allocation-candidates Change-Id: I45e0b2b73f88b86a20bc70ddf4f9bb97c8ea8312 --- nova/conductor/manager.py | 9 +-- nova/conductor/tasks/migrate.py | 6 +- nova/scheduler/utils.py | 38 ++++----- .../unit/conductor/tasks/test_migrate.py | 38 +++------ nova/tests/unit/conductor/test_conductor.py | 79 ++++++++++--------- nova/tests/unit/scheduler/test_utils.py | 59 +++++++------- 6 files changed, 109 insertions(+), 120 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index a789afea5380..ba52b5723283 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -729,8 +729,7 @@ class ComputeTaskManager(base.Base): # moves the allocation of the instance to # another host scheduler_utils.fill_provider_mapping( - context, self.report_client, request_spec, - host) + request_spec, host) except Exception as exc: self._cleanup_when_reschedule_fails( context, instance, exc, legacy_request_spec, @@ -1144,8 +1143,7 @@ class ComputeTaskManager(base.Base): if recreate: scheduler_utils.fill_provider_mapping( - context, self.report_client, request_spec, - selection) + request_spec, selection) except (exception.NoValidHost, exception.UnsupportedPolicyException, @@ -1531,8 +1529,7 @@ class ComputeTaskManager(base.Base): # allocations in the scheduler) for this instance, we may need to # map allocations to resource providers in the request spec. try: - scheduler_utils.fill_provider_mapping( - context, self.report_client, request_spec, host) + scheduler_utils.fill_provider_mapping(request_spec, host) except Exception as exc: # If anything failed here we need to cleanup and bail out.
with excutils.save_and_reraise_exception(): diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index e6e31eea9772..f60d6a51a01e 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -441,8 +441,7 @@ class MigrationTask(base.TaskBase): selection, self.host_list = self._get_host_supporting_request( selection_list) - scheduler_utils.fill_provider_mapping( - self.context, self.reportclient, self.request_spec, selection) + scheduler_utils.fill_provider_mapping(self.request_spec, selection) return selection def _reschedule(self): @@ -479,8 +478,7 @@ class MigrationTask(base.TaskBase): selection.allocation_request_version) if host_available: scheduler_utils.fill_provider_mapping( - self.context, self.reportclient, self.request_spec, - selection) + self.request_spec, selection) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index c7c429b37eb7..af637dd9ffb4 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -1214,23 +1214,10 @@ def get_weight_multiplier(host_state, multiplier_name, multiplier_config): return value -def fill_provider_mapping( - context, report_client, request_spec, host_selection): +def fill_provider_mapping(request_spec, host_selection): """Fills out the request group - resource provider mapping in the request spec. - This is a workaround as placement does not return which RP - fulfills which granular request group in the allocation candidate - request. There is a spec proposing a solution in placement: - https://review.opendev.org/#/c/597601/ - When that spec is implemented then this function can be - replaced with a simpler code that copies the group - RP - mapping out from the Selection object returned by the scheduler's - select_destinations call. - - :param context: The security context - :param report_client: SchedulerReportClient instance to be used to - communicate with placement :param request_spec: The RequestSpec object associated with the operation :param host_selection: The Selection object returned by the scheduler @@ -1244,11 +1231,19 @@ def fill_provider_mapping( # allocations in placement but if request_spec.maps_requested_resources # is not empty and the scheduling succeeded then placement has to be # involved - ar = jsonutils.loads(host_selection.allocation_request) - allocs = ar['allocations'] + mappings = jsonutils.loads(host_selection.allocation_request)['mappings'] - fill_provider_mapping_based_on_allocation( - context, report_client, request_spec, allocs) + for request_group in request_spec.requested_resources: + # NOTE(efried): We can count on request_group.requester_id being set: + # - For groups from flavors, ResourceRequest.get_request_group sets it + # to the group suffix. + # - For groups from other sources (e.g. ports, accelerators), it is + # required to be set by ResourceRequest._add_request_group, and that + # method uses it as the suffix. + # And we can count on mappings[requester_id] existing because each + # RequestGroup is translated into a nonempty group (empties are + # disallowed by ResourceRequest._add_request_group) that is fed to + # Placement. + request_group.provider_uuids = mappings[request_group.requester_id] def fill_provider_mapping_based_on_allocation( @@ -1262,6 +1257,13 @@ def fill_provider_mapping_based_on_allocation( the mapping is calculated based on the allocation of the source host the move operation is reverting to.
+ This is a workaround as placement does not return which RP fulfills which + granular request group except in the allocation candidate request (because + request groups are ephemeral, only existing in the scope of that request). + + .. todo:: Figure out a better way to preserve the mappings so we can get + rid of this workaround. + :param context: The security context :param report_client: SchedulerReportClient instance to be used to communicate with placement diff --git a/nova/tests/unit/conductor/tasks/test_migrate.py b/nova/tests/unit/conductor/tasks/test_migrate.py index f76e00fbec6e..67ecc86c0d7a 100644 --- a/nova/tests/unit/conductor/tasks/test_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_migrate.py @@ -125,15 +125,9 @@ class MigrationTaskTestCase(test.NoDBTestCase): cn_mock.side_effect = set_migration_uuid selection = self.host_lists[0][0] - with test.nested( - mock.patch.object(scheduler_utils, - 'fill_provider_mapping'), - mock.patch.object(task, '_is_selected_host_in_source_cell', - return_value=same_cell) - ) as (fill_mock, _is_source_cell_mock): + with mock.patch.object(task, '_is_selected_host_in_source_cell', + return_value=same_cell) as _is_source_cell_mock: task.execute() - fill_mock.assert_called_once_with( - task.context, task.reportclient, task.request_spec, selection) _is_source_cell_mock.assert_called_once_with(selection) self.ensure_network_metadata_mock.assert_called_once_with( @@ -253,11 +247,9 @@ class MigrationTaskTestCase(test.NoDBTestCase): mock_get_resources.assert_called_once_with( self.context, self.instance.uuid) - @mock.patch.object(scheduler_utils, 'fill_provider_mapping') @mock.patch.object(scheduler_utils, 'claim_resources') @mock.patch.object(context.RequestContext, 'elevated') - def test_execute_reschedule( - self, mock_elevated, mock_claim, mock_fill_provider_mapping): + def test_execute_reschedule(self, mock_elevated, mock_claim): report_client = report.SchedulerReportClient() # setup task for re-schedule alloc_req = { @@ -286,9 +278,6 @@ class MigrationTaskTestCase(test.NoDBTestCase): mock_claim.assert_called_once_with( mock_elevated.return_value, report_client, self.request_spec, self.instance.uuid, alloc_req, '1.19') - mock_fill_provider_mapping.assert_called_once_with( - self.context, report_client, self.request_spec, - alternate_selection) @mock.patch.object(scheduler_utils, 'fill_provider_mapping') @mock.patch.object(scheduler_utils, 'claim_resources') @@ -724,14 +713,12 @@ class MigrationTaskTestCase(test.NoDBTestCase): ]) @mock.patch.object(migrate.LOG, 'debug') - @mock.patch('nova.scheduler.utils.fill_provider_mapping') @mock.patch('nova.scheduler.utils.claim_resources') @mock.patch('nova.objects.Service.get_by_host_and_binary') def test_reschedule_old_compute_skipped( - self, mock_get_service, mock_claim_resources, mock_fill_mapping, - mock_debug): + self, mock_get_service, mock_claim_resources, mock_debug): self.request_spec.requested_resources = [ - objects.RequestGroup() + objects.RequestGroup(requester_id=uuids.port1) ] task = self._generate_task() resources = { @@ -745,15 +732,17 @@ class MigrationTaskTestCase(test.NoDBTestCase): nodename="node1", cell_uuid=uuids.cell1, allocation_request=jsonutils.dumps( - {"allocations": {uuids.host1: resources}}), - allocation_request_version='1.19') + {"allocations": {uuids.host1: resources}, + "mappings": {uuids.port1: [uuids.host1]}}), + allocation_request_version='1.34') second = objects.Selection( service_host="host2", nodename="node2", cell_uuid=uuids.cell1, 
allocation_request=jsonutils.dumps( - {"allocations": {uuids.host2: resources}}), - allocation_request_version='1.19') + {"allocations": {uuids.host2: resources}, + "mappings": {uuids.port1: [uuids.host2]}}), + allocation_request_version='1.34') first_service = objects.Service(service_host='host1') first_service.version = 38 @@ -775,9 +764,8 @@ class MigrationTaskTestCase(test.NoDBTestCase): mock_claim_resources.assert_called_once_with( self.context.elevated(), task.reportclient, task.request_spec, self.instance.uuid, - {"allocations": {uuids.host2: resources}}, '1.19') - mock_fill_mapping.assert_called_once_with( - task.context, task.reportclient, task.request_spec, second) + {"allocations": {uuids.host2: resources}, + "mappings": {uuids.port1: [uuids.host2]}}, '1.34') mock_debug.assert_has_calls([ mock.call( 'Scheduler returned alternate host %(host)s as a possible ' diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 9c100661f56b..6b43cf8645b9 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -48,7 +48,6 @@ from nova.objects import base as obj_base from nova.objects import block_device as block_device_obj from nova.objects import fields from nova.scheduler.client import query -from nova.scheduler.client import report from nova.scheduler import utils as scheduler_utils from nova import test from nova.tests import fixtures @@ -68,24 +67,42 @@ from nova.volume import cinder CONF = conf.CONF -fake_alloc1 = {"allocations": { - uuids.host1: { - "resources": {"VCPU": 1, - "MEMORY_MB": 1024, - "DISK_GB": 100} - }}} -fake_alloc2 = {"allocations": { - uuids.host2: { - "resources": {"VCPU": 1, - "MEMORY_MB": 1024, - "DISK_GB": 100} - }}} -fake_alloc3 = {"allocations": { - uuids.host3: { - "resources": {"VCPU": 1, - "MEMORY_MB": 1024, - "DISK_GB": 100} - }}} +fake_alloc1 = { + "allocations": { + uuids.host1: { + "resources": {"VCPU": 1, + "MEMORY_MB": 1024, + "DISK_GB": 100} + } + }, + "mappings": { + uuids.port1: [uuids.host1] + } +} +fake_alloc2 = { + "allocations": { + uuids.host2: { + "resources": {"VCPU": 1, + "MEMORY_MB": 1024, + "DISK_GB": 100} + } + }, + "mappings": { + uuids.port1: [uuids.host2] + } +} +fake_alloc3 = { + "allocations": { + uuids.host3: { + "resources": {"VCPU": 1, + "MEMORY_MB": 1024, + "DISK_GB": 100} + } + }, + "mappings": { + uuids.port1: [uuids.host3] + } +} fake_alloc_json1 = jsonutils.dumps(fake_alloc1) fake_alloc_json2 = jsonutils.dumps(fake_alloc2) fake_alloc_json3 = jsonutils.dumps(fake_alloc3) @@ -1028,11 +1045,9 @@ class _BaseTaskTestCase(object): host_list=expected_build_run_host_list) mock_rp_mapping.assert_called_once_with( - self.context, - test.MatchType(report.SchedulerReportClient), test.MatchType(objects.RequestSpec), test.MatchType(objects.Selection)) - actual_request_spec = mock_rp_mapping.mock_calls[0][1][2] + actual_request_spec = mock_rp_mapping.mock_calls[0][1][0] self.assertEqual( rg1.resources, actual_request_spec.requested_resources[0].resources) @@ -1113,11 +1128,9 @@ class _BaseTaskTestCase(object): # called only once when the claim succeeded mock_rp_mapping.assert_called_once_with( - self.context, - test.MatchType(report.SchedulerReportClient), test.MatchType(objects.RequestSpec), test.MatchType(objects.Selection)) - actual_request_spec = mock_rp_mapping.mock_calls[0][1][2] + actual_request_spec = mock_rp_mapping.mock_calls[0][1][0] self.assertEqual( rg1.resources, actual_request_spec.requested_resources[0].resources) @@ 
-1750,7 +1763,6 @@ class _BaseTaskTestCase(object): get_req_res_mock.assert_called_once_with( self.context, inst_obj.uuid) fill_rp_mapping_mock.assert_called_once_with( - self.context, self.conductor_manager.report_client, fake_spec, fake_selection) self.assertEqual('compute.instance.rebuild.scheduled', @@ -2291,24 +2303,13 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): self.assertTrue(mock_build.called) - @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' - 'get_provider_traits') - @mock.patch('nova.objects.request_spec.RequestSpec.' - 'map_requested_resources_to_providers') - def test_schedule_and_build_instances_fill_request_spec( - self, mock_map, mock_traits): + def test_schedule_and_build_instances_fill_request_spec(self): # makes sure there is some request group in the spec to be mapped self.params['request_specs'][0].requested_resources = [ - objects.RequestGroup()] - - mock_traits.return_value.traits = ['TRAIT1'] + objects.RequestGroup(requester_id=uuids.port1)] self._do_schedule_and_build_instances_test(self.params) - mock_map.assert_called_once_with(fake_alloc1['allocations'], - {uuids.host1: ['TRAIT1']}) - mock_traits.assert_called_once_with(mock.ANY, uuids.host1) - @mock.patch('nova.conductor.manager.ComputeTaskManager.' '_cleanup_build_artifacts') @mock.patch('nova.scheduler.utils.' diff --git a/nova/tests/unit/scheduler/test_utils.py b/nova/tests/unit/scheduler/test_utils.py index d9a6327a75f7..e3a999332c89 100644 --- a/nova/tests/unit/scheduler/test_utils.py +++ b/nova/tests/unit/scheduler/test_utils.py @@ -1371,45 +1371,48 @@ class TestUtils(TestUtilsBase): utils.get_weight_multiplier(host1, 'cpu_weight_multiplier', 1.0) ) - @mock.patch('nova.scheduler.utils.' - 'fill_provider_mapping_based_on_allocation') - def test_fill_provider_mapping_returns_early_if_nothing_to_do( - self, mock_fill_provider): - context = nova_context.RequestContext() + def _set_up_and_fill_provider_mapping(self, requested_resources): request_spec = objects.RequestSpec() - # set up the request that there is nothing to do - request_spec.requested_resources = [] - report_client = mock.sentinel.report_client - selection = objects.Selection() - - utils.fill_provider_mapping( - context, report_client, request_spec, selection) - - mock_fill_provider.assert_not_called() - - @mock.patch('nova.scheduler.utils.' 
- 'fill_provider_mapping_based_on_allocation') - def test_fill_provider_mapping(self, mock_fill_provider): - context = nova_context.RequestContext() - request_spec = objects.RequestSpec() - request_spec.requested_resources = [objects.RequestGroup()] - report_client = mock.sentinel.report_client + request_spec.requested_resources = requested_resources allocs = { - uuids.rp_uuid: { + uuids.rp_uuid1: { 'resources': { 'NET_BW_EGR_KILOBIT_PER_SEC': 1, } + }, + uuids.rp_uuid2: { + 'resources': { + 'NET_BW_INGR_KILOBIT_PER_SEC': 1, + } } } - allocation_req = {'allocations': allocs} + mappings = { + uuids.port_id1: [uuids.rp_uuid2], + uuids.port_id2: [uuids.rp_uuid1], + } + allocation_req = {'allocations': allocs, 'mappings': mappings} selection = objects.Selection( allocation_request=jsonutils.dumps(allocation_req)) - utils.fill_provider_mapping( - context, report_client, request_spec, selection) + # Unmapped initially + for rg in requested_resources: + self.assertEqual([], rg.provider_uuids) - mock_fill_provider.assert_called_once_with( - context, report_client, request_spec, allocs) + utils.fill_provider_mapping(request_spec, selection) + + def test_fill_provider_mapping(self): + rg1 = objects.RequestGroup(requester_id=uuids.port_id1) + rg2 = objects.RequestGroup(requester_id=uuids.port_id2) + self._set_up_and_fill_provider_mapping([rg1, rg2]) + + # Validate the mappings + self.assertEqual([uuids.rp_uuid2], rg1.provider_uuids) + self.assertEqual([uuids.rp_uuid1], rg2.provider_uuids) + + def test_fill_provider_mapping_no_op(self): + # This just proves that having 'mappings' in the allocation request + # doesn't break anything. + self._set_up_and_fill_provider_mapping([]) @mock.patch.object(objects.RequestSpec, 'map_requested_resources_to_providers')
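For reviewers who have not looked at the 1.34 payload yet, here is a minimal standalone sketch of the shape of a Selection.allocation_request carrying the new 'mappings' key and of how the simplified fill_provider_mapping consumes it. It is only an illustration of the idea: the provider/port identifier strings, the FakeRequestGroup stand-in, and the fill_mapping_sketch helper are invented for the example and are not part of Nova or of this patch.

# Standalone sketch (not part of the patch): how the 'mappings' key returned
# by GET /allocation_candidates at Placement microversion 1.34 ties each
# RequestGroup to the providers satisfying it. Identifiers and the
# FakeRequestGroup class are made up for illustration only.
import json

# Roughly what Selection.allocation_request looks like at microversion 1.34;
# the unsuffixed group is keyed by the empty string here.
allocation_request = json.dumps({
    "allocations": {
        "rp-compute-uuid": {"resources": {"VCPU": 1, "MEMORY_MB": 1024}},
        "rp-net-uuid": {"resources": {"NET_BW_EGR_KILOBIT_PER_SEC": 1000}},
    },
    # New in 1.34: request group suffix (requester_id) -> list of provider
    # UUIDs satisfying that group.
    "mappings": {
        "": ["rp-compute-uuid"],
        "port-uuid-1": ["rp-net-uuid"],
    },
})


class FakeRequestGroup(object):
    """Minimal stand-in for objects.RequestGroup, for illustration only."""

    def __init__(self, requester_id):
        self.requester_id = requester_id
        self.provider_uuids = []


def fill_mapping_sketch(request_groups, selection_allocation_request):
    """Mirrors the simplified fill_provider_mapping loop in the patch."""
    mappings = json.loads(selection_allocation_request)['mappings']
    for group in request_groups:
        # requester_id doubles as the granular group suffix sent to
        # Placement, so it appears as a key in 'mappings'.
        group.provider_uuids = mappings[group.requester_id]


groups = [FakeRequestGroup('port-uuid-1')]
fill_mapping_sketch(groups, allocation_request)
assert groups[0].provider_uuids == ['rp-net-uuid']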