From bc57a916c4045a387c69f327eb3a9e74be5b1a33 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Wed, 21 Nov 2018 17:06:24 +0100
Subject: [PATCH] Recalculate request group - RP mapping during re-schedule

Re-scheduling uses an alternative allocation candidate from placement
to get resources for the server. This also means that the previously
calculated request group - resource provider mapping is no longer valid
and needs to be recalculated based on the new allocation of the server.

blueprint bandwidth-resource-provider

Change-Id: If8a13f74d2b3c99f05365eb49dcfa01d9042fefa
---
 nova/compute/manager.py                     |  4 +-
 nova/conductor/manager.py                   | 30 ++++++---
 nova/tests/unit/conductor/test_conductor.py | 75 +++++++++++++++++++++
 3 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 14d44e1a1308..b9770acc1f04 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2125,7 +2125,9 @@ class ComputeManager(manager.Manager):
             scheduler_hints)
         image_meta = objects.ImageMeta.from_dict(image)
 
-        if request_spec:
+        if (request_spec
+                and 'requested_resources' in request_spec
+                and request_spec.requested_resources is not None):
             request_group_resource_providers_mapping = {
                 group.requester_id: group.provider_uuids
                 for group in request_spec.requested_resources
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index b834b01139e3..e6f63ea5acee 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -567,7 +567,7 @@ class ComputeTaskManager(base.Base):
         # will the API send the request_spec if using cells v1, so we need
         # to check and build our own if one is not provided.
         if request_spec is None:
-            request_spec = scheduler_utils.build_request_spec(
+            legacy_request_spec = scheduler_utils.build_request_spec(
                 image, instances)
         else:
             # TODO(mriedem): This is annoying but to populate the local
@@ -576,7 +576,10 @@ class ComputeTaskManager(base.Base):
             # and we can remove it once the populate_retry and
             # populate_filter_properties utility methods are converted to
             # work on a RequestSpec object rather than filter_properties.
-            request_spec = request_spec.to_legacy_request_spec_dict()
+            # NOTE(gibi): we have to keep a reference to the original
+            # RequestSpec object passed to this function as we lose information
+            # during the below legacy conversion
+            legacy_request_spec = request_spec.to_legacy_request_spec_dict()
 
         # 'host_lists' will be None in one of two cases: when running cellsv1,
         # or during a reschedule from a pre-Queens compute. In all other cases,
@@ -593,7 +596,7 @@ class ComputeTaskManager(base.Base):
                 filter_properties, instances[0].uuid)
             instance_uuids = [instance.uuid for instance in instances]
             spec_obj = objects.RequestSpec.from_primitives(
-                context, request_spec, filter_properties)
+                context, legacy_request_spec, filter_properties)
             LOG.debug("Rescheduling: %s", is_reschedule)
             if is_reschedule:
                 # Make sure that we have a host, as we may have exhausted all
@@ -629,7 +632,7 @@ class ComputeTaskManager(base.Base):
             for instance in instances:
                 self._set_vm_state_and_notify(
                     context, instance.uuid, 'build_instances', updates,
-                    exc, request_spec)
+                    exc, legacy_request_spec)
             # If num_attempts > 1, we're in a reschedule and probably
             # either hit NoValidHost or MaxRetriesExceeded. Either way,
             # the build request should already be gone and we probably
@@ -668,6 +671,12 @@ class ComputeTaskManager(base.Base):
                             elevated, self.report_client, spec_obj,
                             instance.uuid, alloc_req,
                             host.allocation_request_version)
+                        if request_spec:
+                            # NOTE(gibi): redo the request group - resource
+                            # provider mapping as the above claim call moves
+                            # the allocation of the instance to another host
+                            self._fill_provider_mapping(
+                                context, instance.uuid, request_spec)
                     else:
                         # Some deployments use different schedulers that do not
                         # use Placement, so they will not have an
@@ -704,11 +713,16 @@ class ComputeTaskManager(base.Base):
             # could have come from a compute via reschedule and it would
             # already have some things set, like scheduler_hints.
             local_reqspec = objects.RequestSpec.from_primitives(
-                context, request_spec, local_filter_props)
+                context, legacy_request_spec, local_filter_props)
 
-            # TODO(gibi): make sure that the request group - provider mapping
-            # is updated in the request spec during reschedule
-            local_reqspec.requested_resources = []
+            # NOTE(gibi): at this point the request spec already got converted
+            # to a legacy dict and then back to an object so we lost the non
+            # legacy part of the spec. Re-populate the requested_resource
+            # field based on the original request spec object passed to this
+            # function.
+            if request_spec:
+                local_reqspec.requested_resources = (
+                    request_spec.requested_resources)
 
             # The block_device_mapping passed from the api doesn't contain
             # instance specific information
diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py
index 760ef7f57d8a..e2b31d8031d9 100644
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -951,6 +951,81 @@ class _BaseTaskTestCase(object):
 
         do_test()
 
+    @mock.patch.object(objects.Instance, 'save', new=mock.MagicMock())
+    @mock.patch.object(query.SchedulerQueryClient, 'select_destinations')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify', new=mock.MagicMock())
+    def test_build_instances_reschedule_recalculates_provider_mapping(self,
+            mock_select_dests):
+
+        rg1 = objects.RequestGroup(resources={"CUSTOM_FOO": 1})
+        request_spec = objects.RequestSpec(requested_resources=[rg1])
+
+        mock_select_dests.return_value = [[fake_selection1]]
+        instance = fake_instance.fake_instance_obj(self.context)
+        image = {'fake-data': 'should_pass_silently'}
+
+        # build_instances() is a cast, we need to wait for it to complete
+        self.useFixture(cast_as_call.CastAsCall(self))
+
+        @mock.patch('nova.conductor.manager.ComputeTaskManager.'
+                    '_fill_provider_mapping')
+        @mock.patch('nova.scheduler.utils.claim_resources')
+        @mock.patch('nova.objects.request_spec.RequestSpec.from_primitives',
+                    return_value=request_spec)
+        @mock.patch.object(self.conductor_manager.compute_rpcapi,
+                           'build_and_run_instance')
+        @mock.patch.object(self.conductor_manager,
+                           '_populate_instance_mapping')
+        @mock.patch.object(self.conductor_manager, '_destroy_build_request')
+        def do_test(mock_destroy_build_req, mock_pop_inst_map,
+                    mock_build_and_run, mock_request_spec_from_primitives,
+                    mock_claim, mock_rp_mapping):
+            self.conductor.build_instances(
+                context=self.context,
+                instances=[instance],
+                image=image,
+                filter_properties={'retry': {'num_attempts': 1, 'hosts': []}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping='block_device_mapping',
+                legacy_bdm=False,
+                host_lists=fake_host_lists1,
+                request_spec=request_spec)
+
+            expected_build_run_host_list = copy.copy(fake_host_lists1[0])
+            if expected_build_run_host_list:
+                expected_build_run_host_list.pop(0)
+            mock_build_and_run.assert_called_once_with(
+                self.context,
+                instance=mock.ANY,
+                host='host1',
+                image=image,
+                request_spec=request_spec,
+                filter_properties={'retry': {'num_attempts': 2,
+                                             'hosts': [['host1', 'node1']]},
+                                   'limits': {}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping=test.MatchType(
+                    objects.BlockDeviceMappingList),
+                node='node1',
+                limits=None,
+                host_list=expected_build_run_host_list)
+
+            mock_rp_mapping.assert_called_once_with(
+                self.context, instance.uuid, mock.ANY)
+            actual_request_spec = mock_rp_mapping.mock_calls[0][1][2]
+            self.assertEqual(
+                rg1.resources,
+                actual_request_spec.requested_resources[0].resources)
+
+        do_test()
+
     @mock.patch.object(cinder.API, 'attachment_get')
     @mock.patch.object(cinder.API, 'attachment_create')
     @mock.patch.object(block_device_obj.BlockDeviceMapping, 'save')
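
For readers who want the mechanism in isolation: below is a minimal, standalone
Python sketch, not part of the patch and not Nova code. The simplified
RequestGroup class, the 'port-1' requester id and the rp-uuid values are made
up for illustration. It shows the requester_id -> provider_uuids mapping that
compute/manager.py builds above, and why the conductor has to rebuild it once
a reschedule claims an alternate allocation candidate.

    # Illustrative sketch only (assumed names, not Nova code).

    class RequestGroup(object):
        """Simplified stand-in for nova.objects.RequestGroup."""
        def __init__(self, requester_id, resources, provider_uuids=None):
            self.requester_id = requester_id
            self.resources = resources
            self.provider_uuids = provider_uuids or []


    def provider_mapping(requested_resources):
        # Mirrors the dict comprehension added to compute/manager.py above.
        return {group.requester_id: group.provider_uuids
                for group in requested_resources}


    # First scheduling attempt: the group is fulfilled by a provider on host1.
    group = RequestGroup('port-1', {'CUSTOM_FOO': 1}, ['rp-uuid-on-host1'])
    print(provider_mapping([group]))   # {'port-1': ['rp-uuid-on-host1']}

    # A reschedule claims an alternate allocation candidate on host2, so the
    # group now maps to a different provider; refreshing this stale mapping
    # after a successful claim is what the conductor change above does.
    group.provider_uuids = ['rp-uuid-on-host2']
    print(provider_mapping([group]))   # {'port-1': ['rp-uuid-on-host2']}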