Follow-up for I0c764e441993e32aafef0b18049a425c3c832a50

This is a follow-up to change
I0c764e441993e32aafef0b18049a425c3c832a50 to address
review comments.

The most important part is the early exit from
_fill_provider_mapping when request_spec.maps_requested_resources
returns False. That is needed to avoid the performance impact of
fetching allocations and resource provider traits per instance and
per resource provider. Since this code path is currently only
exercised for ports that have resource requests, we want to avoid
the extra work for all other server create requests.
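
As an illustrative sketch only (not the literal diff below), the
early exit amounts to returning before any placement API calls are
made:

    def _fill_provider_mapping(self, context, instance_uuid,
                               request_spec):
        # No ports with resource requests means no granular request
        # groups to map, so skip the per-instance placement calls.
        if not request_spec.maps_requested_resources:
            return
        ...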

Part of blueprint bandwidth-resource-provider

Change-Id: I90845461b2b98c176c7b3b97dd3f47ed604a9bef
Author: Matt Riedemann, 2019-02-21 17:02:32 -05:00
Committed by: Balazs Gibizer
parent f5d236f868
commit 39ec15f58c
6 changed files with 95 additions and 40 deletions


@@ -1249,7 +1249,7 @@ class ComputeTaskManager(base.Base):
"""Fills out the request group - resource provider mapping in the
request spec.
-This is a workaround as placement does not return which PR
+This is a workaround as placement does not return which RP
fulfills which granular request group in the allocation candidate
request. There is a spec proposing a solution in placement:
https://review.openstack.org/#/c/597601/
@@ -1258,10 +1258,35 @@ class ComputeTaskManager(base.Base):
mapping out from the Selection object returned by the scheduler's
select_destinations call.
"""
-allocs = self.report_client.get_allocations_for_consumer(
-    context, instance_uuid)
+# Exit early if this request spec does not require mappings.
+if not request_spec.maps_requested_resources:
+    return
+# TODO(mriedem): Could we use the Selection.allocation_request here
+# to get the resource providers rather than making an API call to
+# placement per instance being scheduled? Granted that is a
+# PUT /allocations/{consumer_id} *request* payload rather than a
+# *response* but at least currently they are in the same format and
+# could make this faster.
+allocs = self.report_client.get_allocs_for_consumer(
+    context, instance_uuid)['allocations']
+if not allocs:
+    # Technically out-of-tree scheduler drivers can still not create
+    # allocations in placement so move on if there are no allocations
+    # for the instance.
+    LOG.debug('No allocations found for instance after scheduling. '
+              'Assuming the scheduler driver is not using Placement.',
+              instance_uuid=instance_uuid)
+    return
+# TODO(mriedem): Short-term we can optimize this by passing a cache by
+# reference of the RP->traits mapping because if we are processing
+# a multi-create request we could have the same RPs being used for
+# multiple instances and avoid duplicate calls. Long-term we could
+# stash the RP->traits mapping on the Selection object since we can
+# pull the traits for each provider from the GET /allocation_candidates
+# response in the scheduler (or leverage the change from the spec
+# mentioned in the docstring above).
provider_traits = {
-rp_uuid: self.report_client._get_provider_traits(
+rp_uuid: self.report_client.get_provider_traits(
context, rp_uuid).traits
for rp_uuid in allocs}
request_spec.map_requested_resources_to_providers(
@@ -1382,17 +1407,18 @@ class ComputeTaskManager(base.Base):
scheduler_utils.populate_filter_properties(filter_props,
host)
# Now that we have a selected host (which has claimed resource
# allocations in the scheduler) for this instance, we may need to
# map allocations to resource providers in the request spec.
try:
self._fill_provider_mapping(
context, instance.uuid, request_spec)
except Exception as exc:
# If anything failed here we need to cleanup and bail out.
with excutils.save_and_reraise_exception():
-self._cleanup_build_artifacts(context, exc, instances,
-                              build_requests,
-                              request_specs,
-                              block_device_mapping,
-                              tags,
-                              cell_mapping_cache)
+self._cleanup_build_artifacts(
+    context, exc, instances, build_requests, request_specs,
+    block_device_mapping, tags, cell_mapping_cache)
# TODO(melwitt): Maybe we should set_target_cell on the contexts
# once we map to a cell, and remove these separate with statements.
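
For reference, get_allocs_for_consumer returns the whole
GET /allocations/{consumer_uuid} response body rather than just the
allocations dict, which is why the new call indexes ['allocations'].
An illustrative (not verbatim) response shape:

    {
        "allocations": {
            "<rp_uuid>": {
                "generation": 42,
                "resources": {"VCPU": 1, "MEMORY_MB": 512}
            }
        },
        "project_id": "<project_id>",
        "user_id": "<user_id>"
    }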


@@ -676,6 +676,13 @@ class RequestSpec(base.NovaObject):
# original request for the forced hosts
self.obj_reset_changes(['force_hosts', 'force_nodes'])
+
+@property
+def maps_requested_resources(self):
+    """Returns True if this RequestSpec needs to map requested_resources
+    to resource providers, False otherwise.
+    """
+    return 'requested_resources' in self and self.requested_resources
def _is_valid_group_rp_mapping(
self, group_rp_mapping, placement_allocations, provider_traits):
"""Decides if the mapping is valid from resources and traits
@@ -778,7 +785,7 @@
This dict contains info only about RPs
appearing in the placement_allocations param.
"""
-if 'requested_resources' not in self or not self.requested_resources:
+if not self.maps_requested_resources:
# Nothing to do, so let's return early
return
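
A quick illustration of the new property's semantics (assuming the
standard oslo.versionedobjects behavior, where 'in' tests whether a
field has been set on the object):

    spec = objects.RequestSpec()
    bool(spec.maps_requested_resources)   # False: field never set
    spec.requested_resources = []
    bool(spec.maps_requested_resources)   # False: set but empty
    spec.requested_resources = [objects.RequestGroup()]
    bool(spec.maps_requested_resources)   # True: groups need mapping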


@@ -399,7 +399,7 @@ class SchedulerReportClient(object):
LOG.error(msg, args)
raise exception.ResourceProviderAggregateRetrievalFailed(uuid=rp_uuid)
-def _get_provider_traits(self, context, rp_uuid):
+def get_provider_traits(self, context, rp_uuid):
"""Queries the placement API for a resource provider's traits.
:param context: The security context
@@ -800,7 +800,7 @@ class SchedulerReportClient(object):
rp_uuid, aggs, generation=generation)
# Refresh traits
-trait_info = self._get_provider_traits(context, rp_uuid)
+trait_info = self.get_provider_traits(context, rp_uuid)
traits, generation = trait_info.traits, trait_info.generation
msg = ("Refreshing trait associations for resource provider %s, "
"traits: %s")


@@ -885,7 +885,7 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
self.client.update_from_provider_tree, self.context, new_tree)
# Placement didn't get updated
self.assertEqual(set(['HW_CPU_X86_AVX', 'HW_CPU_X86_AVX2']),
-self.client._get_provider_traits(
+self.client.get_provider_traits(
self.context, uuids.root).traits)
# ...and the root was removed from the cache
self.assertFalse(self.client._provider_tree.exists(uuids.root))
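
The functional test above exercises the renamed method; as a caller
sketch (illustrative only), the TraitInfo it returns carries both the
provider's traits and the provider generation:

    trait_info = report_client.get_provider_traits(context, rp_uuid)
    traits, generation = trait_info.traits, trait_info.generation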


@@ -2056,9 +2056,9 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
self.assertTrue(mock_build.called)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')
+'get_provider_traits')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'get_allocations_for_consumer')
+'get_allocs_for_consumer')
@mock.patch('nova.objects.request_spec.RequestSpec.'
'map_requested_resources_to_providers')
def test_schedule_and_build_instances_fill_request_spec(
@@ -2067,7 +2067,8 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
self.params['request_specs'][0].requested_resources = [
objects.RequestGroup()]
-mock_get_allocs.return_value = {uuids.rp1: mock.sentinel.rp1_allocs}
+mock_get_allocs.return_value = {
+    'allocations': {uuids.rp1: mock.sentinel.rp1_allocs}}
mock_traits.return_value.traits = ['TRAIT1']
instance_uuid = self._do_schedule_and_build_instances_test(
@@ -2080,28 +2081,49 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
@mock.patch('nova.conductor.manager.ComputeTaskManager.'
'_cleanup_build_artifacts')
-@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-            '_get_provider_traits')
-@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-            'get_allocations_for_consumer')
-@mock.patch('nova.objects.request_spec.RequestSpec.'
-            'map_requested_resources_to_providers')
+@mock.patch('nova.conductor.manager.ComputeTaskManager.'
+            '_fill_provider_mapping', side_effect=test.TestingException)
def test_schedule_and_build_instances_fill_request_spec_error(
-        self, mock_map, mock_get_allocs, mock_traits, mock_cleanup):
-    # makes sure there is some request group in the spec to be mapped
-    self.params['request_specs'][0].requested_resources = [
-        objects.RequestGroup()]
-    mock_get_allocs.side_effect = exc.ConsumerAllocationRetrievalFailed(
-        consumer_uuid=uuids.inst, error='some error')
+        self, mock_fill, mock_cleanup):
self.assertRaises(
-    exc.ConsumerAllocationRetrievalFailed,
+    test.TestingException,
self._do_schedule_and_build_instances_test, self.params)
-self.assertFalse(mock_map.called)
-self.assertFalse(mock_traits.called)
-self.assertTrue(mock_cleanup.called)
+mock_fill.assert_called_once()
+mock_cleanup.assert_called_once()
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_provider_traits', new_callable=mock.NonCallableMock)
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_allocs_for_consumer',
+            new_callable=mock.NonCallableMock)
+@mock.patch('nova.objects.request_spec.RequestSpec.'
+            'map_requested_resources_to_providers',
+            new_callable=mock.NonCallableMock)
+def test_schedule_and_build_instances_fill_request_spec_noop(
+        self, mock_map, mock_get_allocs, mock_traits):
+    """Tests to make sure _fill_provider_mapping exits early if there are
+    no requested_resources on the RequestSpec.
+    """
+    self.params['request_specs'][0].requested_resources = []
+    self._do_schedule_and_build_instances_test(self.params)
+
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_provider_traits', new_callable=mock.NonCallableMock)
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_allocs_for_consumer', return_value={'allocations': {}})
+@mock.patch('nova.objects.request_spec.RequestSpec.'
+            'map_requested_resources_to_providers',
+            new_callable=mock.NonCallableMock)
+def test_schedule_and_build_instances_fill_request_spec_no_allocs(
+        self, mock_map, mock_get_allocs, mock_traits):
+    """Tests to make sure _fill_provider_mapping handles a scheduler
+    driver which does not use placement (so there are no allocations).
+    """
+    self.params['request_specs'][0].requested_resources = [
+        objects.RequestGroup()]
+    self._do_schedule_and_build_instances_test(self.params)
+    mock_get_allocs.assert_called_once()
@mock.patch('nova.objects.CellMapping.get_by_uuid')
def test_bury_in_cell0_no_cell0(self, mock_cm_get):
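
A note on the mock.NonCallableMock patches in the two new tests above:
a NonCallableMock raises TypeError when invoked, so those tests fail
loudly if _fill_provider_mapping unexpectedly calls placement or the
mapping code. For illustration:

    >>> import mock
    >>> mock.NonCallableMock()()
    TypeError: 'NonCallableMock' object is not callable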


@@ -1701,7 +1701,7 @@ class TestProviderOperations(SchedulerReportClientTestCase):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_provider_aggregates')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')
+'get_provider_traits')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_sharing_providers')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -2644,7 +2644,7 @@ class TestTraits(SchedulerReportClientTestCase):
'resource_provider_generation': 42}
self.ks_adap_mock.get.return_value = resp_mock
-result, gen = self.client._get_provider_traits(self.context, uuid)
+result, gen = self.client.get_provider_traits(self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
@@ -2668,7 +2668,7 @@
resp_mock.status_code = status_code
self.assertRaises(
exception.ResourceProviderTraitRetrievalFailed,
-self.client._get_provider_traits, self.context, uuid)
+self.client.get_provider_traits, self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
@@ -2686,7 +2686,7 @@
uuid = uuids.compute_node
self.ks_adap_mock.get.side_effect = ks_exc.EndpointNotFound()
self.assertRaises(ks_exc.ClientException,
-self.client._get_provider_traits, self.context, uuid)
+self.client.get_provider_traits, self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
expected_url,
@@ -2865,7 +2865,7 @@ class TestAssociations(SchedulerReportClientTestCase):
self.mock_get_traits = self.useFixture(fixtures.MockPatch(
'nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')).mock
+'get_provider_traits')).mock
self.mock_get_traits.return_value = report.TraitInfo(
traits=set(['CUSTOM_GOLD']), generation=43)