From bc57a916c4045a387c69f327eb3a9e74be5b1a33 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Wed, 21 Nov 2018 17:06:24 +0100
Subject: [PATCH] Recalculate request group - RP mapping during re-schedule

Re-scheduling uses an alternative allocation candidate from placement
to get resources for the server. This also means that the previously
calculated request group - resource provider mapping is no longer valid
and needs to be recalculated based on the new allocation of the server.

blueprint bandwidth-resource-provider

Change-Id: If8a13f74d2b3c99f05365eb49dcfa01d9042fefa
---
 nova/compute/manager.py                     |  4 +-
 nova/conductor/manager.py                   | 30 ++++++---
 nova/tests/unit/conductor/test_conductor.py | 75 +++++++++++++++++++++
 3 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 14d44e1a1308..b9770acc1f04 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2125,7 +2125,9 @@ class ComputeManager(manager.Manager):
             scheduler_hints)
         image_meta = objects.ImageMeta.from_dict(image)
 
-        if request_spec:
+        if (request_spec
+                and 'requested_resources' in request_spec
+                and request_spec.requested_resources is not None):
             request_group_resource_providers_mapping = {
                 group.requester_id: group.provider_uuids
                 for group in request_spec.requested_resources
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index b834b01139e3..e6f63ea5acee 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -567,7 +567,7 @@ class ComputeTaskManager(base.Base):
         # will the API send the request_spec if using cells v1, so we need
         # to check and build our own if one is not provided.
         if request_spec is None:
-            request_spec = scheduler_utils.build_request_spec(
+            legacy_request_spec = scheduler_utils.build_request_spec(
                 image, instances)
         else:
             # TODO(mriedem): This is annoying but to populate the local
@@ -576,7 +576,10 @@ class ComputeTaskManager(base.Base):
             # and we can remove it once the populate_retry and
             # populate_filter_properties utility methods are converted to
             # work on a RequestSpec object rather than filter_properties.
-            request_spec = request_spec.to_legacy_request_spec_dict()
+            # NOTE(gibi): we have to keep a reference to the original
+            # RequestSpec object passed to this function as we lose information
+            # during the below legacy conversion
+            legacy_request_spec = request_spec.to_legacy_request_spec_dict()
 
         # 'host_lists' will be None in one of two cases: when running cellsv1,
         # or during a reschedule from a pre-Queens compute. In all other cases,
@@ -593,7 +596,7 @@ class ComputeTaskManager(base.Base):
                 filter_properties, instances[0].uuid)
             instance_uuids = [instance.uuid for instance in instances]
             spec_obj = objects.RequestSpec.from_primitives(
-                context, request_spec, filter_properties)
+                context, legacy_request_spec, filter_properties)
             LOG.debug("Rescheduling: %s", is_reschedule)
             if is_reschedule:
                 # Make sure that we have a host, as we may have exhausted all
@@ -629,7 +632,7 @@ class ComputeTaskManager(base.Base):
             for instance in instances:
                 self._set_vm_state_and_notify(
                     context, instance.uuid, 'build_instances', updates,
-                    exc, request_spec)
+                    exc, legacy_request_spec)
             # If num_attempts > 1, we're in a reschedule and probably
             # either hit NoValidHost or MaxRetriesExceeded. Either way,
             # the build request should already be gone and we probably
@@ -668,6 +671,12 @@ class ComputeTaskManager(base.Base):
                             elevated, self.report_client, spec_obj,
                             instance.uuid, alloc_req,
                             host.allocation_request_version)
+                        if request_spec:
+                            # NOTE(gibi): redo the request group - resource
+                            # provider mapping as the above claim call moves
+                            # the allocation of the instance to another host
+                            self._fill_provider_mapping(
+                                context, instance.uuid, request_spec)
                     else:
                         # Some deployments use different schedulers that do not
                         # use Placement, so they will not have an
@@ -704,11 +713,16 @@ class ComputeTaskManager(base.Base):
             # could have come from a compute via reschedule and it would
             # already have some things set, like scheduler_hints.
             local_reqspec = objects.RequestSpec.from_primitives(
-                context, request_spec, local_filter_props)
+                context, legacy_request_spec, local_filter_props)
 
-            # TODO(gibi): make sure that the request group - provider mapping
-            # is updated in the request spec during reschedule
-            local_reqspec.requested_resources = []
+            # NOTE(gibi): at this point the request spec already got converted
+            # to a legacy dict and then back to an object so we lost the non
+            # legacy part of the spec. Re-populate the requested_resource
+            # field based on the original request spec object passed to this
+            # function.
+            if request_spec:
+                local_reqspec.requested_resources = (
+                    request_spec.requested_resources)
 
             # The block_device_mapping passed from the api doesn't contain
             # instance specific information
diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py
index 760ef7f57d8a..e2b31d8031d9 100644
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -951,6 +951,81 @@ class _BaseTaskTestCase(object):
 
         do_test()
 
+    @mock.patch.object(objects.Instance, 'save', new=mock.MagicMock())
+    @mock.patch.object(query.SchedulerQueryClient, 'select_destinations')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify', new=mock.MagicMock())
+    def test_build_instances_reschedule_recalculates_provider_mapping(self,
+            mock_select_dests):
+
+        rg1 = objects.RequestGroup(resources={"CUSTOM_FOO": 1})
+        request_spec = objects.RequestSpec(requested_resources=[rg1])
+
+        mock_select_dests.return_value = [[fake_selection1]]
+        instance = fake_instance.fake_instance_obj(self.context)
+        image = {'fake-data': 'should_pass_silently'}
+
+        # build_instances() is a cast, we need to wait for it to complete
+        self.useFixture(cast_as_call.CastAsCall(self))
+
+        @mock.patch('nova.conductor.manager.ComputeTaskManager.'
+                    '_fill_provider_mapping')
+        @mock.patch('nova.scheduler.utils.claim_resources')
+        @mock.patch('nova.objects.request_spec.RequestSpec.from_primitives',
+                    return_value=request_spec)
+        @mock.patch.object(self.conductor_manager.compute_rpcapi,
+                           'build_and_run_instance')
+        @mock.patch.object(self.conductor_manager,
+                           '_populate_instance_mapping')
+        @mock.patch.object(self.conductor_manager, '_destroy_build_request')
+        def do_test(mock_destroy_build_req, mock_pop_inst_map,
+                    mock_build_and_run, mock_request_spec_from_primitives,
+                    mock_claim, mock_rp_mapping):
+            self.conductor.build_instances(
+                context=self.context,
+                instances=[instance],
+                image=image,
+                filter_properties={'retry': {'num_attempts': 1, 'hosts': []}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping='block_device_mapping',
+                legacy_bdm=False,
+                host_lists=fake_host_lists1,
+                request_spec=request_spec)
+
+            expected_build_run_host_list = copy.copy(fake_host_lists1[0])
+            if expected_build_run_host_list:
+                expected_build_run_host_list.pop(0)
+            mock_build_and_run.assert_called_once_with(
+                self.context,
+                instance=mock.ANY,
+                host='host1',
+                image=image,
+                request_spec=request_spec,
+                filter_properties={'retry': {'num_attempts': 2,
+                                             'hosts': [['host1', 'node1']]},
+                                   'limits': {}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping=test.MatchType(
+                    objects.BlockDeviceMappingList),
+                node='node1',
+                limits=None,
+                host_list=expected_build_run_host_list)
+
+            mock_rp_mapping.assert_called_once_with(
+                self.context, instance.uuid, mock.ANY)
+            actual_request_spec = mock_rp_mapping.mock_calls[0][1][2]
+            self.assertEqual(
+                rg1.resources,
+                actual_request_spec.requested_resources[0].resources)
+
+        do_test()
+
     @mock.patch.object(cinder.API, 'attachment_get')
     @mock.patch.object(cinder.API, 'attachment_create')
     @mock.patch.object(block_device_obj.BlockDeviceMapping, 'save')
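
For readers who want the mechanism in isolation: below is a minimal, standalone
Python sketch, not part of the patch and not Nova code. The simplified
RequestGroup class, the 'port-1' requester id and the rp-uuid values are made
up for illustration. It shows the requester_id -> provider_uuids mapping that
compute/manager.py builds above, and why the conductor has to rebuild it once
a reschedule claims an alternate allocation candidate.

    # Illustrative sketch only (assumed names, not Nova code).

    class RequestGroup(object):
        """Simplified stand-in for nova.objects.RequestGroup."""
        def __init__(self, requester_id, resources, provider_uuids=None):
            self.requester_id = requester_id
            self.resources = resources
            self.provider_uuids = provider_uuids or []


    def provider_mapping(requested_resources):
        # Mirrors the dict comprehension added to compute/manager.py above.
        return {group.requester_id: group.provider_uuids
                for group in requested_resources}


    # First scheduling attempt: the group is fulfilled by a provider on host1.
    group = RequestGroup('port-1', {'CUSTOM_FOO': 1}, ['rp-uuid-on-host1'])
    print(provider_mapping([group]))   # {'port-1': ['rp-uuid-on-host1']}

    # A reschedule claims an alternate allocation candidate on host2, so the
    # group now maps to a different provider; refreshing this stale mapping
    # after a successful claim is what the conductor change above does.
    group.provider_uuids = ['rp-uuid-on-host2']
    print(provider_mapping([group]))   # {'port-1': ['rp-uuid-on-host2']}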