Recalculate request group - RP mapping during re-schedule

Re-scheduling uses an alternative allocation candidate from placement to
get resources for the server. This also means that the previously
calculated request group - resource provider mapping is no longer valid
and needs to be recalculated based on the new allocation of the server.

blueprint bandwidth-resource-provider

Change-Id: If8a13f74d2b3c99f05365eb49dcfa01d9042fefa
Author: Balazs Gibizer
Date: 2018-11-21 17:06:24 +01:00
parent bea316d479
commit bc57a916c4
3 changed files with 100 additions and 9 deletions
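The mapping the title refers to is the dict built in the first hunk below:
request group requester_id keys mapped to lists of resource provider UUIDs.
A minimal, self-contained sketch of that shape (the RequestGroup here is a
toy stand-in for nova.objects.RequestGroup, not the real class):

    from dataclasses import dataclass, field
    from typing import Dict, List

    @dataclass
    class RequestGroup:
        # Toy stand-in; only the two fields used by the diff below.
        requester_id: str
        provider_uuids: List[str] = field(default_factory=list)

    def group_to_provider_mapping(
            requested_resources: List[RequestGroup]) -> Dict[str, List[str]]:
        # The same dict comprehension as in the compute manager hunk below.
        return {group.requester_id: group.provider_uuids
                for group in requested_resources}

    groups = [RequestGroup('port-1', ['rp-uuid-a']),
              RequestGroup('port-2', ['rp-uuid-b'])]
    assert group_to_provider_mapping(groups) == {'port-1': ['rp-uuid-a'],
                                                 'port-2': ['rp-uuid-b']}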

nova/compute/manager.py

@@ -2125,7 +2125,9 @@ class ComputeManager(manager.Manager):
             scheduler_hints)
         image_meta = objects.ImageMeta.from_dict(image)
 
-        if request_spec:
+        if (request_spec
+                and 'requested_resources' in request_spec
+                and request_spec.requested_resources is not None):
             request_group_resource_providers_mapping = {
                 group.requester_id: group.provider_uuids
                 for group in request_spec.requested_resources

nova/conductor/manager.py

@@ -567,7 +567,7 @@ class ComputeTaskManager(base.Base):
         # will the API send the request_spec if using cells v1, so we need
         # to check and build our own if one is not provided.
         if request_spec is None:
-            request_spec = scheduler_utils.build_request_spec(
+            legacy_request_spec = scheduler_utils.build_request_spec(
                 image, instances)
         else:
             # TODO(mriedem): This is annoying but to populate the local
@@ -576,7 +576,10 @@ class ComputeTaskManager(base.Base):
             # and we can remove it once the populate_retry and
             # populate_filter_properties utility methods are converted to
             # work on a RequestSpec object rather than filter_properties.
-            request_spec = request_spec.to_legacy_request_spec_dict()
+            # NOTE(gibi): we have to keep a reference to the original
+            # RequestSpec object passed to this function as we lose
+            # information during the below legacy conversion
+            legacy_request_spec = request_spec.to_legacy_request_spec_dict()
 
         # 'host_lists' will be None in one of two cases: when running cellsv1,
         # or during a reschedule from a pre-Queens compute. In all other cases,
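The NOTE(gibi) comment above is the heart of the conductor change:
to_legacy_request_spec_dict() is lossy, so a reference to the original
object must be kept. A hypothetical sketch of the pattern (plain dicts and
a made-up field list; only the dropping of requested_resources mirrors the
real conversion):

    # Hypothetical converter: the legacy format predates
    # requested_resources, so the field has no slot and is simply dropped.
    LEGACY_FIELDS = ('image', 'instance_properties', 'num_instances')

    def to_legacy_dict(spec):
        return {k: v for k, v in spec.items() if k in LEGACY_FIELDS}

    request_spec = {'image': {}, 'num_instances': 1,
                    'requested_resources': ['rg1']}
    legacy_request_spec = to_legacy_dict(request_spec)
    assert 'requested_resources' not in legacy_request_spec
    # The original request_spec reference is still intact, which is what
    # the last hunk of this file uses to re-populate the rebuilt spec.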
@@ -593,7 +596,7 @@ class ComputeTaskManager(base.Base):
                 filter_properties, instances[0].uuid)
         instance_uuids = [instance.uuid for instance in instances]
         spec_obj = objects.RequestSpec.from_primitives(
-            context, request_spec, filter_properties)
+            context, legacy_request_spec, filter_properties)
         LOG.debug("Rescheduling: %s", is_reschedule)
         if is_reschedule:
             # Make sure that we have a host, as we may have exhausted all
@@ -629,7 +632,7 @@ class ComputeTaskManager(base.Base):
             for instance in instances:
                 self._set_vm_state_and_notify(
                     context, instance.uuid, 'build_instances', updates,
-                    exc, request_spec)
+                    exc, legacy_request_spec)
             # If num_attempts > 1, we're in a reschedule and probably
             # either hit NoValidHost or MaxRetriesExceeded. Either way,
             # the build request should already be gone and we probably
@@ -668,6 +671,12 @@ class ComputeTaskManager(base.Base):
                             elevated, self.report_client, spec_obj,
                             instance.uuid, alloc_req,
                             host.allocation_request_version)
+                        if request_spec:
+                            # NOTE(gibi): redo the request group - resource
+                            # provider mapping as the above claim call moves
+                            # the allocation of the instance to another host
+                            self._fill_provider_mapping(
+                                context, instance.uuid, request_spec)
                     else:
                         # Some deployments use different schedulers that do not
                         # use Placement, so they will not have an
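_fill_provider_mapping itself is not part of this diff; conceptually it
re-derives the group-to-provider mapping from the instance's freshly
claimed allocation. A rough sketch of that idea (hypothetical helper; the
real implementation consults Placement and handles many more cases):

    def recalculate_mapping(request_groups, allocations):
        # 'allocations' mimics a Placement allocation dict:
        #     {rp_uuid: {'resources': {resource_class: amount}}}
        # Map each group to the providers whose new allocation covers
        # every resource class the group asked for.
        mapping = {}
        for group in request_groups:
            mapping[group.requester_id] = [
                rp_uuid for rp_uuid, alloc in allocations.items()
                if set(group.resources) <= set(alloc['resources'])]
        return mapping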
@@ -704,11 +713,16 @@ class ComputeTaskManager(base.Base):
             # could have come from a compute via reschedule and it would
             # already have some things set, like scheduler_hints.
             local_reqspec = objects.RequestSpec.from_primitives(
-                context, request_spec, local_filter_props)
-            # TODO(gibi): make sure that the request group - provider mapping
-            # is updated in the request spec during reschedule
-            local_reqspec.requested_resources = []
+                context, legacy_request_spec, local_filter_props)
+            # NOTE(gibi): at this point the request spec already got converted
+            # to a legacy dict and then back to an object so we lost the non
+            # legacy part of the spec. Re-populate the requested_resources
+            # field based on the original request spec object passed to this
+            # function.
+            if request_spec:
+                local_reqspec.requested_resources = (
+                    request_spec.requested_resources)
 
             # The block_device_mapping passed from the api doesn't contain
             # instance specific information

nova/tests/unit/conductor/test_conductor.py

@@ -951,6 +951,81 @@ class _BaseTaskTestCase(object):
         do_test()
 
+    @mock.patch.object(objects.Instance, 'save', new=mock.MagicMock())
+    @mock.patch.object(query.SchedulerQueryClient, 'select_destinations')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify', new=mock.MagicMock())
+    def test_build_instances_reschedule_recalculates_provider_mapping(self,
+            mock_select_dests):
+        rg1 = objects.RequestGroup(resources={"CUSTOM_FOO": 1})
+        request_spec = objects.RequestSpec(requested_resources=[rg1])
+        mock_select_dests.return_value = [[fake_selection1]]
+        instance = fake_instance.fake_instance_obj(self.context)
+        image = {'fake-data': 'should_pass_silently'}
+
+        # build_instances() is a cast, we need to wait for it to complete
+        self.useFixture(cast_as_call.CastAsCall(self))
+
+        @mock.patch('nova.conductor.manager.ComputeTaskManager.'
+                    '_fill_provider_mapping')
+        @mock.patch('nova.scheduler.utils.claim_resources')
+        @mock.patch('nova.objects.request_spec.RequestSpec.from_primitives',
+                    return_value=request_spec)
+        @mock.patch.object(self.conductor_manager.compute_rpcapi,
+                           'build_and_run_instance')
+        @mock.patch.object(self.conductor_manager,
+                           '_populate_instance_mapping')
+        @mock.patch.object(self.conductor_manager, '_destroy_build_request')
+        def do_test(mock_destroy_build_req, mock_pop_inst_map,
+                    mock_build_and_run, mock_request_spec_from_primitives,
+                    mock_claim, mock_rp_mapping):
+            self.conductor.build_instances(
+                context=self.context,
+                instances=[instance],
+                image=image,
+                filter_properties={'retry': {'num_attempts': 1, 'hosts': []}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping='block_device_mapping',
+                legacy_bdm=False,
+                host_lists=fake_host_lists1,
+                request_spec=request_spec)
+
+            expected_build_run_host_list = copy.copy(fake_host_lists1[0])
+            if expected_build_run_host_list:
+                expected_build_run_host_list.pop(0)
+            mock_build_and_run.assert_called_once_with(
+                self.context,
+                instance=mock.ANY,
+                host='host1',
+                image=image,
+                request_spec=request_spec,
+                filter_properties={'retry': {'num_attempts': 2,
+                                             'hosts': [['host1', 'node1']]},
+                                   'limits': {}},
+                admin_password='admin_password',
+                injected_files='injected_files',
+                requested_networks=None,
+                security_groups='security_groups',
+                block_device_mapping=test.MatchType(
+                    objects.BlockDeviceMappingList),
+                node='node1',
+                limits=None,
+                host_list=expected_build_run_host_list)
+
+            mock_rp_mapping.assert_called_once_with(
+                self.context, instance.uuid, mock.ANY)
+            actual_request_spec = mock_rp_mapping.mock_calls[0][1][2]
+            self.assertEqual(
+                rg1.resources,
+                actual_request_spec.requested_resources[0].resources)
+
+        do_test()
+
     @mock.patch.object(cinder.API, 'attachment_get')
     @mock.patch.object(cinder.API, 'attachment_create')
     @mock.patch.object(block_device_obj.BlockDeviceMapping, 'save')
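A note on the assertion style at the end of the new test:
assert_called_once_with is given mock.ANY for the spec argument, and
mock_rp_mapping.mock_calls[0][1][2] then digs the actual third positional
argument back out of the recorded call. The indexing unpacks as follows:

    from unittest import mock

    m = mock.Mock()
    m('ctxt', 'instance-uuid', 'request-spec')

    # Each mock_calls entry is a (name, args, kwargs) triple, so [0][1][2]
    # is the third positional argument of the first recorded call.
    name, args, kwargs = m.mock_calls[0]
    assert args[2] == 'request-spec'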