Follow-up for I0c764e441993e32aafef0b18049a425c3c832a50

This is a follow-up to change
I0c764e441993e32aafef0b18049a425c3c832a50 to address
review comments.

The most important part is the early exit from
_fill_provider_mapping when request_spec.maps_requested_resources
returns False. That is needed to avoid the performance impact of
fetching allocations and resource provider traits per instance and
per resource provider. Since this code path is currently only
exercised for ports that have resource requests, we want to avoid
the extra work for all other server create requests.
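
As an illustrative sketch only (not the literal diff below), the
early exit amounts to returning before any placement API calls are
made:

    def _fill_provider_mapping(self, context, instance_uuid,
                               request_spec):
        # No ports with resource requests means no granular request
        # groups to map, so skip the per-instance placement calls.
        if not request_spec.maps_requested_resources:
            return
        ...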

Part of blueprint bandwidth-resource-provider

Change-Id: I90845461b2b98c176c7b3b97dd3f47ed604a9bef
Author: Matt Riedemann, 2019-02-21 17:02:32 -05:00
Committed by: Balazs Gibizer
parent f5d236f868
commit 39ec15f58c
6 changed files with 95 additions and 40 deletions


@@ -1249,7 +1249,7 @@ class ComputeTaskManager(base.Base):
"""Fills out the request group - resource provider mapping in the
request spec.
-This is a workaround as placement does not return which PR
+This is a workaround as placement does not return which RP
fulfills which granular request group in the allocation candidate
request. There is a spec proposing a solution in placement:
https://review.openstack.org/#/c/597601/
@@ -1258,10 +1258,35 @@ class ComputeTaskManager(base.Base):
mapping out from the Selection object returned by the scheduler's
select_destinations call.
"""
-allocs = self.report_client.get_allocations_for_consumer(
-    context, instance_uuid)
+# Exit early if this request spec does not require mappings.
+if not request_spec.maps_requested_resources:
+    return
+# TODO(mriedem): Could we use the Selection.allocation_request here
+# to get the resource providers rather than making an API call to
+# placement per instance being scheduled? Granted that is a
+# PUT /allocations/{consumer_id} *request* payload rather than a
+# *response* but at least currently they are in the same format and
+# could make this faster.
+allocs = self.report_client.get_allocs_for_consumer(
+    context, instance_uuid)['allocations']
+if not allocs:
+    # Technically out-of-tree scheduler drivers can still not create
+    # allocations in placement so move on if there are no allocations
+    # for the instance.
+    LOG.debug('No allocations found for instance after scheduling. '
+              'Assuming the scheduler driver is not using Placement.',
+              instance_uuid=instance_uuid)
+    return
+# TODO(mriedem): Short-term we can optimize this by passing a cache by
+# reference of the RP->traits mapping because if we are processing
+# a multi-create request we could have the same RPs being used for
+# multiple instances and avoid duplicate calls. Long-term we could
+# stash the RP->traits mapping on the Selection object since we can
+# pull the traits for each provider from the GET /allocation_candidates
+# response in the scheduler (or leverage the change from the spec
+# mentioned in the docstring above).
provider_traits = {
-rp_uuid: self.report_client._get_provider_traits(
+rp_uuid: self.report_client.get_provider_traits(
context, rp_uuid).traits
for rp_uuid in allocs}
request_spec.map_requested_resources_to_providers(
@@ -1382,17 +1407,18 @@ class ComputeTaskManager(base.Base):
scheduler_utils.populate_filter_properties(filter_props,
host)
# Now that we have a selected host (which has claimed resource
# allocations in the scheduler) for this instance, we may need to
# map allocations to resource providers in the request spec.
try:
self._fill_provider_mapping(
context, instance.uuid, request_spec)
except Exception as exc:
# If anything failed here we need to cleanup and bail out.
with excutils.save_and_reraise_exception():
-self._cleanup_build_artifacts(context, exc, instances,
-                              build_requests,
-                              request_specs,
-                              block_device_mapping,
-                              tags,
-                              cell_mapping_cache)
+self._cleanup_build_artifacts(
+    context, exc, instances, build_requests, request_specs,
+    block_device_mapping, tags, cell_mapping_cache)
# TODO(melwitt): Maybe we should set_target_cell on the contexts
# once we map to a cell, and remove these separate with statements.
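
For reference, get_allocs_for_consumer returns the whole
GET /allocations/{consumer_uuid} response body rather than just the
allocations dict, which is why the new call indexes ['allocations'].
An illustrative (not verbatim) response shape:

    {
        "allocations": {
            "<rp_uuid>": {
                "generation": 42,
                "resources": {"VCPU": 1, "MEMORY_MB": 512}
            }
        },
        "project_id": "<project_id>",
        "user_id": "<user_id>"
    }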


@@ -676,6 +676,13 @@ class RequestSpec(base.NovaObject):
# original request for the forced hosts
self.obj_reset_changes(['force_hosts', 'force_nodes'])
+
+@property
+def maps_requested_resources(self):
+    """Returns True if this RequestSpec needs to map requested_resources
+    to resource providers, False otherwise.
+    """
+    return 'requested_resources' in self and self.requested_resources
def _is_valid_group_rp_mapping(
self, group_rp_mapping, placement_allocations, provider_traits):
"""Decides if the mapping is valid from resources and traits
@@ -778,7 +785,7 @@
This dict contains info only about RPs
appearing in the placement_allocations param.
"""
-if 'requested_resources' not in self or not self.requested_resources:
+if not self.maps_requested_resources:
# Nothing to do, so let's return early
return
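
A quick illustration of the new property's semantics (assuming the
standard oslo.versionedobjects behavior, where 'in' tests whether a
field has been set on the object):

    spec = objects.RequestSpec()
    bool(spec.maps_requested_resources)   # False: field never set
    spec.requested_resources = []
    bool(spec.maps_requested_resources)   # False: set but empty
    spec.requested_resources = [objects.RequestGroup()]
    bool(spec.maps_requested_resources)   # True: groups need mapping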


@@ -399,7 +399,7 @@ class SchedulerReportClient(object):
LOG.error(msg, args)
raise exception.ResourceProviderAggregateRetrievalFailed(uuid=rp_uuid)
-def _get_provider_traits(self, context, rp_uuid):
+def get_provider_traits(self, context, rp_uuid):
"""Queries the placement API for a resource provider's traits.
:param context: The security context
@@ -800,7 +800,7 @@ class SchedulerReportClient(object):
rp_uuid, aggs, generation=generation)
# Refresh traits
-trait_info = self._get_provider_traits(context, rp_uuid)
+trait_info = self.get_provider_traits(context, rp_uuid)
traits, generation = trait_info.traits, trait_info.generation
msg = ("Refreshing trait associations for resource provider %s, "
"traits: %s")


@@ -885,7 +885,7 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
self.client.update_from_provider_tree, self.context, new_tree)
# Placement didn't get updated
self.assertEqual(set(['HW_CPU_X86_AVX', 'HW_CPU_X86_AVX2']),
-self.client._get_provider_traits(
+self.client.get_provider_traits(
self.context, uuids.root).traits)
# ...and the root was removed from the cache
self.assertFalse(self.client._provider_tree.exists(uuids.root))
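
The functional test above exercises the renamed method; as a caller
sketch (illustrative only), the TraitInfo it returns carries both the
provider's traits and the provider generation:

    trait_info = report_client.get_provider_traits(context, rp_uuid)
    traits, generation = trait_info.traits, trait_info.generation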


@@ -2056,9 +2056,9 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
self.assertTrue(mock_build.called)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')
+'get_provider_traits')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'get_allocations_for_consumer')
+'get_allocs_for_consumer')
@mock.patch('nova.objects.request_spec.RequestSpec.'
'map_requested_resources_to_providers')
def test_schedule_and_build_instances_fill_request_spec(
@@ -2067,7 +2067,8 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
self.params['request_specs'][0].requested_resources = [
objects.RequestGroup()]
-mock_get_allocs.return_value = {uuids.rp1: mock.sentinel.rp1_allocs}
+mock_get_allocs.return_value = {
+    'allocations': {uuids.rp1: mock.sentinel.rp1_allocs}}
mock_traits.return_value.traits = ['TRAIT1']
instance_uuid = self._do_schedule_and_build_instances_test(
@@ -2080,28 +2081,49 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
@mock.patch('nova.conductor.manager.ComputeTaskManager.'
'_cleanup_build_artifacts')
-@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-            '_get_provider_traits')
-@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-            'get_allocations_for_consumer')
-@mock.patch('nova.objects.request_spec.RequestSpec.'
-            'map_requested_resources_to_providers')
+@mock.patch('nova.conductor.manager.ComputeTaskManager.'
+            '_fill_provider_mapping', side_effect=test.TestingException)
def test_schedule_and_build_instances_fill_request_spec_error(
-        self, mock_map, mock_get_allocs, mock_traits, mock_cleanup):
-    # makes sure there is some request group in the spec to be mapped
-    self.params['request_specs'][0].requested_resources = [
-        objects.RequestGroup()]
-    mock_get_allocs.side_effect = exc.ConsumerAllocationRetrievalFailed(
-        consumer_uuid=uuids.inst, error='some error')
+        self, mock_fill, mock_cleanup):
self.assertRaises(
-    exc.ConsumerAllocationRetrievalFailed,
+    test.TestingException,
self._do_schedule_and_build_instances_test, self.params)
-self.assertFalse(mock_map.called)
-self.assertFalse(mock_traits.called)
-self.assertTrue(mock_cleanup.called)
+mock_fill.assert_called_once()
+mock_cleanup.assert_called_once()
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_provider_traits', new_callable=mock.NonCallableMock)
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_allocs_for_consumer',
+            new_callable=mock.NonCallableMock)
+@mock.patch('nova.objects.request_spec.RequestSpec.'
+            'map_requested_resources_to_providers',
+            new_callable=mock.NonCallableMock)
+def test_schedule_and_build_instances_fill_request_spec_noop(
+        self, mock_map, mock_get_allocs, mock_traits):
+    """Tests to make sure _fill_provider_mapping exits early if there are
+    no requested_resources on the RequestSpec.
+    """
+    self.params['request_specs'][0].requested_resources = []
+    self._do_schedule_and_build_instances_test(self.params)
+
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_provider_traits', new_callable=mock.NonCallableMock)
+@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+            'get_allocs_for_consumer', return_value={'allocations': {}})
+@mock.patch('nova.objects.request_spec.RequestSpec.'
+            'map_requested_resources_to_providers',
+            new_callable=mock.NonCallableMock)
+def test_schedule_and_build_instances_fill_request_spec_no_allocs(
+        self, mock_map, mock_get_allocs, mock_traits):
+    """Tests to make sure _fill_provider_mapping handles a scheduler
+    driver which does not use placement (so there are no allocations).
+    """
+    self.params['request_specs'][0].requested_resources = [
+        objects.RequestGroup()]
+    self._do_schedule_and_build_instances_test(self.params)
+    mock_get_allocs.assert_called_once()
@mock.patch('nova.objects.CellMapping.get_by_uuid')
def test_bury_in_cell0_no_cell0(self, mock_cm_get):
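
A note on the mock.NonCallableMock patches in the two new tests above:
a NonCallableMock raises TypeError when invoked, so those tests fail
loudly if _fill_provider_mapping unexpectedly calls placement or the
mapping code. For illustration:

    >>> import mock
    >>> mock.NonCallableMock()()
    TypeError: 'NonCallableMock' object is not callable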


@@ -1701,7 +1701,7 @@ class TestProviderOperations(SchedulerReportClientTestCase):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_provider_aggregates')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')
+'get_provider_traits')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_sharing_providers')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -2644,7 +2644,7 @@ class TestTraits(SchedulerReportClientTestCase):
'resource_provider_generation': 42}
self.ks_adap_mock.get.return_value = resp_mock
-result, gen = self.client._get_provider_traits(self.context, uuid)
+result, gen = self.client.get_provider_traits(self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
@@ -2668,7 +2668,7 @@
resp_mock.status_code = status_code
self.assertRaises(
exception.ResourceProviderTraitRetrievalFailed,
-self.client._get_provider_traits, self.context, uuid)
+self.client.get_provider_traits, self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
@@ -2686,7 +2686,7 @@
uuid = uuids.compute_node
self.ks_adap_mock.get.side_effect = ks_exc.EndpointNotFound()
self.assertRaises(ks_exc.ClientException,
-self.client._get_provider_traits, self.context, uuid)
+self.client.get_provider_traits, self.context, uuid)
expected_url = '/resource_providers/' + uuid + '/traits'
self.ks_adap_mock.get.assert_called_once_with(
expected_url,
@@ -2865,7 +2865,7 @@ class TestAssociations(SchedulerReportClientTestCase):
self.mock_get_traits = self.useFixture(fixtures.MockPatch(
'nova.scheduler.client.report.SchedulerReportClient.'
-'_get_provider_traits')).mock
+'get_provider_traits')).mock
self.mock_get_traits.return_value = report.TraitInfo(
traits=set(['CUSTOM_GOLD']), generation=43)