Avoid duplicate ProviderSummary in _merge_candidates

In testing with the nested-perfload topology containing 7000 providers, _merge_candidates had been returning 21000 ProviderSummary objects which were not trimmed until serialization processing in handlers/allocation_candidate.py. That situation has been present at least since when we added same_subtree [1] so is not simply because of the addition of rw_ctx.summaries_by_id [2]. Teasing out fixing this exposed a lot of opportunities for shrinking the amount of data being inspected and processed. In no particular order the changes are: * Return set() from _alloc_candidates_single_provider and _alloc_candidates_multiple_providers instead of list. It doesn't need to be a list and the cast to list costs. * Because we have rw_ctx.summaries_by_id we don't need to return summaries from _get_by_one_request, they're in the rw_ctx. * The candidates provided to _merge_candidates is no longer a dict of tuples of allocatation requests and summaries (_all_ the summaries). It's just a dict of allocation requests. * At the end of _merge_candidates we need to winnow the contents of rw_ctx.summaries_by_id to only those that still matter. [1] I7fdeac24606359d37f1a7405d22c5797840e1a9e [2] I43ae1118421366336b4e96738c2981e07caebec8 Change-Id: Ibccaed40f1eac9c244cf70654f6be1d72f7a6054
2019-08-09 12:32:49 +01:00 · 2019-08-09 12:32:49 +01:00 · aecd0f3497
commit aecd0f3497
parent 5dda479f5b
1 changed files with 37 additions and 42 deletions
--- a/placement/objects/allocation_candidate.py
+++ b/placement/objects/allocation_candidate.py
@ -106,7 +106,7 @@ class AllocationCandidates(object):
                trait_rps = res_ctx.get_provider_ids_having_any_trait(
                    rg_ctx.context, rg_ctx.required_trait_map)
                if not trait_rps:
-                    return [], []
+                    return set()
            rp_candidates = res_ctx.get_trees_matching_all(rg_ctx, rw_ctx)
            return _alloc_candidates_multiple_providers(
                rg_ctx, rw_ctx, rp_candidates)
@ -143,7 +143,7 @@ class AllocationCandidates(object):
                else:
                    seen_rcs.add(rc)

-            alloc_reqs, summaries = cls._get_by_one_request(rg_ctx, rw_ctx)
+            alloc_reqs = cls._get_by_one_request(rg_ctx, rw_ctx)
            LOG.debug("%s (suffix '%s') returned %d matches",
                      str(group), str(suffix), len(alloc_reqs))
            if not alloc_reqs:
@ -155,13 +155,13 @@ class AllocationCandidates(object):
            # single provider.  We'll need this later to evaluate group_policy.
            for areq in alloc_reqs:
                areq.use_same_provider = group.use_same_provider
-            candidates[suffix] = alloc_reqs, summaries
+            candidates[suffix] = alloc_reqs

-        # At this point, each (alloc_requests, summary_obj) in `candidates` is
-        # independent of the others. We need to fold them together such that
-        # each allocation request satisfies *all* the incoming `requests`.  The
-        # `candidates` dict is guaranteed to contain entries for all suffixes,
-        # or we would have short-circuited above.
+        # At this point, each alloc_requests in `candidates` is independent of
+        # the others. We need to fold them together such that  each allocation
+        # request satisfies *all* the incoming `requests`. The `candidates`
+        # dict is guaranteed to contain entries for all suffixes, or we would
+        # have short-circuited above.
        alloc_request_objs, summary_objs = _merge_candidates(
            candidates, rw_ctx)

@ -284,11 +284,11 @@ class ProviderSummaryResource(object):


 def _alloc_candidates_multiple_providers(rg_ctx, rw_ctx, rp_candidates):
-    """Returns a tuple of (allocation requests, provider summaries) for a
-    supplied set of requested resource amounts and tuples of
-    (rp_id, root_id, rc_id). The supplied resource provider trees have
-    capacity to satisfy ALL of the resources in the requested resources as
-    well as ALL required traits that were requested by the user.
+    """Returns a set of allocation requests for a supplied set of requested
+    resource amounts and tuples of (rp_id, root_id, rc_id). The supplied
+    resource provider trees have capacity to satisfy ALL of the resources in
+    the requested resources as well as ALL required traits that were requested
+    by the user.

    This is a code path to get results for a RequestGroup with
    use_same_provider=False. In this scenario, we are able to use multiple
@ -301,7 +301,7 @@ def _alloc_candidates_multiple_providers(rg_ctx, rw_ctx, rp_candidates):
                          that satisfy the request for resources.
    """
    if not rp_candidates:
-        return [], []
+        return set()

    # Get all the root resource provider IDs. We should include the first
    # values of rp_tuples because while sharing providers are root providers,
@ -380,15 +380,14 @@ def _alloc_candidates_multiple_providers(rg_ctx, rw_ctx, rp_candidates):
                                          mappings=mappings)
            root_alloc_reqs.add(alloc_req)
        alloc_requests |= root_alloc_reqs
-    return list(alloc_requests), list(rw_ctx.summaries_by_id.values())
+    return alloc_requests


 def _alloc_candidates_single_provider(rg_ctx, rw_ctx, rp_tuples):
-    """Returns a tuple of (allocation requests, provider summaries) for a
-    supplied set of requested resource amounts and resource providers. The
-    supplied resource providers have capacity to satisfy ALL of the resources
-    in the requested resources as well as ALL required traits that were
-    requested by the user.
+    """Returns a set of allocation requests for a supplied set of requested
+    resource amounts and resource providers. The supplied resource providers
+    have capacity to satisfy ALL of the resources in the requested resources
+    as well as ALL required traits that were requested by the user.

    This is used in two circumstances:
    - To get results for a RequestGroup with use_same_provider=True.
@ -404,7 +403,7 @@ def _alloc_candidates_single_provider(rg_ctx, rw_ctx, rp_tuples):
                      for providers that matched the requested resources
    """
    if not rp_tuples:
-        return [], []
+        return set()

    # Get all root resource provider IDs.
    root_ids = set(p[1] for p in rp_tuples)
@ -450,7 +449,7 @@ def _alloc_candidates_single_provider(rg_ctx, rw_ctx, rp_tuples):
                req_obj = copy.copy(req_obj)
                req_obj.anchor_root_provider_uuid = anchor.anchor_uuid
                alloc_requests.append(req_obj)
-    return alloc_requests, list(rw_ctx.summaries_by_id.values())
+    return alloc_requests


 def _allocation_request_for_provider(context, requested_resources, provider,
@ -518,7 +517,7 @@ def _build_provider_summaries(context, rw_ctx, prov_traits):
    # member of the tree is contributing resources or traits to an allocation
    # candidate. At this stage, any tree which we have previously touched has
    # been fully summarized already and any trees left are fully present in
-    # rp_ids. See _get_usages_by_provider_tree for additional detail.
+    # rp_ids. See _get_usages_by_provider_trees for additional detail.
    provider_ids = _provider_ids_from_rp_ids(context, rp_ids)

    # Build up a dict, keyed by internal resource provider ID, of
@ -673,20 +672,19 @@ def _consolidate_allocation_requests(areqs, rw_ctx):

 # TODO(efried): Move _merge_candidates to rw_ctx?
 def _merge_candidates(candidates, rw_ctx):
-    """Given a dict, keyed by RequestGroup suffix, of tuples of
-    (allocation_requests, provider_summaries), produce a single tuple of
-    (allocation_requests, provider_summaries) that appropriately incorporates
-    the elements from each.
+    """Given a dict, keyed by RequestGroup suffix, of allocation_requests,
+    produce a single tuple of (allocation_requests, provider_summaries) that
+    appropriately incorporates the elements from each.

-    Each (alloc_reqs, prov_sums) in `candidates` satisfies one RequestGroup.
-    This method creates a list of alloc_reqs, *each* of which satisfies *all*
+    Each alloc_reqs in `candidates` satisfies one RequestGroup. This method
+    creates a list of alloc_reqs, *each* of which satisfies *all*
    of the RequestGroups.

    For that merged list of alloc_reqs, a corresponding provider_summaries is
    produced.

-    :param candidates: A dict, keyed by suffix string or '', of tuples of
-            (allocation_requests, provider_summaries) to be merged.
+    :param candidates: A dict, keyed by suffix string or '', of a set of
+            allocation_requests to be merged.
    :param rw_ctx: RequestWideSearchContext.
    :return: A tuple of (allocation_requests, provider_summaries).
    """
@ -706,14 +704,10 @@ def _merge_candidates(candidates, rw_ctx):
    #   }
    areq_lists_by_anchor = collections.defaultdict(
        lambda: collections.defaultdict(list))
-    # Save off all the provider summaries lists - we'll use 'em later.
-    all_psums = []
-    for suffix, (areqs, psums) in candidates.items():
+    for suffix, areqs in candidates.items():
        for areq in areqs:
            anchor = areq.anchor_root_provider_uuid
            areq_lists_by_anchor[anchor][suffix].append(areq)
-        for psum in psums:
-            all_psums.append(psum)

    # Create all combinations picking one AllocationRequest from each list
    # for each anchor.
@ -783,16 +777,17 @@ def _merge_candidates(candidates, rw_ctx):
    if not areqs:
        return [], []

-    # Now we have to produce provider summaries.  The provider summaries in
-    # the candidates input contain all the information; we just need to
-    # filter it down to only the providers in trees represented by our merged
-    # list of allocation requests.
+    # Now we have to produce provider summaries. The provider summaries in
+    # rw_ctx.summary_by_id contain all the information; we just need to filter
+    # it down to only the providers in trees represented by our merged list of
+    # allocation requests.
    tree_uuids = set()
    for areq in areqs:
        for arr in areq.resource_requests:
            tree_uuids.add(arr.resource_provider.root_provider_uuid)
-    psums = [psum for psum in all_psums if
-             psum.resource_provider.root_provider_uuid in tree_uuids]
+    psums = [
+        psum for psum in rw_ctx.summaries_by_id.values()
+        if psum.resource_provider.root_provider_uuid in tree_uuids]

    LOG.debug('Merging candidates yields %d allocation requests and %d '
              'provider summaries', len(areqs), len(psums))