Extend the RP tree DB query to support any-traits

This extends the RP tree query at the DB layer to support any-traits with a nested required_traits syntax [{A, B}, {C}] meaning ((A or B) and C). The object and API layers do not support such queries yet. Story: 2005345 Story: 2005346 Change-Id: I8704fe7350f74e0567e574eb00fc40b330817381
2022-01-21 19:18:17 +01:00 · 2022-01-21 19:18:17 +01:00 · c19481a5f3
commit c19481a5f3
parent 849c9afd2e
2 changed files with 298 additions and 123 deletions
--- a/placement/objects/research_context.py
+++ b/placement/objects/research_context.py
@ -729,8 +729,8 @@ def get_trees_matching_all(rg_ctx, rw_ctx):
    # capacity and that set of providers (grouped by their tree) have all
    # of the required traits and none of the forbidden traits
    rp_tuples_with_trait = _get_trees_with_traits(
-        rg_ctx.context, provs_with_inv.rps, rg_ctx.required_trait_map,
-        rg_ctx.forbidden_trait_map)
+        rg_ctx.context, provs_with_inv.rps, rg_ctx.required_trait_map.values(),
+        rg_ctx.forbidden_trait_map.values())
    provs_with_inv.filter_by_rp(rp_tuples_with_trait)
    LOG.debug("found %d providers under %d trees after applying "
              "traits filter - required: %s, forbidden: %s",
@ -767,85 +767,113 @@ def _get_trees_with_traits(ctx, rp_ids, required_traits, forbidden_traits):

    :param ctx: Session context to use
    :param rp_ids: a set of resource provider IDs
-    :param required_traits: A map, keyed by trait string name, of required
-                            trait internal IDs that each provider TREE must
-                            COLLECTIVELY have associated with it
-    :param forbidden_traits: A map, keyed by trait string name, of trait
-                             internal IDs that a resource provider must
-                             not have.
+    :param required_traits: A list of sets of trait internal IDs where the
+       traits in each nested set are OR'd while the items in the outer list are
+       AND'd together. The RPs in the tree should COLLECTIVELY fulfill this
+       trait request.
+    :param forbidden_traits: A list of trait internal IDs that a resource
+        provider tree must not have.
    """
-    # We now want to restrict the returned providers to only those provider
-    # trees that have all our required traits.
-    #
-    # The SQL we want looks like this:
-    #
-    # SELECT outer_rp.id, outer_rp.root_provider_id
-    # FROM resource_providers AS outer_rp
-    # JOIN (
-    #   SELECT rp.root_provider_id
-    #   FROM resource_providers AS rp
-    #   # Only if we have required traits...
-    #   INNER JOIN resource_provider_traits AS rptt
-    #   ON rp.id = rptt.resource_provider_id
-    #   AND rptt.trait_id IN ($REQUIRED_TRAIT_IDS)
-    #   # Only if we have forbidden_traits...
-    #   LEFT JOIN resource_provider_traits AS rptt_forbid
-    #   ON rp.id = rptt_forbid.resource_provider_id
-    #   AND rptt_forbid.trait_id IN ($FORBIDDEN_TRAIT_IDS)
-    #   WHERE rp.id IN ($RP_IDS)
-    #   # Only if we have forbidden traits...
-    #   AND rptt_forbid.resource_provider_id IS NULL
-    #   GROUP BY rp.root_provider_id
-    #   # Only if have required traits...
-    #   HAVING COUNT(DISTINCT rptt.trait_id) == $NUM_REQUIRED_TRAITS
-    # ) AS trees_with_traits
-    #  ON outer_rp.root_provider_id = trees_with_traits.root_provider_id
-    rpt = sa.alias(_RP_TBL, name="rp")
-    cond = [rpt.c.id.in_(rp_ids)]
-    subq = sa.select([rpt.c.root_provider_id])
-    subq_join = None
-    if required_traits:
-        rptt = sa.alias(_RP_TRAIT_TBL, name="rptt")
-        rpt_to_rptt = sa.join(
-            rpt, rptt, sa.and_(
-                rpt.c.id == rptt.c.resource_provider_id,
-                rptt.c.trait_id.in_(required_traits.values())))
-        subq_join = rpt_to_rptt
-        # Only get the resource providers that have ALL the required traits,
-        # so we need to GROUP BY the root provider and ensure that the
-        # COUNT(trait_id) is equal to the number of traits we are requiring
-        num_traits = len(required_traits)
-        having_cond = sa.func.count(sa.distinct(rptt.c.trait_id)) == num_traits
-        subq = subq.having(having_cond)
+    # FIXME(gibi): This is a temporary fallback to the old calling convention
+    # when required_traits was a flat list of trait ids. We translate such a
+    # parameter to the new nested structure with the same meaning.
+    # This code should be removed once each caller is adapted to call this
+    # with the new structure
+    if all(not isinstance(trait, set) for trait in required_traits):
+        # old value: required_traits = [A, B, C] -> A and B and C
+        # new value: required_traits = [{A}, {B}, {C}] -> (A) and (B) and (C)
+        # the () part could be a set of traits with OR relationship but
+        # the old callers do not support such OR relationship hence the old
+        # flat structure
+        required_traits = [{trait} for trait in required_traits]

-    # Tack on an additional LEFT JOIN clause inside the derived table if we've
-    # got forbidden traits in the mix.
-    if forbidden_traits:
-        rptt_forbid = sa.alias(_RP_TRAIT_TBL, name="rptt_forbid")
-        join_to = rpt
-        if subq_join is not None:
-            join_to = subq_join
-        rpt_to_rptt_forbid = sa.outerjoin(
-            join_to, rptt_forbid, sa.and_(
-                rpt.c.id == rptt_forbid.c.resource_provider_id,
-                rptt_forbid.c.trait_id.in_(forbidden_traits.values())))
-        cond.append(rptt_forbid.c.resource_provider_id == sa.null())
-        subq_join = rpt_to_rptt_forbid
+    # TODO(gibi): if somebody can combine the three SQL queries below into a
+    # single one, that will probably improve performance

-    subq = subq.select_from(subq_join)
-    subq = subq.where(sa.and_(*cond))
-    subq = subq.group_by(rpt.c.root_provider_id)
-    trees_with_traits = sa.alias(subq, name="trees_with_traits")
+    # Get the root of all rps in the rp_ids as we need to return every rp from
+    # rp_ids that is in a matching tree but below we will filter out rps by
+    # traits. So we need a copy and also that copy needs to associate rps to
+    # trees by root_id
+    rpt = sa.alias(_RP_TBL, name='rpt')
+    sel = sa.select([rpt.c.id, rpt.c.root_provider_id]).select_from(rpt)
+    sel = sel.where(rpt.c.id.in_(rp_ids))
+    res = ctx.session.execute(sel).fetchall()
+    original_rp_ids = {rp_id: root_id for rp_id, root_id in res}

-    outer_rps = sa.alias(_RP_TBL, name="outer_rps")
-    outer_to_subq = sa.join(
-        outer_rps, trees_with_traits,
-        outer_rps.c.root_provider_id == trees_with_traits.c.root_provider_id)
-    sel = sa.select([outer_rps.c.id, outer_rps.c.root_provider_id])
-    sel = sel.select_from(outer_to_subq)
+    # First filter out the rps from the rp_ids list that provide forbidden
+    # traits. To do that we collect those rps that provide any of the forbidden
+    # traits and with the outer join and the null check we filter them out
+    # of the result
+    rptt_forbidden = sa.alias(_RP_TRAIT_TBL, name="rptt_forbidden")
+    rp_to_trait = sa.outerjoin(
+        rpt, rptt_forbidden,
+        sa.and_(
+            rpt.c.id == rptt_forbidden.c.resource_provider_id,
+            rptt_forbidden.c.trait_id.in_(forbidden_traits)
+        )
+    )
+    sel = sa.select(
+        [rpt.c.id, rpt.c.root_provider_id]).select_from(rp_to_trait)
+    sel = sel.where(
+        sa.and_(
+            rpt.c.id.in_(original_rp_ids.keys()),
+            rptt_forbidden.c.trait_id == sa.null()
+        )
+    )
    res = ctx.session.execute(sel).fetchall()

-    return set((rp_id, root_id) for rp_id, root_id in res)
+    # These are the rps that do not provide any forbidden traits
+    good_rp_ids = {}
+    for rp_id, root_id in res:
+        good_rp_ids[rp_id] = root_id
+
+    # shortcut: if no traits are required, good_rp_ids.values() contains all
+    # the good roots
+    if not required_traits:
+        return {
+            (rp_id, root_id)
+            for rp_id, root_id in original_rp_ids.items()
+            if root_id in good_rp_ids.values()
+        }
+
+    # now get the traits provided by the good rps per tree
+    rptt = sa.alias(_RP_TRAIT_TBL, name="rptt")
+    rp_to_trait = sa.join(
+        rpt, rptt, rpt.c.id == rptt.c.resource_provider_id)
+    sel = sa.select(
+        [rpt.c.root_provider_id, rptt.c.trait_id]
+    ).select_from(rp_to_trait)
+    sel = sel.where(rpt.c.id.in_(good_rp_ids))
+    res = ctx.session.execute(sel).fetchall()
+
+    root_to_traits = collections.defaultdict(set)
+    for root_id, trait_id in res:
+        root_to_traits[root_id].add(trait_id)
+
+    result = set()
+
+    # filter the trees by checking if each tree provides all the
+    # required_traits
+    for root_id, provided_traits in root_to_traits.items():
+        # we need a match for all the items from the outer list of the
+        # required_traits as that describes AND relationship
+        if all(
+            # we need at least one match per nested trait set as that set
+            # describes OR relationship
+            any_traits.intersection(provided_traits)
+            for any_traits in required_traits
+        ):
+            # This tree matches the required traits, so add to the result all
+            # the rps from the original rp_ids that belong to this tree
+            result.update(
+                {
+                    (rp_id, root_id)
+                    for rp_id, original_root_id in original_rp_ids.items()
+                    if root_id == original_root_id
+                }
+
+            )
+    return result


@db_api.placement_context_manager.reader
--- a/placement/tests/functional/db/test_allocation_candidates.py
+++ b/placement/tests/functional/db/test_allocation_candidates.py
@ -776,9 +776,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        rp_ids, avx2_t, ssd_t, geneve_t, ssl_t = self._make_trees_with_traits()

        # Case1: required on root
-        required_traits = {
-            avx2_t.name: avx2_t.id,
-        }
+        required_traits = [{avx2_t.id}]
        forbidden_traits = {}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -791,13 +789,9 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case1': required on root with forbidden traits
-        # Let's validate that cn3 dissapears
-        required_traits = {
-            avx2_t.name: avx2_t.id,
-        }
-        forbidden_traits = {
-            ssd_t.name: ssd_t.id,
-        }
+        # Let's validate that cn3 disappears
+        required_traits = [{avx2_t.id}]
+        forbidden_traits = {ssd_t.id}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
            self.ctx, rp_ids, required_traits, forbidden_traits)
@ -809,10 +803,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case2: multiple required on root
-        required_traits = {
-            avx2_t.name: avx2_t.id,
-            ssd_t.name: ssd_t.id
-        }
+        required_traits = [{avx2_t.id}, {ssd_t.id}]
        forbidden_traits = {}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -825,9 +816,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case3: required on child
-        required_traits = {
-            geneve_t.name: geneve_t.id
-        }
+        required_traits = [{geneve_t.id}]
        forbidden_traits = {}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -840,13 +829,9 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case3': required on child with forbidden traits
-        # Let's validate that cn4 dissapears
-        required_traits = {
-            geneve_t.name: geneve_t.id
-        }
-        forbidden_traits = {
-            ssl_t.name: ssl_t.id
-        }
+        # Let's validate that cn4 disappears
+        required_traits = [{geneve_t.id}]
+        forbidden_traits = {ssl_t.id}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
            self.ctx, rp_ids, required_traits, forbidden_traits)
@ -858,10 +843,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case4: multiple required on child
-        required_traits = {
-            geneve_t.name: geneve_t.id,
-            ssl_t.name: ssl_t.id
-        }
+        required_traits = [{geneve_t.id}, {ssl_t.id}]
        forbidden_traits = {}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -874,10 +856,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        self.assertEqual(expect_root_ids, tree_root_ids)

        # Case5: required on root and child
-        required_traits = {
-            avx2_t.name: avx2_t.id,
-            geneve_t.name: geneve_t.id
-        }
+        required_traits = [{avx2_t.id}, {geneve_t.id}]
        forbidden_traits = {}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -901,11 +880,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        tb.set_traits(cn1, 'CUSTOM_FOO')
        custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')

-        required_traits = {
-        }
-        forbidden_traits = {
-            custom_foo.name: custom_foo.id,
-        }
+        required_traits = []
+        forbidden_traits = {custom_foo.id}
        rp_ids = {cn1.id, cn1_c1.id}  # both RP from the tree

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -939,11 +915,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')
        tb.set_traits(cn1_c1, 'CUSTOM_FOO')

-        required_traits = {
-        }
-        forbidden_traits = {
-            custom_foo.name: custom_foo.id,
-        }
+        required_traits = []
+        forbidden_traits = {custom_foo.id}
        rp_ids = {cn1.id, cn1_c1.id}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -964,12 +937,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')
        custom_bar = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_BAR')

-        required_traits = {
-            custom_bar.name: custom_bar.id
-        }
-        forbidden_traits = {
-            custom_foo.name: custom_foo.id,
-        }
+        required_traits = [{custom_bar.id}]
+        forbidden_traits = {custom_foo.id}
        rp_ids = {cn1.id, cn1_c1.id}

        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -988,6 +957,184 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
        # forbidden trait. There is no other rps in the tree to be considered.
        self.assertEqual(set(), rp_tuples_with_trait)

+    def make_tree_for_any_traits(self, tree_index, trait_list):
+        """Create an RP tree with traits
+                     CNx
+                    /   \
+              CNx_C1    CNx_C2
+                |
+              CNx_C1_GC1
+        """
+        cn_name = f'cn{tree_index}'
+        cn = self._create_provider(cn_name)
+        cn_c1 = self._create_provider(cn_name + 'c1', parent=cn.uuid)
+        cn_c1_gc1 = self._create_provider(
+            cn_name + 'c1_gc1', parent=cn_c1.uuid)
+        cn_c2 = self._create_provider(cn_name + 'c2', parent=cn.uuid)
+
+        rps = [cn, cn_c1, cn_c2, cn_c1_gc1]
+
+        for rp, traits in zip(rps, trait_list):
+            tb.set_traits(rp, *traits)
+
+        return [(rp.id, cn.id) for rp in rps]
+
+    def make_trees_with_traits_for_any_traits(self, rp_trait_list):
+        rp_ids = []
+
+        for index, rp_traits in rp_trait_list:
+            rp_ids += self.make_tree_for_any_traits(index, rp_traits)
+
+        return rp_ids
+
+    def test_get_trees_with_traits_any_traits(self):
+        """We are setting up multiple RP trees with the same structure but
+        with different traits. The structure is
+                     CNx
+                    /   \
+              CNx_C1    CNx_C2
+                |
+              CNx_C1_GC1
+
+        The required trait query is ((A or B) and C). Then we assert that
+        only the matching trees are returned.
+
+        """
+        a = 'CUSTOM_A'
+        b = 'CUSTOM_B'
+        c = 'CUSTOM_C'
+
+        matching_trees = [
+            #     CN         C1      C2      C1_GC1
+            (1,  [[a, b, c], [],     [],     [], ], ), # noqa
+            (2,  [[a, c],    [b],    [],     [], ], ), # noqa
+            (3,  [[a],       [b, c], [],     [], ], ), # noqa
+            (4,  [[a],       [b],    [c],    [], ], ), # noqa
+            (5,  [[c],       [b],    [a],    [], ], ), # noqa
+            (6,  [[],        [a],    [b],    [c], ], ), # noqa
+            (7,  [[c],       [],     [a, b], [], ], ), # noqa
+            (8,  [[c],       [],     [],     [a, b], ], ), # noqa
+            (9,  [[a, b],    [b],    [a],    [c], ], ), # noqa
+            (10, [[b, c],    [],     [],     [], ], ), # noqa
+            (11, [[c],       [a],    [],     [], ], ), # noqa
+            (12, [[a],       [],     [c],    [], ], ), # noqa
+            (13, [[b],       [],     [],     [c], ], ), # noqa
+            (14, [[],        [b],    [],     [c], ], ), # noqa
+        ]
+
+        non_matching_trees = [
+            #     CN         C1      C2      C1_GC1
+            (15, [[a, b],    [],     [],     [], ], ), # noqa
+            (16, [[],        [a],    [],     [b], ], ), # noqa
+            (17, [[c],       [],     [],     [], ], ), # noqa
+            (18, [[],        [c],    [],     [], ], ), # noqa
+            (19, [[],        [],     [a],    [], ], ), # noqa
+        ]
+
+        matching_rp_ids = self.make_trees_with_traits_for_any_traits(
+            matching_trees)
+
+        non_matching_rp_ids = self.make_trees_with_traits_for_any_traits(
+            non_matching_trees)
+
+        trait_a = trait_obj.Trait.get_by_name(self.ctx, a).id
+        trait_b = trait_obj.Trait.get_by_name(self.ctx, b).id
+        trait_c = trait_obj.Trait.get_by_name(self.ctx, c).id
+
+        # (A or B) and C
+        required_traits = [{trait_a, trait_b}, {trait_c}]
+
+        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
+            self.ctx,
+            {rp_id for rp_id, _ in matching_rp_ids + non_matching_rp_ids},
+            required_traits,
+            {}
+        )
+        # we check that every RP from every tree we expected to match is
+        # returned and none of the RPs from the other trees are returned
+        self.assertEqual(set(matching_rp_ids), rp_tuples_with_trait)
+
+    def test_get_trees_with_traits_any_traits_forbidden(self):
+        """Query RP trees with complex trait query involving both AND and OR
+        and forbidden traits
+
+        We use the following tree structure for this test with specific
+        traits.
+                     CN1 CUSTOM_A
+                    /   \
+              CN1_C1    CN1_C2 CUSTOM_B,
+                |
+              CN1_C1_GC1 CUSTOM_C
+
+        And each node has one extra custom trait with its own name so the test
+        can easily forbid one or more RPs directly from the tree.
+
+        We use the formula ((CUSTOM_A or CUSTOM_B) and CUSTOM_C) in this test.
+        Then we do the following cases where forbidden traits remove RPs:
+
+            1) with an unnecessary trait -> OK
+            2) with one side of an OR -> OK
+            3) with both sides of an OR -> NOK
+            4) with one side of an AND -> NOK
+        """
+        cn1 = self._create_provider('cn1')
+        tb.set_traits(cn1, 'CUSTOM_A', 'CUSTOM_CN1')
+        cn1_c1 = self._create_provider('cn1_c1', parent=cn1.uuid)
+        tb.set_traits(cn1_c1, 'CUSTOM_CN1_C1')
+        cn1_c1_gc1 = self._create_provider('cn1_c1_gc1', parent=cn1_c1.uuid)
+        tb.set_traits(cn1_c1_gc1, 'CUSTOM_C', 'CUSTOM_CN1_C1_GC1')
+        cn1_c2 = self._create_provider('cn1_c2', parent=cn1.uuid)
+        tb.set_traits(cn1_c2, 'CUSTOM_B', 'CUSTOM_CN1_C2')
+
+        trait_a = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_A').id
+        trait_b = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_B').id
+        trait_c = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_C').id
+
+        trait_cn1 = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_CN1').id
+        trait_cn1_c1 = trait_obj.Trait.get_by_name(
+            self.ctx, 'CUSTOM_CN1_C1').id
+        trait_cn1_c1_gc1 = trait_obj.Trait.get_by_name(
+            self.ctx, 'CUSTOM_CN1_C1_GC1').id
+        trait_cn1_c2 = trait_obj.Trait.get_by_name(
+            self.ctx, 'CUSTOM_CN1_C2').id
+
+        rp_ids = {cn1.id, cn1_c1.id, cn1_c1_gc1.id, cn1_c2.id}
+        expected_whole_tree = {(rp_id, cn1.id) for rp_id in rp_ids}
+
+        # (A or B) and C
+        required_traits = [{trait_a, trait_b}, {trait_c}]
+
+        # 1) forbid CN1_C1 but that is not needed
+        forbidden_traits = {trait_cn1_c1}
+
+        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
+            self.ctx, rp_ids, required_traits, forbidden_traits)
+        self.assertEqual(expected_whole_tree, rp_tuples_with_trait)
+
+        # 2) forbid CN1_C2 which has trait B. But trait A is also enough, and
+        # we have that on CN1 so this should still match
+        forbidden_traits = {trait_cn1_c2}
+
+        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
+            self.ctx, rp_ids, required_traits, forbidden_traits)
+        self.assertEqual(expected_whole_tree, rp_tuples_with_trait)
+
+        # 3) forbid CN1 and CN1_C2. This means neither trait A nor B is
+        # available so this is expected to not produce a match
+        forbidden_traits = {trait_cn1_c2, trait_cn1}
+
+        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
+            self.ctx, rp_ids, required_traits, forbidden_traits)
+        self.assertEqual(set(), rp_tuples_with_trait)
+
+        # 4) forbid CN1_C1_GC1. This means trait C is not available.
+        # So (A or B) and C cannot be fulfilled.
+        forbidden_traits = {trait_cn1_c1_gc1}
+
+        rp_tuples_with_trait = res_ctx._get_trees_with_traits(
+            self.ctx, rp_ids, required_traits, forbidden_traits)
+        self.assertEqual(set(), rp_tuples_with_trait)
+
    def test_get_roots_with_traits(self):
        _, avx2_t, ssd_t, geneve_t, ssl_t = self._make_trees_with_traits()