Extend the RP tree DB query to support any-traits

This extends the RP tree query at the DB layer to support any-traits with a
nested required_traits syntax [{A, B}, {C}] meaning ((A or B) and C).
The object and API layers do not support such queries yet.

Story: 2005345
Story: 2005346
Change-Id: I8704fe7350f74e0567e574eb00fc40b330817381
This commit is contained in:
Balazs Gibizer 2022-01-21 19:18:17 +01:00
parent 849c9afd2e
commit c19481a5f3
2 changed files with 298 additions and 123 deletions

View File

@ -729,8 +729,8 @@ def get_trees_matching_all(rg_ctx, rw_ctx):
# capacity and that set of providers (grouped by their tree) have all
# of the required traits and none of the forbidden traits
rp_tuples_with_trait = _get_trees_with_traits(
rg_ctx.context, provs_with_inv.rps, rg_ctx.required_trait_map,
rg_ctx.forbidden_trait_map)
rg_ctx.context, provs_with_inv.rps, rg_ctx.required_trait_map.values(),
rg_ctx.forbidden_trait_map.values())
provs_with_inv.filter_by_rp(rp_tuples_with_trait)
LOG.debug("found %d providers under %d trees after applying "
"traits filter - required: %s, forbidden: %s",
@ -767,85 +767,113 @@ def _get_trees_with_traits(ctx, rp_ids, required_traits, forbidden_traits):
:param ctx: Session context to use
:param rp_ids: a set of resource provider IDs
:param required_traits: A map, keyed by trait string name, of required
trait internal IDs that each provider TREE must
COLLECTIVELY have associated with it
:param forbidden_traits: A map, keyed by trait string name, of trait
internal IDs that a resource provider must
not have.
:param required_traits: A list of sets of trait internal IDs where the
traits in each nested set are OR'd while the items in the outer list are
AND'd together. The RPs in the tree should COLLECTIVELY fulfill this
trait request.
:param forbidden_traits: A list of trait internal IDs that a resource
provider tree must not have.
"""
# We now want to restrict the returned providers to only those provider
# trees that have all our required traits.
#
# The SQL we want looks like this:
#
# SELECT outer_rp.id, outer_rp.root_provider_id
# FROM resource_providers AS outer_rp
# JOIN (
# SELECT rp.root_provider_id
# FROM resource_providers AS rp
# # Only if we have required traits...
# INNER JOIN resource_provider_traits AS rptt
# ON rp.id = rptt.resource_provider_id
# AND rptt.trait_id IN ($REQUIRED_TRAIT_IDS)
# # Only if we have forbidden_traits...
# LEFT JOIN resource_provider_traits AS rptt_forbid
# ON rp.id = rptt_forbid.resource_provider_id
# AND rptt_forbid.trait_id IN ($FORBIDDEN_TRAIT_IDS)
# WHERE rp.id IN ($RP_IDS)
# # Only if we have forbidden traits...
# AND rptt_forbid.resource_provider_id IS NULL
# GROUP BY rp.root_provider_id
# # Only if have required traits...
# HAVING COUNT(DISTINCT rptt.trait_id) == $NUM_REQUIRED_TRAITS
# ) AS trees_with_traits
# ON outer_rp.root_provider_id = trees_with_traits.root_provider_id
rpt = sa.alias(_RP_TBL, name="rp")
cond = [rpt.c.id.in_(rp_ids)]
subq = sa.select([rpt.c.root_provider_id])
subq_join = None
if required_traits:
rptt = sa.alias(_RP_TRAIT_TBL, name="rptt")
rpt_to_rptt = sa.join(
rpt, rptt, sa.and_(
rpt.c.id == rptt.c.resource_provider_id,
rptt.c.trait_id.in_(required_traits.values())))
subq_join = rpt_to_rptt
# Only get the resource providers that have ALL the required traits,
# so we need to GROUP BY the root provider and ensure that the
# COUNT(trait_id) is equal to the number of traits we are requiring
num_traits = len(required_traits)
having_cond = sa.func.count(sa.distinct(rptt.c.trait_id)) == num_traits
subq = subq.having(having_cond)
# FIXME(gibi): This is a temporary fallback to the old calling convention
# when required_traits was a flat list of trait ids. We translate such a
# parameter to the new nested structure with the same meaning.
# This code should be removed once each caller is adapted to call this
# with the new structure
if all(not isinstance(trait, set) for trait in required_traits):
# old value: required_traits = [A, B, C] -> A and B and C
# new value: required_traits = [{A}, {B}, {C}] -> (A) and (B) and (C)
# the () part could be a set of traits with OR relationship but
# the old callers does not support such OR relationship hence the old
# flat structure
required_traits = [{trait} for trait in required_traits]
# Tack on an additional LEFT JOIN clause inside the derived table if we've
# got forbidden traits in the mix.
if forbidden_traits:
rptt_forbid = sa.alias(_RP_TRAIT_TBL, name="rptt_forbid")
join_to = rpt
if subq_join is not None:
join_to = subq_join
rpt_to_rptt_forbid = sa.outerjoin(
join_to, rptt_forbid, sa.and_(
rpt.c.id == rptt_forbid.c.resource_provider_id,
rptt_forbid.c.trait_id.in_(forbidden_traits.values())))
cond.append(rptt_forbid.c.resource_provider_id == sa.null())
subq_join = rpt_to_rptt_forbid
# TODO(gibi): if somebody can combine the three SQL queries below into a
# single one then that will probably improve performance
subq = subq.select_from(subq_join)
subq = subq.where(sa.and_(*cond))
subq = subq.group_by(rpt.c.root_provider_id)
trees_with_traits = sa.alias(subq, name="trees_with_traits")
# Get the root of all rps in the rp_ids as we need to return every rp from
# rp_ids that is in a matching tree but below we will filter out rps by
# traits. So we need a copy and also that copy needs to associate rps to
# trees by root_id
rpt = sa.alias(_RP_TBL, name='rpt')
sel = sa.select([rpt.c.id, rpt.c.root_provider_id]).select_from(rpt)
sel = sel.where(rpt.c.id.in_(rp_ids))
res = ctx.session.execute(sel).fetchall()
original_rp_ids = {rp_id: root_id for rp_id, root_id in res}
outer_rps = sa.alias(_RP_TBL, name="outer_rps")
outer_to_subq = sa.join(
outer_rps, trees_with_traits,
outer_rps.c.root_provider_id == trees_with_traits.c.root_provider_id)
sel = sa.select([outer_rps.c.id, outer_rps.c.root_provider_id])
sel = sel.select_from(outer_to_subq)
# First filter out the rps from the rp_ids list that provide forbidden
# traits. To do that we collect those rps that provide any of the forbidden
# traits and with the outer join and the null check we filter them out
# of the result
rptt_forbidden = sa.alias(_RP_TRAIT_TBL, name="rptt_forbidden")
rp_to_trait = sa.outerjoin(
rpt, rptt_forbidden,
sa.and_(
rpt.c.id == rptt_forbidden.c.resource_provider_id,
rptt_forbidden.c.trait_id.in_(forbidden_traits)
)
)
sel = sa.select(
[rpt.c.id, rpt.c.root_provider_id]).select_from(rp_to_trait)
sel = sel.where(
sa.and_(
rpt.c.id.in_(original_rp_ids.keys()),
rptt_forbidden.c.trait_id == sa.null()
)
)
res = ctx.session.execute(sel).fetchall()
return set((rp_id, root_id) for rp_id, root_id in res)
# These are the rps that do not provide any forbidden traits
good_rp_ids = {}
for rp_id, root_id in res:
good_rp_ids[rp_id] = root_id
# shortcut: if no traits are required then good_rp_ids.values() contains
# all the good roots
if not required_traits:
return {
(rp_id, root_id)
for rp_id, root_id in original_rp_ids.items()
if root_id in good_rp_ids.values()
}
# now get the traits provided by the good rps per tree
rptt = sa.alias(_RP_TRAIT_TBL, name="rptt")
rp_to_trait = sa.join(
rpt, rptt, rpt.c.id == rptt.c.resource_provider_id)
sel = sa.select(
[rpt.c.root_provider_id, rptt.c.trait_id]
).select_from(rp_to_trait)
sel = sel.where(rpt.c.id.in_(good_rp_ids))
res = ctx.session.execute(sel).fetchall()
root_to_traits = collections.defaultdict(set)
for root_id, trait_id in res:
root_to_traits[root_id].add(trait_id)
result = set()
# filter the trees by checking if each tree provides all the
# required_traits
for root_id, provided_traits in root_to_traits.items():
# we need a match for all the items from the outer list of the
# required_traits as that describes AND relationship
if all(
# we need at least one match per nested trait set as that set
# describes OR relationship
any_traits.intersection(provided_traits)
for any_traits in required_traits
):
# This tree matches the required traits so add to the result all the
# rps from the original rp_ids that belong to this tree
result.update(
{
(rp_id, root_id)
for rp_id, original_root_id in original_rp_ids.items()
if root_id == original_root_id
}
)
return result
@db_api.placement_context_manager.reader

View File

@ -776,9 +776,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
rp_ids, avx2_t, ssd_t, geneve_t, ssl_t = self._make_trees_with_traits()
# Case1: required on root
required_traits = {
avx2_t.name: avx2_t.id,
}
required_traits = [{avx2_t.id}]
forbidden_traits = {}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -791,13 +789,9 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case1': required on root with forbidden traits
# Let's validate that cn3 dissapears
required_traits = {
avx2_t.name: avx2_t.id,
}
forbidden_traits = {
ssd_t.name: ssd_t.id,
}
# Let's validate that cn3 disappears
required_traits = [{avx2_t.id}]
forbidden_traits = {ssd_t.id}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
self.ctx, rp_ids, required_traits, forbidden_traits)
@ -809,10 +803,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case2: multiple required on root
required_traits = {
avx2_t.name: avx2_t.id,
ssd_t.name: ssd_t.id
}
required_traits = [{avx2_t.id}, {ssd_t.id}]
forbidden_traits = {}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -825,9 +816,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case3: required on child
required_traits = {
geneve_t.name: geneve_t.id
}
required_traits = [{geneve_t.id}]
forbidden_traits = {}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -840,13 +829,9 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case3': required on child with forbidden traits
# Let's validate that cn4 dissapears
required_traits = {
geneve_t.name: geneve_t.id
}
forbidden_traits = {
ssl_t.name: ssl_t.id
}
# Let's validate that cn4 disappears
required_traits = [{geneve_t.id}]
forbidden_traits = {ssl_t.id}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
self.ctx, rp_ids, required_traits, forbidden_traits)
@ -858,10 +843,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case4: multiple required on child
required_traits = {
geneve_t.name: geneve_t.id,
ssl_t.name: ssl_t.id
}
required_traits = [{geneve_t.id}, {ssl_t.id}]
forbidden_traits = {}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -874,10 +856,7 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
self.assertEqual(expect_root_ids, tree_root_ids)
# Case5: required on root and child
required_traits = {
avx2_t.name: avx2_t.id,
geneve_t.name: geneve_t.id
}
required_traits = [{avx2_t.id}, {geneve_t.id}]
forbidden_traits = {}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -901,11 +880,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
tb.set_traits(cn1, 'CUSTOM_FOO')
custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')
required_traits = {
}
forbidden_traits = {
custom_foo.name: custom_foo.id,
}
required_traits = []
forbidden_traits = {custom_foo.id}
rp_ids = {cn1.id, cn1_c1.id} # both RP from the tree
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -939,11 +915,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')
tb.set_traits(cn1_c1, 'CUSTOM_FOO')
required_traits = {
}
forbidden_traits = {
custom_foo.name: custom_foo.id,
}
required_traits = []
forbidden_traits = {custom_foo.id}
rp_ids = {cn1.id, cn1_c1.id}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -964,12 +937,8 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
custom_foo = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_FOO')
custom_bar = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_BAR')
required_traits = {
custom_bar.name: custom_bar.id
}
forbidden_traits = {
custom_foo.name: custom_foo.id,
}
required_traits = [{custom_bar.id}]
forbidden_traits = {custom_foo.id}
rp_ids = {cn1.id, cn1_c1.id}
rp_tuples_with_trait = res_ctx._get_trees_with_traits(
@ -988,6 +957,184 @@ class ProviderTreeDBHelperTestCase(tb.PlacementDbBaseTestCase):
# forbidden trait. There are no other rps in the tree to be considered.
self.assertEqual(set(), rp_tuples_with_trait)
def make_tree_for_any_traits(self, tree_index, trait_list):
    r"""Create one four-provider RP tree and set traits on its providers.

    The tree has the following shape, where x is ``tree_index``::

              CNx
             /   \
        CNx_C1    CNx_C2
          |
      CNx_C1_GC1

    The providers are created with the names ``cn<x>``, ``cn<x>c1``,
    ``cn<x>c2`` and ``cn<x>c1_gc1``.

    :param tree_index: value interpolated into the provider names so that
        each tree gets distinct names.
    :param trait_list: per-provider collections of trait names, in the
        order CN, C1, C2, C1_GC1. Each collection is unpacked into a
        ``tb.set_traits`` call for the provider in that position.
    :returns: a list of ``(rp.id, cn.id)`` tuples, one per created
        provider, in the order CN, C1, C2, C1_GC1.
    """
    cn_name = f'cn{tree_index}'
    cn = self._create_provider(cn_name)
    cn_c1 = self._create_provider(cn_name + 'c1', parent=cn.uuid)
    cn_c1_gc1 = self._create_provider(
        cn_name + 'c1_gc1', parent=cn_c1.uuid)
    cn_c2 = self._create_provider(cn_name + 'c2', parent=cn.uuid)
    # This order defines which trait_list entry lands on which provider.
    rps = [cn, cn_c1, cn_c2, cn_c1_gc1]
    # zip() stops at the shorter input, so a provider without a matching
    # trait_list entry gets no set_traits call at all.
    for rp, traits in zip(rps, trait_list):
        tb.set_traits(rp, *traits)
    return [(rp.id, cn.id) for rp in rps]
def make_trees_with_traits_for_any_traits(self, rp_trait_list):
    """Build one RP tree per entry of rp_trait_list and merge the results.

    :param rp_trait_list: iterable of ``(tree_index, trait_list)`` pairs;
        each pair is forwarded to ``make_tree_for_any_traits``.
    :returns: one flat list holding, in input order, every item returned
        by the per-tree ``make_tree_for_any_traits`` calls.
    """
    return [
        rp_tuple
        for tree_index, tree_traits in rp_trait_list
        for rp_tuple in self.make_tree_for_any_traits(
            tree_index, tree_traits)
    ]
def test_get_trees_with_traits_any_traits(self):
    r"""Check that ((A or B) and C) selects exactly the expected trees.

    We are setting up multiple RP trees with the same structure but
    with different traits. The structure is::

              CNx
             /   \
        CNx_C1    CNx_C2
          |
      CNx_C1_GC1

    The required trait query is ((A or B) and C). Then we assert that
    only the matching trees are returned.
    """
    a = 'CUSTOM_A'
    b = 'CUSTOM_B'
    c = 'CUSTOM_C'

    # Each row is (tree index, [traits of CN, C1, C2, C1_GC1]).
    matching_trees = [
        #     CN          C1      C2      C1_GC1
        (1,  [[a, b, c], [],     [],     [],     ], ),  # noqa
        (2,  [[a, c],    [b],    [],     [],     ], ),  # noqa
        (3,  [[a],       [b, c], [],     [],     ], ),  # noqa
        (4,  [[a],       [b],    [c],    [],     ], ),  # noqa
        (5,  [[c],       [b],    [a],    [],     ], ),  # noqa
        (6,  [[],        [a],    [b],    [c],    ], ),  # noqa
        (7,  [[c],       [],     [a, b], [],     ], ),  # noqa
        (8,  [[c],       [],     [],     [a, b], ], ),  # noqa
        (9,  [[a, b],    [b],    [a],    [c],    ], ),  # noqa
        (10, [[b, c],    [],     [],     [],     ], ),  # noqa
        (11, [[c],       [a],    [],     [],     ], ),  # noqa
        (12, [[a],       [],     [c],    [],     ], ),  # noqa
        (13, [[b],       [],     [],     [c],    ], ),  # noqa
        (14, [[],        [b],    [],     [c],    ], ),  # noqa
    ]
    non_matching_trees = [
        #     CN          C1      C2      C1_GC1
        (15, [[a, b],    [],     [],     [],     ], ),  # noqa
        (16, [[],        [a],    [],     [b],    ], ),  # noqa
        (17, [[c],       [],     [],     [],     ], ),  # noqa
        (18, [[],        [c],    [],     [],     ], ),  # noqa
        (19, [[],        [],     [a],    [],     ], ),  # noqa
    ]

    matching_rp_ids = self.make_trees_with_traits_for_any_traits(
        matching_trees)
    non_matching_rp_ids = self.make_trees_with_traits_for_any_traits(
        non_matching_trees)

    # Translate the trait names to the internal ids the query expects.
    trait_a, trait_b, trait_c = (
        trait_obj.Trait.get_by_name(self.ctx, name).id
        for name in (a, b, c)
    )

    # (A or B) and C
    required_traits = [{trait_a, trait_b}, {trait_c}]

    rp_tuples_with_trait = res_ctx._get_trees_with_traits(
        self.ctx,
        {rp_id for rp_id, _ in matching_rp_ids + non_matching_rp_ids},
        required_traits,
        {}
    )

    # we check that every RP from every tree we expected to match is
    # returned and none of the RPs from the other trees are returned
    self.assertEqual(set(matching_rp_ids), rp_tuples_with_trait)
def test_get_trees_with_traits_any_traits_forbidden(self):
    r"""Query RP trees with complex trait query involving both AND and OR
    and forbidden traits.

    We use the following tree structure for this test with specific
    traits::

                CN1 CUSTOM_A
               /   \
          CN1_C1    CN1_C2 CUSTOM_B
            |
        CN1_C1_GC1 CUSTOM_C

    Each node also has one extra custom trait named after the node
    itself, so the test can easily forbid one or more RPs directly from
    the tree.

    We use the formula ((CUSTOM_A or CUSTOM_B) and CUSTOM_C) in this
    test. Then we do the following cases where forbidden traits remove
    RPs:

    1) with an unnecessary trait -> OK
    2) with one side of an OR -> OK
    3) with both sides of an OR -> NOK
    4) with one side of an AND -> NOK
    """
    cn1 = self._create_provider('cn1')
    tb.set_traits(cn1, 'CUSTOM_A', 'CUSTOM_CN1')
    cn1_c1 = self._create_provider('cn1_c1', parent=cn1.uuid)
    tb.set_traits(cn1_c1, 'CUSTOM_CN1_C1')
    cn1_c1_gc1 = self._create_provider('cn1_c1_gc1', parent=cn1_c1.uuid)
    tb.set_traits(cn1_c1_gc1, 'CUSTOM_C', 'CUSTOM_CN1_C1_GC1')
    cn1_c2 = self._create_provider('cn1_c2', parent=cn1.uuid)
    tb.set_traits(cn1_c2, 'CUSTOM_B', 'CUSTOM_CN1_C2')

    # Internal ids of the traits used in the required expression ...
    trait_a = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_A').id
    trait_b = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_B').id
    trait_c = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_C').id
    # ... and of the per-node marker traits used to forbid single RPs.
    trait_cn1 = trait_obj.Trait.get_by_name(self.ctx, 'CUSTOM_CN1').id
    trait_cn1_c1 = trait_obj.Trait.get_by_name(
        self.ctx, 'CUSTOM_CN1_C1').id
    trait_cn1_c1_gc1 = trait_obj.Trait.get_by_name(
        self.ctx, 'CUSTOM_CN1_C1_GC1').id
    trait_cn1_c2 = trait_obj.Trait.get_by_name(
        self.ctx, 'CUSTOM_CN1_C2').id

    rp_ids = {cn1.id, cn1_c1.id, cn1_c1_gc1.id, cn1_c2.id}
    expected_whole_tree = {(rp_id, cn1.id) for rp_id in rp_ids}

    # (A or B) and C
    required_traits = [{trait_a, trait_b}, {trait_c}]

    # 1) forbid CN1_C1 but that is not needed
    forbidden_traits = {trait_cn1_c1}
    rp_tuples_with_trait = res_ctx._get_trees_with_traits(
        self.ctx, rp_ids, required_traits, forbidden_traits)
    self.assertEqual(expected_whole_tree, rp_tuples_with_trait)

    # 2) forbid CN1_C2 which has trait B. But trait A is also enough, and
    # we have that on CN1 so this should still match
    forbidden_traits = {trait_cn1_c2}
    rp_tuples_with_trait = res_ctx._get_trees_with_traits(
        self.ctx, rp_ids, required_traits, forbidden_traits)
    self.assertEqual(expected_whole_tree, rp_tuples_with_trait)

    # 3) forbid CN1 and CN1_C2. This means neither trait A nor B is
    # available so this is expected to not produce a match
    forbidden_traits = {trait_cn1_c2, trait_cn1}
    rp_tuples_with_trait = res_ctx._get_trees_with_traits(
        self.ctx, rp_ids, required_traits, forbidden_traits)
    self.assertEqual(set(), rp_tuples_with_trait)

    # 4) forbid CN1_C1_GC1. This means trait C is not available.
    # So (A or B) and C cannot be fulfilled.
    forbidden_traits = {trait_cn1_c1_gc1}
    rp_tuples_with_trait = res_ctx._get_trees_with_traits(
        self.ctx, rp_ids, required_traits, forbidden_traits)
    self.assertEqual(set(), rp_tuples_with_trait)
def test_get_roots_with_traits(self):
_, avx2_t, ssd_t, geneve_t, ssl_t = self._make_trees_with_traits()