Merge "placement: resource requests for nested providers"

This commit is contained in:
Zuul 2018-04-27 14:09:55 +00:00 committed by Gerrit Code Review
commit 5ffa4be6a6
2 changed files with 324 additions and 136 deletions

View File

@ -2821,12 +2821,12 @@ def _has_provider_trees(ctx):
@db_api.api_context_manager.reader
def _get_provider_ids_matching(ctx, resources, required_traits,
forbidden_traits, member_of=None):
"""Returns a list of resource provider internal IDs that have available
inventory to satisfy all the supplied requests for resources.
"""Returns a list of tuples of (internal provider ID, root provider ID)
that have available inventory to satisfy all the supplied requests for
resources.
:note: This function is used for scenarios that do NOT involve sharing
providers. It also only looks at individual resource providers, not
provider trees.
providers.
:param ctx: Session context to use
:param resources: A dict, keyed by resource class ID, of the amount
@ -2883,7 +2883,7 @@ def _get_provider_ids_matching(ctx, resources, required_traits,
for rc_id in resources
}
sel = sa.select([rpt.c.id])
sel = sa.select([rpt.c.id, rpt.c.root_provider_id])
# List of the WHERE conditions we build up by iterating over the requested
# resources
@ -2947,7 +2947,7 @@ def _get_provider_ids_matching(ctx, resources, required_traits,
sel = sel.select_from(join_chain)
sel = sel.where(sa.and_(*where_conds))
return [r[0] for r in ctx.session.execute(sel)]
return [(r[0], r[1]) for r in ctx.session.execute(sel)]
@db_api.api_context_manager.reader
@ -2976,10 +2976,86 @@ def _provider_aggregates(ctx, rp_ids):
@db_api.api_context_manager.reader
def _get_trees_matching_all_resources(ctx, resources):
"""Returns a list of root provider internal IDs for provider trees where
the nodes in the tree collectively have available inventory to satisfy all
the supplied requests for resources.
def _get_providers_with_resource(ctx, rc_id, amount):
    """Find the providers able to satisfy a request for one resource class.

    A provider qualifies when its inventory of the resource class has enough
    unused capacity for the requested amount and the amount respects the
    inventory's min_unit, max_unit and step_size constraints.

    :param ctx: Session context to use
    :param rc_id: Internal ID of resource class to check inventory for
    :param amount: Amount of resource being requested
    :returns: A set of two-tuples of (provider ID, root provider ID)
    """
    # The query we build looks like this:
    #
    # SELECT rp.id, rp.root_provider_id
    # FROM resource_providers AS rp
    # JOIN inventories AS inv
    #  ON rp.id = inv.resource_provider_id
    #  AND inv.resource_class_id = $RC_ID
    # LEFT JOIN (
    #  SELECT
    #    alloc.resource_provider_id,
    #    SUM(alloc.used) AS used
    #  FROM allocations AS alloc
    #  WHERE alloc.resource_class_id = $RC_ID
    #  GROUP BY alloc.resource_provider_id
    # ) AS usage
    #  ON inv.resource_provider_id = usage.resource_provider_id
    # WHERE
    #  COALESCE(usage.used, 0) + $AMOUNT <=
    #      ((inv.total - inv.reserved) * inv.allocation_ratio)
    #  AND inv.min_unit <= $AMOUNT
    #  AND inv.max_unit >= $AMOUNT
    #  AND $AMOUNT % inv.step_size = 0
    rp_tbl = sa.alias(_RP_TBL, name="rp")
    inv_tbl = sa.alias(_INV_TBL, name="inv")
    alloc_tbl = sa.alias(_ALLOC_TBL, name="alloc")

    # Derived table: total amount already allocated against each provider for
    # the requested resource class.
    used_per_provider = sa.alias(
        sa.select([
            alloc_tbl.c.resource_provider_id,
            sql.func.sum(alloc_tbl.c.used).label('used'),
        ]).where(
            alloc_tbl.c.resource_class_id == rc_id
        ).group_by(
            alloc_tbl.c.resource_provider_id
        ),
        name="usage")

    # Providers joined to their inventory record for this resource class...
    providers_with_inventory = sa.join(
        rp_tbl, inv_tbl,
        sa.and_(
            rp_tbl.c.id == inv_tbl.c.resource_provider_id,
            inv_tbl.c.resource_class_id == rc_id))
    # ...and outer-joined to usage, so that providers without any allocation
    # rows are kept (their usage is treated as zero via COALESCE below).
    joined = sa.outerjoin(
        providers_with_inventory, used_per_provider,
        inv_tbl.c.resource_provider_id ==
        used_per_provider.c.resource_provider_id)

    capacity = (
        (inv_tbl.c.total - inv_tbl.c.reserved) * inv_tbl.c.allocation_ratio)
    fits_capacity = (
        sql.func.coalesce(used_per_provider.c.used, 0) + amount <= capacity)

    query = sa.select([rp_tbl.c.id, rp_tbl.c.root_provider_id])
    query = query.select_from(joined)
    query = query.where(
        sa.and_(
            fits_capacity,
            inv_tbl.c.min_unit <= amount,
            inv_tbl.c.max_unit >= amount,
            amount % inv_tbl.c.step_size == 0))

    rows = ctx.session.execute(query).fetchall()
    return {(row[0], row[1]) for row in rows}
@db_api.api_context_manager.reader
def _get_trees_matching_all(ctx, resources, required_traits, forbidden_traits,
member_of):
"""Returns a list of two-tuples (provider internal ID, root provider
internal ID) for providers that satisfy the request for resources.
If traits are also required, this function only returns results where the
set of providers within a tree that satisfy the resource request
collectively have all the required traits associated with them. This means
that given the following provider tree:
cn1
|
--> pf1 (SRIOV_NET_VF:2)
|
--> pf2 (SRIOV_NET_VF:1, HW_NIC_OFFLOAD_GENEVE)
A request for 1 SRIOV_NET_VF resource and no required traits will
return both pf1 and pf2. However, a request for 2 SRIOV_NET_VF and required
trait of HW_NIC_OFFLOAD_GENEVE will return no results (since pf1 is the
only provider with enough inventory of SRIOV_NET_VF but it does not have
the required HW_NIC_OFFLOAD_GENEVE trait).
:note: This function is used for scenarios that do NOT involve sharing
providers AND where there are nested providers present in the deployment.
@ -2987,108 +3063,125 @@ def _get_trees_matching_all_resources(ctx, resources):
:param ctx: Session context to use
:param resources: A dict, keyed by resource class ID, of the amount
requested of that resource class.
:param required_traits: A map, keyed by trait string name, of required
trait internal IDs that each provider TREE must
COLLECTIVELY have associated with it
:param forbidden_traits: A map, keyed by trait string name, of trait
internal IDs that a resource provider must
not have.
:param member_of: An optional list of aggregate UUIDs. If provided, the
allocation_candidates returned will only be for resource
providers that are members of one or more of the supplied
aggregates.
"""
# Imagine a request group that contains a request for the following
# resources:
#
# * VCPU: 2
# * MEMORY_MB: 2048
# * SRIOV_NET_VF: 1
#
# The SQL we want to produce looks like this:
#
# SELECT rp.root_provider_id
# FROM resource_providers AS rp
# JOIN inventories AS inv
# ON rp.id = inv.resource_provider_id
# LEFT JOIN (
# SELECT resource_provider_id, resource_class_id, SUM(used) AS used
# FROM allocations
# WHERE resource_class_id IN ($RESOURCES)
# GROUP BY resource_provider_id, resource_class_id
# ) AS usages
# ON inv.resource_provider_id = usages.resource_provider_id
# AND inv.resource_class_id = usages.resource_class_id
# WHERE inv.resource_class_id IN ($RESOURCES) AND
# (
# inv.resource_class_id = $VCPU
# AND (((inv.total - inv.reserved) * inv.allocation_ratio) <
# (COALESCE(usage.used, 0) + $VCPU_REQUESTED))
# AND inv.min_unit >= $VCPU_REQUESTED
# AND inv.max_unit <= $VCPU_REQUESTED
# AND inv.step_size % $VCPU_REQUESTED = 0
# ) OR (
# inv.resource_class_id = $RAM
# AND (((inv.total - inv.reserved) * inv.allocation_ratio) <
# (COALESCE(usage.used, 0) + $RAM_REQUESTED))
# AND inv.min_unit >= $RAM_REQUESTED
# AND inv.max_unit <= $RAM_REQUESTED
# AND inv.step_size % $RAM_REQUESTED = 0
# ) OR (
# inv.resource_class_id = $SRIOV_NET_VF
# AND (((inv.total - inv.reserved) * inv.allocation_ratio) <
# (COALESCE(usage.used, 0) + $VF_REQUESTED))
# AND inv.min_unit >= $VF_REQUESTED
# AND inv.max_unit <= $VF_REQUESTED
# AND inv.step_size % $VF_REQUESTED = 0
# )
# GROUP BY rp.root_provider_id
# HAVING COUNT(DISTINCT inv.resource_class_id) = 3;
rpt = sa.alias(_RP_TBL, name="rp")
inv = sa.alias(_INV_TBL, name="inv")
# We first grab the provider trees that have nodes that meet the request
# for each resource class. Once we have this information, we'll then do a
# followup query to winnow the set of resource providers to only those
# provider *trees* that have all of the required traits.
provs_with_inv = set()
# provs_with_inv is a set of two-tuples with the second element being the
# root provider ID. Get the list of root provider IDs and get all trees
# that collectively have all required traits
trees_with_inv = set()
# Derived table containing usage numbers for all resource providers for
# each resource class involved in the request
usages = sa.alias(
sa.select([
_ALLOC_TBL.c.resource_provider_id,
_ALLOC_TBL.c.resource_class_id,
sql.func.sum(_ALLOC_TBL.c.used).label('used'),
]).where(
_ALLOC_TBL.c.resource_class_id.in_(resources),
).group_by(
_ALLOC_TBL.c.resource_provider_id,
_ALLOC_TBL.c.resource_class_id
),
name='usage',
)
sel = sa.select([rpt.c.root_provider_id])
rp_inv_join = sa.join(rpt, inv, rpt.c.id == inv.c.resource_provider_id)
rp_inv_usage_join = sa.outerjoin(
rp_inv_join, usages,
sa.and_(
inv.c.resource_provider_id ==
usages.c.resource_provider_id,
inv.c.resource_class_id ==
usages.c.resource_class_id,
))
usage_conds = []
for rc_id, amount in resources.items():
usage_cond = sa.and_(
inv.c.resource_class_id == rc_id,
(
(sql.func.coalesce(usages.c.used, 0) + amount) <=
(inv.c.total - inv.c.reserved) * inv.c.allocation_ratio
),
inv.c.min_unit <= amount,
inv.c.max_unit >= amount,
amount % inv.c.step_size == 0,
)
usage_conds.append(usage_cond)
rc_provs_with_inv = _get_providers_with_resource(ctx, rc_id, amount)
if not rc_provs_with_inv:
# If there are no providers that have one of the resource classes,
# then we can short-circuit
return []
rc_trees = set(p[1] for p in rc_provs_with_inv)
provs_with_inv |= rc_provs_with_inv
if trees_with_inv:
trees_with_inv &= rc_trees
if not trees_with_inv:
return []
else:
trees_with_inv = rc_trees
sel = sel.select_from(rp_inv_usage_join)
sel = sel.where(
sa.and_(inv.c.resource_class_id.in_(resources),
sa.or_(*usage_conds)))
sel = sel.group_by(rpt.c.root_provider_id)
sel = sel.having(
sql.func.count(
sql.func.distinct(inv.c.resource_class_id)) == len(resources))
# Select only those tuples where there are providers for all requested
# resource classes (trees_with_inv contains the root provider IDs of those
# trees that contain all our requested resources)
provs_with_inv = set(p for p in provs_with_inv if p[1] in trees_with_inv)
return [r[0] for r in ctx.session.execute(sel)]
if not provs_with_inv:
return []
# TODO(jaypipes): Handle filtering on member_of parameter
if not required_traits and not forbidden_traits:
# If there were no traits required, there's no difference in how we
# calculate allocation requests between nested and non-nested
# environments, so just short-circuit and return
return list(provs_with_inv)
# Return the providers where the providers have the available inventory
# capacity and that set of providers (grouped by their tree) have all
# of the required traits and none of the forbidden traits
# We now want to restrict the returned providers to only those provider
# trees that have all our required traits.
#
# The SQL we want looks like this:
#
# SELECT outer_rp.id, outer_rp.root_provider_id
# FROM resource_providers AS outer_rp
# JOIN (
# SELECT rp.root_provider_id
# FROM resource_providers AS rp
# JOIN resource_provider_traits AS rptt
# ON rp.id = rptt.resource_provider_id
# WHERE rp.id IN ($RP_IDS_WITH_INV)
# AND rptt.trait_id IN ($REQUIRED_TRAIT_IDS)
# GROUP BY rp.root_provider_id
# HAVING COUNT(DISTINCT rptt.trait_id) == $NUM_REQUIRED_TRAITS
# ) AS trees_with_traits
# ON outer_rp.root_provider_id = trees_with_traits.root_provider_id
rp_ids_with_inv = set(p[0] for p in provs_with_inv)
# Build our inner subquery
rpt = sa.alias(_RP_TBL, name="rp")
rptt = sa.alias(_RP_TRAIT_TBL, name="rptt")
rpt_to_rptt = sa.join(
rpt, rptt, rpt.c.id == rptt.c.resource_provider_id)
subq = sa.select([rpt.c.root_provider_id])
subq = subq.select_from(rpt_to_rptt)
subq = subq.where(
sa.and_(
rpt.c.id.in_(rp_ids_with_inv),
rptt.c.trait_id.in_(required_traits.values())))
# Tack on an additional WHERE clause for the derived table if we've got
# forbidden traits in the mix.
# TODO(jaypipes): This approach is not efficient. We could potentially
# change _get_provider_ids_having_any_trait() to accept an optional rp_ids
# parameter that would further winnow results to a set of resource provider
# IDs (which we have here as we've already looked up the providers that
# have appropriate inventory capacity)
if forbidden_traits:
forbidden_rp_ids = _get_provider_ids_having_any_trait(
ctx, forbidden_traits)
subq = subq.where(~rpt.c.id.in_(forbidden_rp_ids))
subq = subq.group_by(rpt.c.root_provider_id)
# Only keep the provider trees whose capable providers collectively have ALL
# the required traits: the subquery is grouped by root provider ID, so we
# require COUNT(DISTINCT trait_id) to equal the number of required traits
num_traits = len(required_traits)
having_cond = sa.func.count(sa.distinct(rptt.c.trait_id)) == num_traits
subq = subq.having(having_cond)
trees_with_traits = sa.alias(subq, name="trees_with_traits")
outer_rps = sa.alias(_RP_TBL, name="outer_rps")
outer_to_subq = sa.join(
outer_rps, trees_with_traits,
outer_rps.c.root_provider_id == trees_with_traits.c.root_provider_id)
sel = sa.select([outer_rps.c.id, outer_rps.c.root_provider_id])
sel = sel.select_from(outer_to_subq)
res = ctx.session.execute(sel).fetchall()
return [(rp_id, root_id) for rp_id, root_id in res]
def _build_provider_summaries(context, usages, prov_traits):
@ -3219,7 +3312,7 @@ def _allocation_request_for_provider(ctx, requested_resources, rp_uuid):
return AllocationRequest(ctx, resource_requests=resource_requests)
def _alloc_candidates_no_shared(ctx, requested_resources, rp_ids):
def _alloc_candidates_no_shared(ctx, requested_resources, rps):
"""Returns a tuple of (allocation requests, provider summaries) for a
supplied set of requested resource amounts and resource providers. The
supplied resource providers have capacity to satisfy ALL of the resources
@ -3235,13 +3328,14 @@ def _alloc_candidates_no_shared(ctx, requested_resources, rp_ids):
:param ctx: nova.context.RequestContext object
:param requested_resources: dict, keyed by resource class ID, of amounts
being requested for that resource class
:param rp_ids: List of resource provider IDs for providers that matched the
requested resources
:param rps: List of two-tuples of (provider ID, root provider ID)s for
providers that matched the requested resources
"""
if not rp_ids:
if not rps:
return [], []
# Grab usage summaries for each provider and resource class requested
requested_rc_ids = list(requested_resources)
rp_ids = set(p[0] for p in rps)
usages = _get_usages_by_provider_and_rc(ctx, rp_ids, requested_rc_ids)
# Get a dict, keyed by resource provider internal ID, of trait string names

View File

@ -11,6 +11,7 @@
# under the License.
import os_traits
from oslo_utils import uuidutils
import six
import sqlalchemy as sa
from nova.api.openstack.placement import exception
@ -229,7 +230,7 @@ class ProviderDBHelperTestCase(ProviderDBBase):
# We should get all the incl_* RPs
expected = [incl_biginv_noalloc, incl_extra_full]
self.assertEqual(set(rp.id for rp in expected), set(res))
self.assertEqual(set((rp.id, rp.id) for rp in expected), set(res))
# Now request that the providers must have a set of required traits and
# that this results in no results returned, since we haven't yet
@ -249,7 +250,8 @@ class ProviderDBHelperTestCase(ProviderDBBase):
res = rp_obj._get_provider_ids_matching(self.ctx, resources,
req_traits, {})
self.assertEqual([incl_biginv_noalloc.id], res)
rp_ids = [r[0] for r in res]
self.assertEqual([incl_biginv_noalloc.id], rp_ids)
def test_get_provider_ids_having_all_traits(self):
def run(traitnames, expected_ids):
@ -1809,18 +1811,39 @@ class AllocationCandidatesTestCase(ProviderDBBase):
"""Utility function to look up root provider IDs from a set of supplied
provider names directly from the API DB.
"""
names = map(six.text_type, names)
sel = sa.select([rp_obj._RP_TBL.c.root_provider_id])
sel = sel.where(rp_obj._RP_TBL.c.name.in_(names))
with self.api_db.get_engine().connect() as conn:
cn_root_ids = set([r[0] for r in conn.execute(sel)])
return cn_root_ids
def test_trees_matching_all_resources(self):
def test_trees_matching_all(self):
"""Creates a few provider trees having different inventories and
allocations and tests the _get_trees_matching_all() utility
function to ensure that only the root provider IDs of matching provider
trees are returned.
"""
# NOTE(jaypipes): _get_trees_matching_all() expects a dict of resource
# class internal identifiers, not string names
resources = {
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.VCPU): 2,
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.MEMORY_MB): 256,
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.SRIOV_NET_VF): 1,
}
req_traits = {}
forbidden_traits = {}
member_of = []
# Before we even set up any providers, verify that the short-circuits
# work to return empty lists
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
self.assertEqual([], trees)
# We are setting up 3 trees of providers that look like this:
#
# compute node (cn)
@ -1851,25 +1874,22 @@ class AllocationCandidatesTestCase(ProviderDBBase):
name = 'cn' + x + '_numa1_pf1'
pf1 = self._create_provider(name, parent=numa_cell1.uuid)
_add_inventory(pf1, fields.ResourceClass.SRIOV_NET_VF, 8)
# Mark only the second PF on the third compute node as having
# GENEVE offload enabled
if x == '3':
_set_traits(pf1, os_traits.HW_NIC_OFFLOAD_GENEVE)
# Doesn't really make a whole lot of logical sense, but allows
# us to test situations where the same trait is associated with
# multiple providers in the same tree and one of the providers
# has inventory we will use...
_set_traits(cn, os_traits.HW_NIC_OFFLOAD_GENEVE)
# NOTE(jaypipes): _get_trees_matching_all() expects a dict of resource
# class internal identifiers, not string names
resources = {
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.VCPU): 2,
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.MEMORY_MB): 256,
fields.ResourceClass.STANDARD.index(
fields.ResourceClass.SRIOV_NET_VF): 1,
}
trees = rp_obj._get_trees_matching_all_resources(self.ctx, resources)
self.assertEqual(3, len(trees))
# The returned results are the internal root_provider_id values of the
# three compute node providers. Grab those values using a manual query
# to double-check the results of _get_trees_matching_all()
cn_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(cn_root_ids, set(trees))
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
# trees is a list of two-tuples of (provider ID, root provider ID)
tree_root_ids = set(p[1] for p in trees)
expect_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(expect_root_ids, tree_root_ids)
# OK, now consume all the VFs in the second compute node and verify
# only the first and third computes are returned as root providers from
@ -1882,11 +1902,85 @@ class AllocationCandidatesTestCase(ProviderDBBase):
uuids.cn2_numa1_pf1)
_allocate_from_provider(cn2_pf1, fields.ResourceClass.SRIOV_NET_VF, 8)
trees = rp_obj._get_trees_matching_all_resources(self.ctx, resources)
self.assertEqual(2, len(trees))
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
tree_root_ids = set(p[1] for p in trees)
self.assertEqual(2, len(tree_root_ids))
# cn2 had all its VFs consumed, so we should only get cn1 and cn3's IDs
# as the root provider IDs.
cn_names = ['cn1', 'cn3']
cn_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(cn_root_ids, set(trees))
expect_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(expect_root_ids, set(tree_root_ids))
# OK, now we're going to add a required trait to the mix. The only
# provider that is decorated with the HW_NIC_OFFLOAD_GENEVE trait is
# the second physical function on the third compute host. So we should
# only get the third compute node back if we require that trait
geneve_t = rp_obj.Trait.get_by_name(
self.ctx, os_traits.HW_NIC_OFFLOAD_GENEVE)
# required_traits parameter is a dict of trait name to internal ID
req_traits = {
geneve_t.name: geneve_t.id,
}
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
tree_root_ids = set(p[1] for p in trees)
self.assertEqual(1, len(tree_root_ids))
cn_names = ['cn3']
expect_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(expect_root_ids, set(tree_root_ids))
# Add in a required trait that no provider has associated with it and
# verify that there are no returned allocation candidates
avx2_t = rp_obj.Trait.get_by_name(
self.ctx, os_traits.HW_CPU_X86_AVX2)
# required_traits parameter is a dict of trait name to internal ID
req_traits = {
geneve_t.name: geneve_t.id,
avx2_t.name: avx2_t.id,
}
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
tree_root_ids = set(p[1] for p in trees)
self.assertEqual(0, len(tree_root_ids))
# If we add the AVX2 trait as forbidden, not required, then we
# should get back the original cn3
req_traits = {
geneve_t.name: geneve_t.id,
}
forbidden_traits = {
avx2_t.name: avx2_t.id,
}
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
tree_root_ids = set(p[1] for p in trees)
self.assertEqual(1, len(tree_root_ids))
cn_names = ['cn3']
expect_root_ids = self._get_root_ids_matching_names(cn_names)
self.assertEqual(expect_root_ids, set(tree_root_ids))
# Consume all the VFs in first and third compute nodes and verify
# no more providers are returned
cn1_pf0 = rp_obj.ResourceProvider.get_by_uuid(self.ctx,
uuids.cn1_numa0_pf0)
_allocate_from_provider(cn1_pf0, fields.ResourceClass.SRIOV_NET_VF, 8)
cn1_pf1 = rp_obj.ResourceProvider.get_by_uuid(self.ctx,
uuids.cn1_numa1_pf1)
_allocate_from_provider(cn1_pf1, fields.ResourceClass.SRIOV_NET_VF, 8)
cn3_pf0 = rp_obj.ResourceProvider.get_by_uuid(self.ctx,
uuids.cn3_numa0_pf0)
_allocate_from_provider(cn3_pf0, fields.ResourceClass.SRIOV_NET_VF, 8)
cn3_pf1 = rp_obj.ResourceProvider.get_by_uuid(self.ctx,
uuids.cn3_numa1_pf1)
_allocate_from_provider(cn3_pf1, fields.ResourceClass.SRIOV_NET_VF, 8)
trees = rp_obj._get_trees_matching_all(
self.ctx, resources, req_traits, forbidden_traits, member_of)
self.assertEqual([], trees)