required traits for no sharing providers

Adds filtering on a set of required traits to the
AllocationCandidates._get_by_requests() method, for requests in which no
sharing providers are involved.

This patch should demonstrate where I'm heading with the breaking out of
the various SQL-related pieces into their own functions. The next patch
will add required traits support for scenarios with sharing providers.

Change-Id: Iac246245ef7aedfa2d23e623f83fdf384e252159
This commit is contained in:
Jay Pipes 2017-11-01 12:02:28 -04:00 committed by Balazs Gibizer
parent ea63b3b40f
commit 828d9eb516
3 changed files with 274 additions and 22 deletions

View File

@ -2427,7 +2427,36 @@ def _get_usages_by_provider_and_rc(ctx, rp_ids, rc_ids):
@db_api.api_context_manager.reader
def _get_provider_ids_matching_all(ctx, resources):
def _get_provider_ids_having_all_traits(ctx, required_traits):
"""Returns a list of resource provider internal IDs that have ALL of the
required traits.
NOTE: Don't call this method with no required_traits.
:param ctx: Session context to use
:param required_traits: A map, keyed by trait string name, of required
trait internal IDs that each provider must have
associated with it
:raise ValueError: If required_traits is empty or None.
"""
if not required_traits:
raise ValueError('required_traits must not be empty')
rptt = sa.alias(_RP_TRAIT_TBL, name="rpt")
sel = sa.select([rptt.c.resource_provider_id])
sel = sel.where(rptt.c.trait_id.in_(required_traits.values()))
sel = sel.group_by(rptt.c.resource_provider_id)
# Only get the resource providers that have ALL the required traits, so we
# need to GROUP BY the resource provider and ensure that the
# COUNT(trait_id) is equal to the number of traits we are requiring
num_traits = len(required_traits)
cond = sa.func.count(rptt.c.trait_id) == num_traits
sel = sel.having(cond)
return [r[0] for r in ctx.session.execute(sel)]
@db_api.api_context_manager.reader
def _get_provider_ids_matching_all(ctx, resources, required_traits):
"""Returns a list of resource provider internal IDs that have available
inventory to satisfy all the supplied requests for resources.
@ -2438,7 +2467,16 @@ def _get_provider_ids_matching_all(ctx, resources):
:param ctx: Session context to use
:param resources: A dict, keyed by resource class ID, of the amount
requested of that resource class.
:param required_traits: A map, keyed by trait string name, of required
trait internal IDs that each provider must have
associated with it
"""
trait_rps = None
if required_traits:
trait_rps = _get_provider_ids_having_all_traits(ctx, required_traits)
if not trait_rps:
return []
rpt = sa.alias(_RP_TBL, name="rp")
rc_name_map = {
@ -2477,6 +2515,10 @@ def _get_provider_ids_matching_all(ctx, resources):
# resources
where_conds = []
# First filter by the resource providers that had all the required traits
if trait_rps:
where_conds.append(rpt.c.id.in_(trait_rps))
# The chain of joins that we eventually pass to select_from()
join_chain = rpt
@ -2520,9 +2562,10 @@ def _get_provider_ids_matching_all(ctx, resources):
return [r[0] for r in ctx.session.execute(sel)]
def _build_provider_summaries(context, usages):
"""Given a list of dicts of usage information, returns a dict, keyed by
resource provider ID, of ProviderSummary objects.
def _build_provider_summaries(context, usages, prov_traits):
"""Given a list of dicts of usage information and a map of providers to
their associated string traits, returns a dict, keyed by resource provider
ID, of ProviderSummary objects.
:param context: nova.context.Context object
:param usages: A list of dicts with the following format:
@ -2535,6 +2578,8 @@ def _build_provider_summaries(context, usages):
'reserved': integer,
'allocation_ratio': float,
}
:param prov_traits: A dict, keyed by internal resource provider ID, of
string trait names associated with that provider
"""
# Build up a dict, keyed by internal resource provider ID, of
# ProviderSummary objects containing one or more ProviderSummaryResource
@ -2549,6 +2594,7 @@ def _build_provider_summaries(context, usages):
used = usage['used'] or 0
allocation_ratio = usage['allocation_ratio']
cap = int((usage['total'] - usage['reserved']) * allocation_ratio)
traits = prov_traits.get(rp_id) or []
summary = summaries.get(rp_id)
if not summary:
@ -2570,6 +2616,7 @@ def _build_provider_summaries(context, usages):
used=used,
)
summary.resources.append(rpsr)
summary.traits = [Trait(context, name=tname) for tname in traits]
return summaries
@ -2627,7 +2674,8 @@ def _alloc_candidates_no_shared(ctx, requested_resources, rp_ids):
"""Returns a tuple of (allocation requests, provider summaries) for a
supplied set of requested resource amounts and resource providers. The
supplied resource providers have capacity to satisfy ALL of the resources
in the requested resources.
in the requested resources as well as ALL required traits that were
requested by the user.
This is an optimized code path for the common scenario when no sharing
providers exist in the system for any requested resource. In this scenario,
@ -2647,9 +2695,13 @@ def _alloc_candidates_no_shared(ctx, requested_resources, rp_ids):
requested_rc_ids = list(requested_resources)
usages = _get_usages_by_provider_and_rc(ctx, rp_ids, requested_rc_ids)
# Get a dict, keyed by resource provider internal ID, of trait string names
# that provider has associated with it
prov_traits = _provider_traits(ctx, rp_ids)
# Get a dict, keyed by resource provider internal ID, of ProviderSummary
# objects for all providers
summaries = _build_provider_summaries(ctx, usages)
summaries = _build_provider_summaries(ctx, usages, prov_traits)
# Next, build up a list of allocation requests. These allocation requests
# are AllocationRequest objects, containing resource provider UUIDs,
@ -2707,7 +2759,8 @@ def _alloc_candidates_with_shared(ctx, requested_resources, ns_rp_ids,
# Get a dict, keyed by resource provider internal ID, of ProviderSummary
# objects for all providers involved in the request
summaries = _build_provider_summaries(ctx, usages)
# TODO(jaypipes): Handle traits for sharing providers scenario
summaries = _build_provider_summaries(ctx, usages, {})
# Next, build up a list of allocation requests. These allocation requests
# are AllocationRequest objects, containing resource provider UUIDs,
@ -2793,6 +2846,52 @@ def _alloc_candidates_with_shared(ctx, requested_resources, ns_rp_ids,
return alloc_requests, list(summaries.values())
@db_api.api_context_manager.reader
def _provider_traits(ctx, rp_ids):
    """Look up the trait names associated with a set of resource providers.

    :param ctx: nova.context.Context object
    :param rp_ids: list of resource provider internal IDs
    :returns: A dict, keyed by resource provider internal ID, of lists of
              the string trait names associated with that provider. Only
              providers for which at least one trait row was found get a key.
    :raises: ValueError when rp_ids is empty.
    """
    if not rp_ids:
        raise ValueError(_("Expected rp_ids to be a list of resource provider "
                           "internal IDs, but got an empty list."))

    rp_trait_tbl = sa.alias(_RP_TRAIT_TBL, name='rptt')
    trait_tbl = sa.alias(_TRAIT_TBL, name='t')

    # Join the provider<->trait association rows to the trait rows so that
    # we can return the trait *names* rather than their internal IDs.
    joined = sa.join(
        rp_trait_tbl, trait_tbl,
        rp_trait_tbl.c.trait_id == trait_tbl.c.id)
    columns = [rp_trait_tbl.c.resource_provider_id, trait_tbl.c.name]
    query = sa.select(columns).select_from(joined)
    query = query.where(rp_trait_tbl.c.resource_provider_id.in_(rp_ids))

    traits_by_provider = collections.defaultdict(list)
    for row in ctx.session.execute(query):
        provider_id = row[0]
        trait_name = row[1]
        traits_by_provider[provider_id].append(trait_name)
    return traits_by_provider
@db_api.api_context_manager.reader
def _trait_ids_from_names(ctx, names):
    """Given a list of string trait names, returns a dict, keyed by those
    string names, of the corresponding internal integer trait ID.

    Names that have no matching row in the traits table are simply absent
    from the returned dict; callers that need every name resolved must
    compare the result against what they asked for.

    :raises: ValueError when names is empty.

    :param ctx: nova.context.Context object
    :param names: list of string trait names
    :returns: dict mapping trait name to trait internal ID
    """
    if not names:
        raise ValueError(_("Expected names to be a list of string trait "
                           "names, but got an empty list."))

    # Avoid SAWarnings about unicode types...
    # Build a concrete list: on Python 3, map() yields a lazy, single-use
    # iterator, which is not a safe argument for an IN clause.
    unames = [six.text_type(name) for name in names]
    tt = sa.alias(_TRAIT_TBL, name='t')
    sel = sa.select([tt.c.name, tt.c.id]).where(tt.c.name.in_(unames))
    return {r[0]: r[1] for r in ctx.session.execute(sel)}
@base.NovaObjectRegistry.register_if(False)
class AllocationCandidates(base.NovaObject):
"""The AllocationCandidates object is a collection of possible allocations
@ -2821,6 +2920,7 @@ class AllocationCandidates(base.NovaObject):
:param requests: List of nova.api.openstack.placement.util.RequestGroup
"""
_ensure_rc_cache(context)
_ensure_trait_sync(context)
alloc_reqs, provider_summaries = cls._get_by_requests(context,
requests)
return cls(
@ -2835,22 +2935,31 @@ class AllocationCandidates(base.NovaObject):
# We first get the list of "root providers" that either have the
# requested resources or are associated with the providers that
# share one or more of the requested resource(s)
# TODO(efried): Handle traits; handle non-sharing groups.
# For now, this extracts just the data expected by 1.10 - no API change
resources = [request_group.resources for request_group in requests
if not request_group.use_same_provider]
if len(resources) != 1:
# TODO(efried): Handle non-sharing groups.
# For now, this extracts just the sharing group's resources & traits.
sharing_groups = [request_group for request_group in requests
if not request_group.use_same_provider]
if len(sharing_groups) != 1 or not sharing_groups[0].resources:
raise ValueError(_("The requests parameter must contain one "
"RequestGroup with use_same_provider=False and "
"nonempty resources."))
resources = resources[0]
# Transform resource string names to internal integer IDs
resources = {
_RC_CACHE.id_from_string(key): value
for key, value in resources.items()
for key, value in sharing_groups[0].resources.items()
}
traits = sharing_groups[0].required_traits
# maps the trait name to the trait internal ID
trait_map = {}
if traits:
trait_map = _trait_ids_from_names(context, traits)
# Double-check that we found a trait ID for each requested name
if len(trait_map) != len(traits):
missing = traits - set(trait_map)
raise ValueError(_("Unknown traits requested: %s") % missing)
# Contains a set of resource provider IDs that share some inventory for
# each resource class requested. We do this here as an optimization. If
# we have no sharing providers, the SQL to find matching providers for
@ -2873,7 +2982,8 @@ class AllocationCandidates(base.NovaObject):
# add new code paths or modify this code path to return root
# provider IDs of provider trees instead of the resource provider
# IDs.
rp_ids = _get_provider_ids_matching_all(context, resources)
rp_ids = _get_provider_ids_matching_all(context, resources,
trait_map)
return _alloc_candidates_no_shared(context, resources, rp_ids)
# rp_ids contains a list of resource provider IDs that EITHER have all

View File

@ -9,6 +9,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os_traits
from oslo_utils import uuidutils
from nova.api.openstack.placement import lib as placement_lib
@ -74,12 +75,12 @@ def _find_summary_for_resource(p_sum, rc_name):
return resource
class ProviderDBHelperTestCase(test.NoDBTestCase):
class ProviderDBBase(test.NoDBTestCase):
USES_DB_SELF = True
def setUp(self):
super(ProviderDBHelperTestCase, self).setUp()
super(ProviderDBBase, self).setUp()
self.useFixture(fixtures.Database())
self.api_db = self.useFixture(fixtures.Database(database='api'))
self.ctx = context.RequestContext('fake-user', 'fake-project')
@ -101,6 +102,9 @@ class ProviderDBHelperTestCase(test.NoDBTestCase):
self.rp_uuid_to_name[rp.uuid] = name
return rp
class ProviderDBHelperTestCase(ProviderDBBase):
def test_get_provider_ids_matching_all(self):
# These RPs are named based on whether we expect them to be 'incl'uded
# or 'excl'uded in the result.
@ -233,15 +237,86 @@ class ProviderDBHelperTestCase(test.NoDBTestCase):
}
# Run it!
res = rp_obj._get_provider_ids_matching_all(self.ctx, resources)
res = rp_obj._get_provider_ids_matching_all(self.ctx, resources, {})
# We should get all the incl_* RPs
expected = [incl_biginv_noalloc, incl_extra_full]
self.assertEqual(set(rp.id for rp in expected), set(res))
# Now request that the providers must have a set of required traits and
# that this results in no results returned, since we haven't yet
# associated any traits with the providers
avx2_t = rp_obj.Trait.get_by_name(self.ctx, os_traits.HW_CPU_X86_AVX2)
# _get_provider_ids_matching_all()'s required_traits argument is a map,
# keyed by trait name, of the trait internal ID
req_traits = {os_traits.HW_CPU_X86_AVX2: avx2_t.id}
res = rp_obj._get_provider_ids_matching_all(self.ctx, resources,
req_traits)
class AllocationCandidatesTestCase(ProviderDBHelperTestCase):
self.assertEqual([], res)
# OK, now add the trait to one of the providers and verify that
# provider now shows up in our results
incl_biginv_noalloc.set_traits([avx2_t])
res = rp_obj._get_provider_ids_matching_all(self.ctx, resources,
req_traits)
self.assertEqual([incl_biginv_noalloc.id], res)
def test_get_provider_ids_having_all_traits(self):
    """Exercise _get_provider_ids_having_all_traits() against providers
    with no traits, overlapping traits and disjoint traits.
    """
    def check(trait_names, want_ids):
        # Translate the names into the name -> internal ID map that the
        # function under test expects.
        trait_map = {}
        if trait_names:
            trait_map = rp_obj._trait_ids_from_names(self.ctx, trait_names)
        got_ids = rp_obj._get_provider_ids_having_all_traits(
            self.ctx, trait_map)
        self.assertEqual(sorted(want_ids), sorted(got_ids))

    # A provider with no traits at all. It can never be returned, since
    # calling the method with no traits is illegal.
    self._create_provider('one')

    # A provider with a single trait
    rp2 = self._create_provider('two')
    _set_traits(rp2, 'HW_CPU_X86_TBM')

    # A provider sharing one trait with rp2, plus two more
    rp3 = self._create_provider('three')
    _set_traits(rp3, 'HW_CPU_X86_TBM', 'HW_CPU_X86_TSX', 'HW_CPU_X86_SGX')

    # A provider whose traits are disjoint from the others
    rp4 = self._create_provider('four')
    _set_traits(rp4, 'HW_CPU_X86_SSE2', 'HW_CPU_X86_SSE3', 'CUSTOM_FOO')

    # Requesting with no traits is not allowed
    for no_traits in (None, {}):
        with self.assertRaises(ValueError):
            rp_obj._get_provider_ids_having_all_traits(self.ctx, no_traits)

    scenarios = [
        # The common trait returns both providers having it
        (['HW_CPU_X86_TBM'], [rp2.id, rp3.id]),
        # Traits held by just one provider
        (['HW_CPU_X86_TSX'], [rp3.id]),
        (['HW_CPU_X86_TSX', 'HW_CPU_X86_SGX'], [rp3.id]),
        (['CUSTOM_FOO'], [rp4.id]),
        # Including the common trait still only yields rp3
        (['HW_CPU_X86_TBM', 'HW_CPU_X86_SGX'], [rp3.id]),
        (['HW_CPU_X86_TBM', 'HW_CPU_X86_TSX', 'HW_CPU_X86_SGX'], [rp3.id]),
        # Combinations that no single provider can satisfy
        (['HW_CPU_X86_TBM', 'HW_CPU_X86_TSX', 'CUSTOM_FOO'], []),
        (['HW_CPU_X86_TBM', 'HW_CPU_X86_TSX', 'HW_CPU_X86_SGX',
          'CUSTOM_FOO'], []),
        (['HW_CPU_X86_SGX', 'HW_CPU_X86_SSE3'], []),
        (['HW_CPU_X86_TBM', 'CUSTOM_FOO'], []),
        (['HW_CPU_X86_BMI'], []),
    ]
    for trait_names, want_ids in scenarios:
        check(trait_names, want_ids)

    # A trait that exists but is not associated with any provider
    rp_obj.Trait(self.ctx, name='CUSTOM_BAR').create()
    check(['CUSTOM_BAR'], [])
class AllocationCandidatesTestCase(ProviderDBBase):
"""Tests a variety of scenarios with both shared and non-shared resource
providers that the AllocationCandidates.get_by_requests() method returns a
set of alternative allocation requests and provider summaries that may be
@ -303,6 +378,22 @@ class AllocationCandidatesTestCase(ProviderDBHelperTestCase):
# Now we ought to be able to compare them
self.assertEqual(expected, observed)
def test_no_resources_in_first_request_group(self):
    """A lone request group with empty resources must raise ValueError."""
    empty_group = placement_lib.RequestGroup(use_same_provider=False,
                                             resources={})
    with self.assertRaises(ValueError):
        rp_obj.AllocationCandidates.get_by_requests(self.ctx, [empty_group])
def test_unknown_traits(self):
    """Requiring a trait name that cannot be resolved raises ValueError."""
    unknown = {'UNKNOWN_TRAIT'}
    group = placement_lib.RequestGroup(
        use_same_provider=False,
        resources=self.requested_resources,
        required_traits=unknown)
    with self.assertRaises(ValueError):
        rp_obj.AllocationCandidates.get_by_requests(self.ctx, [group])
def test_all_local(self):
"""Create some resource providers that can satisfy the request for
resources with local (non-shared) resources and verify that the
@ -310,13 +401,14 @@ class AllocationCandidatesTestCase(ProviderDBHelperTestCase):
each of these resource providers.
"""
# Create three compute node providers with VCPU, RAM and local disk
for name in ('cn1', 'cn2', 'cn3'):
cn = self._create_provider(name)
cn1, cn2, cn3 = (self._create_provider(name)
for name in ('cn1', 'cn2', 'cn3'))
for cn in (cn1, cn2, cn3):
_add_inventory(cn, fields.ResourceClass.VCPU, 24,
allocation_ratio=16.0)
_add_inventory(cn, fields.ResourceClass.MEMORY_MB, 32768,
min_unit=64, step_size=64, allocation_ratio=1.5)
total_gb = 1000 if name == 'cn3' else 2000
total_gb = 1000 if cn.name == 'cn3' else 2000
_add_inventory(cn, fields.ResourceClass.DISK_GB, total_gb,
reserved=100, min_unit=10, step_size=10,
allocation_ratio=1.0)
@ -374,6 +466,48 @@ class AllocationCandidatesTestCase(ProviderDBHelperTestCase):
]
self._validate_allocation_requests(expected, alloc_cands)
# Now let's add traits into the mix. Currently, none of the compute
# nodes has the AVX2 trait associated with it, so we should get 0
# results if we required AVX2
alloc_cands = rp_obj.AllocationCandidates.get_by_requests(
self.ctx,
requests=[placement_lib.RequestGroup(
use_same_provider=False,
resources=self.requested_resources,
required_traits=set([os_traits.HW_CPU_X86_AVX2])
)],
)
self._validate_allocation_requests([], alloc_cands)
# If we then associate the AVX2 trait to just compute node 2, we should
# get back just that compute node in the provider summaries
_set_traits(cn2, 'HW_CPU_X86_AVX2')
alloc_cands = rp_obj.AllocationCandidates.get_by_requests(
self.ctx,
requests=[placement_lib.RequestGroup(
use_same_provider=False,
resources=self.requested_resources,
required_traits=set([os_traits.HW_CPU_X86_AVX2])
)],
)
# Only cn2 should be in our allocation requests now since that's the
# only one with the required trait
expected = [
[('cn2', fields.ResourceClass.VCPU, 1),
('cn2', fields.ResourceClass.MEMORY_MB, 64),
('cn2', fields.ResourceClass.DISK_GB, 1500)],
]
self._validate_allocation_requests(expected, alloc_cands)
p_sums = alloc_cands.provider_summaries
self.assertEqual(1, len(p_sums))
# And let's verify the provider summary shows the trait
cn2_p_sum = _find_summary_for_provider(p_sums, cn2.uuid)
self.assertIsNotNone(cn2_p_sum)
self.assertEqual(1, len(cn2_p_sum.traits))
self.assertEqual(os_traits.HW_CPU_X86_AVX2, cn2_p_sum.traits[0].name)
def test_local_with_shared_disk(self):
"""Create some resource providers that can satisfy the request for
resources with local VCPU and MEMORY_MB but rely on a shared storage

View File

@ -74,6 +74,14 @@ class ResourceProviderBaseCase(test.NoDBTestCase):
class ResourceProviderTestCase(ResourceProviderBaseCase):
"""Test resource-provider objects' lifecycles."""
def test_provider_traits_empty_param(self):
    """_provider_traits() rejects an empty list of provider IDs."""
    with self.assertRaises(ValueError):
        rp_obj._provider_traits(self.ctx, [])
def test_trait_ids_from_names_empty_param(self):
    """_trait_ids_from_names() rejects an empty list of trait names."""
    with self.assertRaises(ValueError):
        rp_obj._trait_ids_from_names(self.ctx, [])
def test_create_resource_provider_requires_uuid(self):
resource_provider = rp_obj.ResourceProvider(
context = self.ctx)