Merge "Add a global limit on the number of allocation candidates"
commit 7d195137b6
@@ -26,11 +26,14 @@ placement_opts = [
         default=False,
         help="""
 If True, when limiting allocation candidate results, the results will be
-a random sampling of the full result set. If False, allocation candidates
-are returned in a deterministic but undefined order. That is, all things
-being equal, two requests for allocation candidates will return the same
-results in the same order; but no guarantees are made as to how that order
-is determined.
+a random sampling of the full result set. The
+[placement]max_allocation_candidates config might limit the size of the full
+set used as the input of the sampling.
+
+If False, allocation candidates are returned in a deterministic but undefined
+order. That is, all things being equal, two requests for allocation candidates
+will return the same results in the same order; but no guarantees are made as
+to how that order is determined.
 """),
     cfg.StrOpt(
         'incomplete_consumer_project_id',
@@ -59,6 +62,28 @@ doesn't provide.
 The number of times to retry, server-side, writing allocations when there is
 a resource provider generation conflict. Raising this value may be useful
 when many concurrent allocations to the same resource provider are expected.
 """),
+    cfg.IntOpt(
+        'max_allocation_candidates',
+        default=-1,
+        help="""
+The maximum number of allocation candidates placement generates for a single
+request. This is a global limit to avoid excessive memory use and query
+runtime. If set to -1, the number of generated candidates is limited only by
+the number and structure of the resource providers and the content of the
+allocation_candidates query.
+
+Note that the limit param of the allocation_candidates query is applied after
+all the viable candidates have been generated, so that limit alone is not
+enough to restrict the runtime or memory consumption of the query.
+
+In a deployment with thousands of resource providers, or where the deployment
+has wide and symmetric provider trees, i.e. multiple child providers under
+the same root having inventory from the same resource class (e.g. nova's mdev
+GPU or PCI in Placement features), we recommend tuning this config option
+based on the memory available for the placement service and the client
+timeout setting on the client side. A good initial value could be around
+100000.
+"""),
 ]
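For illustration, a deployment would set this cap in the [placement] section of placement.conf; the value below is just the starting point the help text above suggests, not a recommendation for every deployment:

[placement]
max_allocation_candidates = 100000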
@@ -731,6 +731,7 @@ def _merge_candidates(candidates, rw_ctx):
     areqs = set()
     all_suffixes = set(candidates)
     num_granular_groups = len(all_suffixes - set(['']))
+    max_a_c = rw_ctx.config.placement.max_allocation_candidates
     for areq_lists_by_suffix in areq_lists_by_anchor.values():
         # Filter out any entries that don't have allocation requests for
         # *all* suffixes (i.e. all RequestGroups)
@@ -754,6 +755,10 @@ def _merge_candidates(candidates, rw_ctx):
         #   [areq__B, areq_1_B, ..., areq_42_B],
         #   ...,
         # ]
+
+        # This loops over each merged candidate, where a candidate is
+        # represented by the areq_list containing the allocations that
+        # fulfill each request group.
         for areq_list in itertools.product(
                 *list(areq_lists_by_suffix.values())):
             # At this point, each AllocationRequest in areq_list is still
@@ -790,6 +795,14 @@ def _merge_candidates(candidates, rw_ctx):
                 continue
             areqs.add(areq)
 
+            if max_a_c >= 0 and len(areqs) >= max_a_c:
+                # This duplicated check will go away in the next patch that
+                # refactors this logic a bit.
+                break
+
+        if max_a_c >= 0 and len(areqs) >= max_a_c:
+            break
+
     # It's possible we've filtered out everything. If so, short out.
     if not areqs:
         return [], []
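The cap is enforced inside the generation loop rather than after it because itertools.product yields combinations lazily: breaking early means the rest of the cross product is never materialized. A minimal standalone sketch of that pattern, with illustrative names (groups_by_suffix, MAX_A_C) that are not placement's API; as with the config option, -1 disables the cap:

import itertools

MAX_A_C = 5  # stands in for [placement]max_allocation_candidates; -1 = no cap

groups_by_suffix = {
    '': ['rootA', 'rootB'],      # the unsuffixed request group
    '1': ['pf0', 'pf1', 'pf2'],  # granular group 1
    '2': ['pf0', 'pf1', 'pf2'],  # granular group 2
}

candidates = set()
for combo in itertools.product(*groups_by_suffix.values()):
    candidates.add(combo)
    if MAX_A_C >= 0 and len(candidates) >= MAX_A_C:
        # product() is lazy, so the remaining 2 * 3 * 3 - 5 = 13
        # combinations are never generated at all.
        break

print(len(candidates))  # 5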
@@ -373,6 +373,10 @@ class RequestWideSearchContext(object):
             return True
         return False
 
+    @property
+    def config(self):
+        return self._ctx.config
+
 
 @db_api.placement_context_manager.reader
 def provider_ids_from_uuid(context, uuid):
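This small property is what makes the rw_ctx.config.placement.max_allocation_candidates lookup in _merge_candidates above resolve: the request-wide search context simply forwards the config object of the context it wraps.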
placement/tests/functional/test_allocation_candidates.py (new file, 142 lines)
@@ -0,0 +1,142 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections

from placement import direct
from placement.tests.functional import base
from placement.tests.functional.db import test_base as tb


class TestWideTreeAllocationCandidateExplosion(base.TestCase):
    """Test candidate generation ordering and limiting in wide symmetric
    trees, i.e. with trees of many similar child RPs.
    """

    def setUp(self):
        super().setUp()
        self.headers = {
            'x-auth-token': 'admin',
            'content-type': 'application/json',
            'OpenStack-API-Version': 'placement 1.38',
            'X_ROLES': 'admin,service'
        }

        self.conf_fixture.conf.set_override(
            "max_allocation_candidates", 100000, group="placement")

    def create_tree(self, num_roots, num_child, num_res_per_child):
        self.roots = {}

        for i in range(num_roots):
            compute = tb.create_provider(
                self.context, f'compute{i}')
            self.roots[compute.uuid] = compute.name
            tb.add_inventory(compute, 'VCPU', 8)
            tb.add_inventory(compute, 'MEMORY_MB', 4096)
            tb.add_inventory(compute, 'DISK_GB', 500)

            for j in range(num_child):
                child = tb.create_provider(
                    self.context, f'compute{i}:PF{j}', parent=compute.uuid)
                tb.add_inventory(child, 'CUSTOM_VF', num_res_per_child)

    @staticmethod
    def get_candidate_query(num_groups, num_res, limit):
        query = ("/allocation_candidates?"
                 "resources=DISK_GB%3A20%2CMEMORY_MB%3A2048%2CVCPU%3A2")

        for g in range(num_groups):
            query += f"&resources{g}=CUSTOM_VF%3A{num_res}"

        query += "&group_policy=none"
        query += f"&limit={limit}"

        return query
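    # For illustration (URL-decoded for readability), num_groups=2,
    # num_res=1, limit=1000 makes the method above build:
    #   /allocation_candidates?resources=DISK_GB:20,MEMORY_MB:2048,VCPU:2
    #       &resources0=CUSTOM_VF:1&resources1=CUSTOM_VF:1
    #       &group_policy=none&limit=1000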

    def _test_num_candidates_and_computes(
        self, computes, pfs, vfs_per_pf, req_groups, req_res_per_group,
        req_limit, expected_candidates, expected_computes_with_candidates
    ):
        self.create_tree(
            num_roots=computes, num_child=pfs, num_res_per_child=vfs_per_pf)

        conf = self.conf_fixture.conf
        with direct.PlacementDirect(conf) as client:
            resp = client.get(
                self.get_candidate_query(
                    num_groups=req_groups, num_res=req_res_per_group,
                    limit=req_limit),
                headers=self.headers)
            self.assertEqual(200, resp.status_code)

            body = resp.json()
            self.assertEqual(
                expected_candidates, len(body["allocation_requests"]))

            root_rps = set(self.roots.keys())
            roots_with_candidates = set()
            nr_of_candidates_per_compute = collections.Counter()
            for ar in body["allocation_requests"]:
                allocated_rps = set(ar["allocations"].keys())
                root_allocated_rps = allocated_rps.intersection(root_rps)
                roots_with_candidates |= root_allocated_rps
                nr_of_candidates_per_compute.update(root_allocated_rps)

            self.assertEqual(
                expected_computes_with_candidates, len(roots_with_candidates))
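    # Note: nr_of_candidates_per_compute is not asserted directly; the
    # per-compute distribution it collects is presumably what the balanced /
    # unbalanced wording in the tests below refers to.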
    def test_all_candidates_generated_and_returned(self):
        self._test_num_candidates_and_computes(
            computes=2, pfs=8, vfs_per_pf=8, req_groups=2, req_res_per_group=1,
            req_limit=1000,
            expected_candidates=2 * 64, expected_computes_with_candidates=2)
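    # Candidate-count arithmetic behind these tests: with group_policy=none
    # each granular group can be satisfied by any of the 8 PFs, so one
    # compute yields 8 ** req_groups candidates and two computes double
    # that: 2 groups -> 2 * 8**2 = 128, 4 groups -> 2 * 8**4 = 8192,
    # 6 groups -> 2 * 8**6 = 524288.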

    def test_requested_limit_is_hit_result_balanced(self):
        # 8192 possible candidates, all generated, 1000 returned; the result
        # is balanced due to the use of Python sets.
        self._test_num_candidates_and_computes(
            computes=2, pfs=8, vfs_per_pf=8, req_groups=4, req_res_per_group=1,
            req_limit=1000,
            expected_candidates=1000, expected_computes_with_candidates=2)

    def test_too_many_candidates_global_limit_is_hit_result_unbalanced(self):
        # With max_allocation_candidates set to 100k this test now runs in
        # reasonable time (10 sec on my machine); without that limit it
        # would time out.
        # However, with the global limit in place only the first compute
        # gets candidates.
        # There are 524288 valid candidates, the generation stops at 100k,
        # and only 1000 are returned; the result is unbalanced as the first
        # 100k candidates are always from the first compute.
        self._test_num_candidates_and_computes(
            computes=2, pfs=8, vfs_per_pf=8, req_groups=6, req_res_per_group=1,
            req_limit=1000,
            expected_candidates=1000, expected_computes_with_candidates=1)

    def test_global_limit_hit(self):
        # 8192 possible candidates; the global limit is set to 8000 and the
        # request limit is higher, so the number of candidates is limited by
        # the global limit.
        self.conf_fixture.conf.set_override(
            "max_allocation_candidates", 8000, group="placement")
        self._test_num_candidates_and_computes(
            computes=2, pfs=8, vfs_per_pf=8, req_groups=4, req_res_per_group=1,
            req_limit=9000,
            expected_candidates=8000, expected_computes_with_candidates=2)

    def test_no_global_limit(self):
        # 8192 possible candidates, no global limit, and a high request
        # limit, so all candidates are returned.
        self.conf_fixture.conf.set_override(
            "max_allocation_candidates", -1, group="placement")
        self._test_num_candidates_and_computes(
            computes=2, pfs=8, vfs_per_pf=8, req_groups=4, req_res_per_group=1,
            req_limit=9000,
            expected_candidates=8192, expected_computes_with_candidates=2)