Merge "scheduler: fix _get_sharing_providers to support unlimited aggr"

Authored by Zuul on 2024-07-07 00:02:39 +00:00; committed by Gerrit Code Review
commit 240296298c
4 changed files with 101 additions and 29 deletions

@@ -781,6 +781,20 @@ Possible values:
 Related options:
 
 * ``shutdown_timeout``
 """),
+    cfg.IntOpt('sharing_providers_max_uuids_per_request',
+        default=200,
+        min=1,
+        help="""
+Maximum number of aggregate UUIDs per API request. The default is 200.
+
+In deployments with a large number of aggregates, a 'Request-Too-Long'
+error may be raised by the web server or load balancer. This value
+allows setting the batch size to limit the query length.
+
+Possible values:
+
+* Any positive integer.
+"""),
     cfg.IntOpt('resource_provider_association_refresh',
         default=300,
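The option is read from the ``[compute]`` group (the code change below looks it up as ``CONF.compute.sharing_providers_max_uuids_per_request``). As an illustration only, a nova.conf entry might look like the following; the value 100 is an arbitrary example, not a recommendation:

    [compute]
    # Cap each placement query at 100 aggregate UUIDs so the request URL
    # stays below the web server's or load balancer's length limit.
    sharing_providers_max_uuids_per_request = 100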

@@ -497,25 +497,39 @@ class SchedulerReportClient(object):
         if not agg_uuids:
             return []
 
-        aggs = ','.join(agg_uuids)
-        url = "/resource_providers?member_of=in:%s&required=%s" % (
-            aggs, os_traits.MISC_SHARES_VIA_AGGREGATE)
-        resp = self.get(url, version='1.18',
-                        global_request_id=context.global_id)
-        if resp.status_code == 200:
-            return resp.json()['resource_providers']
-
-        msg = _("[%(placement_req_id)s] Failed to retrieve sharing resource "
-                "providers associated with the following aggregates from "
-                "placement API: %(aggs)s. Got %(status_code)d: %(err_text)s.")
-        args = {
-            'aggs': aggs,
-            'status_code': resp.status_code,
-            'err_text': resp.text,
-            'placement_req_id': get_placement_request_id(resp),
-        }
-        LOG.error(msg, args)
-        raise exception.ResourceProviderRetrievalFailed(message=msg % args)
+        maxuuids = CONF.compute.sharing_providers_max_uuids_per_request
+
+        agg_uuids = list(agg_uuids)
+        resource_providers = {}
+        for i in range(0, len(agg_uuids), maxuuids):
+            aggs = ','.join(agg_uuids[i:i + maxuuids])
+            url = "/resource_providers?member_of=in:%s&required=%s" % (
+                aggs, os_traits.MISC_SHARES_VIA_AGGREGATE)
+            resp = self.get(url, version='1.18',
+                            global_request_id=context.global_id)
+            if resp.status_code == 200:
+                # We want to ensure that an RP on different aggregate
+                # will not be duplicated.
+                for rp in resp.json()['resource_providers']:
+                    if not rp['uuid'] in resource_providers:
+                        resource_providers[rp['uuid']] = rp
+            else:
+                msg = _("[%(placement_req_id)s] %(iquery)s/%(isize)s Failed "
+                        "to retrieve sharing resource providers associated "
+                        "with the following aggregates from placement API: "
+                        "%(aggs)s. Got %(status_code)d: %(err_text)s.")
+                args = {
+                    'aggs': aggs,
+                    'status_code': resp.status_code,
+                    'err_text': resp.text,
+                    'placement_req_id': get_placement_request_id(resp),
+                    'iquery': i + 1,
+                    'isize': len(agg_uuids)
+                }
+                LOG.error(msg, args)
+                raise exception.ResourceProviderRetrievalFailed(
+                    message=msg % args)
+        return list(resource_providers.values())
 
     def get_providers_in_tree(self, context, uuid):
         """Queries the placement API for a list of the resource providers in

@@ -2335,8 +2335,11 @@ class TestProviderOperations(SchedulerReportClientTestCase):
             logging_mock.call_args[0][1]['placement_req_id'])
 
     def test_get_sharing_providers(self):
+        self.flags(
+            sharing_providers_max_uuids_per_request=3, group='compute')
+
         resp_mock = mock.Mock(status_code=200)
-        rpjson = [
+        rpjson1 = [
             {
                 'uuid': uuids.sharing1,
                 'name': 'bandwidth_provider',
@@ -2353,20 +2356,54 @@ class TestProviderOperations(SchedulerReportClientTestCase):
                 'root_provider_uuid': None,
                 'links': [],
             },
+            {
+                'uuid': uuids.sharing3,
+                'name': 'storage_provider',
+                'generation': 42,
+                'parent_provider_uuid': None,
+                'root_provider_uuid': None,
+                'links': [],
+            }
         ]
-        resp_mock.json.return_value = {'resource_providers': rpjson}
+        rpjson2 = [
+            {
+                'uuid': uuids.sharing4,
+                'name': 'storage_provider',
+                'generation': 42,
+                'parent_provider_uuid': None,
+                'root_provider_uuid': None,
+                'links': [],
+            },
+        ]
+        resp_mock.json.side_effect = [
+            {'resource_providers': rpjson1},
+            {'resource_providers': rpjson2}
+        ]
         self.ks_adap_mock.get.return_value = resp_mock
 
         result = self.client._get_sharing_providers(
-            self.context, [uuids.agg1, uuids.agg2])
+            self.context, [uuids.agg1, uuids.agg2,
+                           uuids.agg3, uuids.agg4])
 
-        expected_url = ('/resource_providers?member_of=in:' +
-                        ','.join((uuids.agg1, uuids.agg2)) +
-                        '&required=MISC_SHARES_VIA_AGGREGATE')
-        self.ks_adap_mock.get.assert_called_once_with(
-            expected_url, microversion='1.18',
-            global_request_id=self.context.global_id)
-        self.assertEqual(rpjson, result)
+        self.ks_adap_mock.get.assert_has_calls(
+            [
+                # Asserting first request with 3 uuids
+                mock.call(
+                    '/resource_providers?member_of=in:' +
+                    ','.join((uuids.agg1, uuids.agg2, uuids.agg3)) +
+                    '&required=MISC_SHARES_VIA_AGGREGATE',
+                    microversion='1.18',
+                    global_request_id=self.context.global_id),
+                mock.call().json(),
+                # Asserting second request with 1 uuid
+                mock.call(
+                    '/resource_providers?member_of=in:' +
+                    uuids.agg4 +
+                    '&required=MISC_SHARES_VIA_AGGREGATE',
+                    microversion='1.18',
+                    global_request_id=self.context.global_id),
+                mock.call().json(),
+            ])
+        self.assertEqual(rpjson1 + rpjson2, result)
 
     def test_get_sharing_providers_emptylist(self):
         self.assertEqual(
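Because the test pins ``sharing_providers_max_uuids_per_request`` to 3 and passes four aggregates, the client slices them into one batch of three and one batch of one, which is why exactly two placement calls are asserted; ``resp_mock.json.side_effect`` then serves ``rpjson1`` to the first call and ``rpjson2`` to the second, the standard ``unittest.mock`` behaviour for an iterable ``side_effect``. A quick sketch of the slicing, independent of the test:

    agg_uuids = ['agg1', 'agg2', 'agg3', 'agg4']
    max_per_request = 3
    batches = [agg_uuids[i:i + max_per_request]
               for i in range(0, len(agg_uuids), max_per_request)]
    print(batches)  # [['agg1', 'agg2', 'agg3'], ['agg4']]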

@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Introduced a new compute configuration option
+    `sharing_providers_max_uuids_per_request` and applied a fix to
+    handle the "Request-Too-Long" error that can occur when querying
+    the placement API with a large number of aggregate UUIDs.