Optimize Gnocchi fetcher processing time

Following in the footsteps of [1]: when the Gnocchi fetcher is used in a
considerably large environment, CloudKitty tends to take too much time
loading the scope IDs from the Gnocchi API. To reduce that processing
time, we adopted a different approach to discover the scope IDs
registered in Gnocchi.

This patch proposes a change in that process, building on top of [1]: we
load a chunk of Gnocchi resources, retrieve their scope IDs, and keep
only the unique ones right away. Thus, in the worst case scenario, we
would only have 1000 resources loaded into memory. Furthermore, we use
the ``search`` API method from Gnocchi to request only the resources
whose scope is not in the set already loaded. Therefore, we do not need
to go over all of the resources in Gnocchi; we only list the resources
whose scope IDs have not been loaded yet.

This reduced the processing time to load scope IDs from about 5 minutes
to 40 seconds in an environment with 500 scopes and ~50,000 resources.

[1] https://review.opendev.org/c/openstack/cloudkitty/+/864269

Change-Id: I9fa8934c5c857bd0069bb8038423e0126739a310
This commit is contained in:
Rafael Weingärtner 2023-01-27 08:49:33 -03:00 committed by Pierre Riteau
parent 974b8b9607
commit 15be883a18
3 changed files with 81 additions and 38 deletions

View File

@ -127,36 +127,42 @@ class GnocchiFetcher(fetcher.BaseFetcher):
scope_attribute = CONF.fetcher_gnocchi.scope_attribute
resource_types = CONF.fetcher_gnocchi.resource_types
for resource_type in resource_types:
marker = None
while True:
resources_chunk = self._conn.resource.list(
resource_type=resource_type, marker=marker, details=True)
search_scopes_query = None
if unique_scope_ids:
unique_scope_ids_list = list(unique_scope_ids)
unique_scope_ids_list.sort()
search_scopes_query = {"not": {
"in": {scope_attribute: unique_scope_ids_list}}
}
resources_chunk = self._conn.resource.search(
resource_type=resource_type, details=True,
query=search_scopes_query
)
chunk_len = len(resources_chunk)
is_last_chunk_equals_marker =\
chunk_len > 0 and resources_chunk[
chunk_len - 1]['id'] == marker
if chunk_len < 1 or (
chunk_len == 1 and is_last_chunk_equals_marker):
if chunk_len < 1:
LOG.debug("Scopes IDs [%s] loaded. The total number of "
"unique scope IDs loaded is [%s]. Total number "
"of resources navigated [%s].", unique_scope_ids,
len(unique_scope_ids), total_resources_navigated)
break
marker = resources_chunk[-1]['id']
total_resources_navigated += chunk_len
scope_ids = [resource.get(
scope_attribute, None) for resource in resources_chunk]
scope_ids = [s_id for s_id in scope_ids if s_id]
unique_scope_ids.update(set(scope_ids))
LOG.debug("Scopes IDs [%s] loaded. The total number of unique "
"scopes IDs loaded so far is [%s]. Next chunk with "
"Markers [%s]. Total number of resources navigated "
"[%s].", scope_ids, len(scope_ids), marker,
total_resources_navigated)
scope_ids = [
resource.get(scope_attribute) for resource in
resources_chunk if resource.get(scope_attribute)]
unique_scope_ids.update(set(scope_ids))
LOG.debug("Scopes IDs [%s] loaded. The total number of unique "
"scopes IDs loaded so far is [%s]. Next chunk will "
"be loaded filtering by resources not in scope "
"attributes [%s] with values [%s]. Total number of "
"resources navigated [%s].", scope_ids,
len(scope_ids), scope_attribute, unique_scope_ids,
total_resources_navigated)
return list(unique_scope_ids)

View File

@ -46,11 +46,11 @@ class GnocchiFetcherTest(tests.TestCase):
def test_get_tenants_marker_list_resource_last_call(self):
with mock.patch.object(
self.fetcher._conn.resource, 'list') as resource_list:
self.fetcher._conn.resource, 'search') as resource_list:
resource_list.side_effect = [
self.resource_list,
[{'id': "some_replicated_id",
'project_id': 'some_replicated_id_project'}]]
'project_id': 'some_replicated_id_project'}], []]
all_scope_ids = self.fetcher.get_tenants()
all_scope_ids.sort()
@ -58,14 +58,24 @@ class GnocchiFetcherTest(tests.TestCase):
self.assertEqual(self.unique_scope_ids, all_scope_ids)
resource_list.assert_has_calls([
mock.call(resource_type="generic", marker=None, details=True),
mock.call(resource_type="generic", marker="some_replicated_id",
details=True)
mock.call(resource_type='generic', details=True, query=None),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}}),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}})
])
def test_get_tenants_empty_list_resource_last_call(self):
with mock.patch.object(
self.fetcher._conn.resource, 'list') as resource_list:
self.fetcher._conn.resource, 'search') as resource_list:
resource_list.side_effect = [
self.resource_list, self.resource_list, []]
@ -75,15 +85,24 @@ class GnocchiFetcherTest(tests.TestCase):
self.assertEqual(self.unique_scope_ids, all_scope_ids)
resource_list.assert_has_calls([
mock.call(resource_type="generic", marker=None, details=True),
mock.call(resource_type="generic", marker="some_replicated_id",
details=True),
mock.call(resource_type="generic", marker="some_replicated_id",
details=True)], any_order=False)
mock.call(resource_type='generic', details=True, query=None),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}}),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}})],
any_order=False)
def test_get_tenants_scope_id_as_none(self):
with mock.patch.object(
self.fetcher._conn.resource, 'list') as resource_list:
self.fetcher._conn.resource, 'search') as resource_list:
resource_list.side_effect = [
self.resource_list, self.resource_list,
[{"id": "test", "project_id": None}], []]
@ -94,11 +113,23 @@ class GnocchiFetcherTest(tests.TestCase):
self.assertEqual(self.unique_scope_ids, all_scope_ids)
resource_list.assert_has_calls([
mock.call(resource_type="generic", marker=None, details=True),
mock.call(resource_type="generic", marker="some_replicated_id",
details=True),
mock.call(resource_type="generic", marker="some_replicated_id",
details=True),
mock.call(resource_type="generic", marker="test",
details=True)
mock.call(resource_type='generic', details=True, query=None),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}}),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}}),
mock.call(resource_type='generic', details=True,
query={'not': {'in': {'project_id': [
'some_other_project_id',
'some_other_project_id2',
'some_other_project_id3',
'some_replicated_id_project']}}})
], any_order=False)

View File

@ -0,0 +1,6 @@
---
issues:
- |
Optimize Gnocchi fetcher runtime to avoid taking too long to load scopes
when CloudKitty runs in cloud environments with hundreds of thousands of
resources.