Optimize Gnocchi fetcher processing time

Following in the footsteps of [1]: depending on the environment where CloudKitty is deployed, when the Gnocchi fetcher is used and the environment is considerably big, CloudKitty tends to take too much time loading the scope IDs from the Gnocchi API. To reduce that processing time, we adopted a different approach to discover the scope IDs registered in Gnocchi. This patch proposes a change in that process, building on top of [1]: it loads a chunk of Gnocchi resources, retrieves the scope IDs from that chunk, and keeps only the unique ones right away. Thus, in the worst-case scenario, we would only have 1000 resources loaded into memory. Furthermore, we use the ``search`` API method from Gnocchi to select only resources whose scopes are not in the set already loaded. Therefore, we do not need to go over all of the resources in Gnocchi; we only list the resources whose scope IDs have not been loaded yet. This reduced the processing time to load scope IDs from about 5 minutes to 40 seconds in an environment with 500 scopes and ~50,000 resources. [1] https://review.opendev.org/c/openstack/cloudkitty/+/864269 Change-Id: I9fa8934c5c857bd0069bb8038423e0126739a310
2023-01-27 08:49:33 -03:00 · 2023-01-27 08:49:33 -03:00 · 15be883a18
commit 15be883a18
parent 974b8b9607
3 changed files with 81 additions and 38 deletions
--- a/cloudkitty/fetcher/gnocchi.py
+++ b/cloudkitty/fetcher/gnocchi.py
@ -127,36 +127,42 @@ class GnocchiFetcher(fetcher.BaseFetcher):
        scope_attribute = CONF.fetcher_gnocchi.scope_attribute
        resource_types = CONF.fetcher_gnocchi.resource_types
        for resource_type in resource_types:
-            marker = None
            while True:
-                resources_chunk = self._conn.resource.list(
-                    resource_type=resource_type, marker=marker, details=True)
+                search_scopes_query = None
+                if unique_scope_ids:
+                    unique_scope_ids_list = list(unique_scope_ids)
+                    unique_scope_ids_list.sort()

+                    search_scopes_query = {"not": {
+                        "in": {scope_attribute: unique_scope_ids_list}}
+                    }
+
+                resources_chunk = self._conn.resource.search(
+                    resource_type=resource_type, details=True,
+                    query=search_scopes_query
+                )
                chunk_len = len(resources_chunk)

-                is_last_chunk_equals_marker =\
-                    chunk_len > 0 and resources_chunk[
-                        chunk_len - 1]['id'] == marker
-
-                if chunk_len < 1 or (
-                        chunk_len == 1 and is_last_chunk_equals_marker):
+                if chunk_len < 1:
                    LOG.debug("Scopes IDs [%s] loaded. The total number of "
                              "unique scope IDs loaded is [%s]. Total number "
                              "of resources navigated [%s].", unique_scope_ids,
                              len(unique_scope_ids), total_resources_navigated)
                    break

-                marker = resources_chunk[-1]['id']
                total_resources_navigated += chunk_len

-                scope_ids = [resource.get(
-                    scope_attribute, None) for resource in resources_chunk]
-                scope_ids = [s_id for s_id in scope_ids if s_id]
-                unique_scope_ids.update(set(scope_ids))
-                LOG.debug("Scopes IDs [%s] loaded. The total number of unique "
-                          "scopes IDs loaded so far is [%s]. Next chunk with "
-                          "Markers [%s]. Total number of resources navigated "
-                          "[%s].", scope_ids, len(scope_ids), marker,
-                          total_resources_navigated)
+                scope_ids = [
+                    resource.get(scope_attribute) for resource in
+                    resources_chunk if resource.get(scope_attribute)]

+                unique_scope_ids.update(set(scope_ids))
+
+                LOG.debug("Scopes IDs [%s] loaded. The total number of unique "
+                          "scopes IDs loaded so far is [%s]. Next chunk will "
+                          "be loaded filtering by resources not in scope "
+                          "attributes [%s] with values [%s]. Total number of "
+                          "resources navigated [%s].", scope_ids,
+                          len(scope_ids), scope_attribute, unique_scope_ids,
+                          total_resources_navigated)
        return list(unique_scope_ids)
--- a/cloudkitty/tests/fetchers/test_gnocchi.py
+++ b/cloudkitty/tests/fetchers/test_gnocchi.py
@ -46,11 +46,11 @@ class GnocchiFetcherTest(tests.TestCase):

    def test_get_tenants_marker_list_resource_last_call(self):
        with mock.patch.object(
-                self.fetcher._conn.resource, 'list') as resource_list:
+                self.fetcher._conn.resource, 'search') as resource_list:
            resource_list.side_effect = [
                self.resource_list,
                [{'id': "some_replicated_id",
-                  'project_id': 'some_replicated_id_project'}]]
+                  'project_id': 'some_replicated_id_project'}], []]

            all_scope_ids = self.fetcher.get_tenants()
            all_scope_ids.sort()
@ -58,14 +58,24 @@ class GnocchiFetcherTest(tests.TestCase):
            self.assertEqual(self.unique_scope_ids, all_scope_ids)

            resource_list.assert_has_calls([
-                mock.call(resource_type="generic", marker=None, details=True),
-                mock.call(resource_type="generic", marker="some_replicated_id",
-                          details=True)
+                mock.call(resource_type='generic', details=True, query=None),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}}),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}})
            ])

    def test_get_tenants_empty_list_resource_last_call(self):
        with mock.patch.object(
-                self.fetcher._conn.resource, 'list') as resource_list:
+                self.fetcher._conn.resource, 'search') as resource_list:
            resource_list.side_effect = [
                self.resource_list, self.resource_list, []]

@ -75,15 +85,24 @@ class GnocchiFetcherTest(tests.TestCase):
            self.assertEqual(self.unique_scope_ids, all_scope_ids)

            resource_list.assert_has_calls([
-                mock.call(resource_type="generic", marker=None, details=True),
-                mock.call(resource_type="generic", marker="some_replicated_id",
-                          details=True),
-                mock.call(resource_type="generic", marker="some_replicated_id",
-                          details=True)], any_order=False)
+                mock.call(resource_type='generic', details=True, query=None),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}}),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}})],
+                any_order=False)

    def test_get_tenants_scope_id_as_none(self):
        with mock.patch.object(
-                self.fetcher._conn.resource, 'list') as resource_list:
+                self.fetcher._conn.resource, 'search') as resource_list:
            resource_list.side_effect = [
                self.resource_list, self.resource_list,
                [{"id": "test", "project_id": None}], []]
@ -94,11 +113,23 @@ class GnocchiFetcherTest(tests.TestCase):
            self.assertEqual(self.unique_scope_ids, all_scope_ids)

            resource_list.assert_has_calls([
-                mock.call(resource_type="generic", marker=None, details=True),
-                mock.call(resource_type="generic", marker="some_replicated_id",
-                          details=True),
-                mock.call(resource_type="generic", marker="some_replicated_id",
-                          details=True),
-                mock.call(resource_type="generic", marker="test",
-                          details=True)
+                mock.call(resource_type='generic', details=True, query=None),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}}),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}}),
+                mock.call(resource_type='generic', details=True,
+                          query={'not': {'in': {'project_id': [
+                              'some_other_project_id',
+                              'some_other_project_id2',
+                              'some_other_project_id3',
+                              'some_replicated_id_project']}}})
            ], any_order=False)
--- a/releasenotes/notes/optimize_gnochi-fetcher-runtime-3604026816.yaml
+++ b/releasenotes/notes/optimize_gnochi-fetcher-runtime-3604026816.yaml
@ -0,0 +1,6 @@
+---
+issues:
+  - |
+    Optimize Gnocchi fetcher runtime to avoid taking too long to load scopes
+    when CloudKitty runs in cloud environments with hundreds of thousands of
+    resources.