From c1de4fb13e4c7d220ff2e42f24f06b4bbe53521f Mon Sep 17 00:00:00 2001 From: melanie witt Date: Tue, 20 Oct 2020 21:46:13 +0000 Subject: [PATCH] Use subqueryload() instead of joinedload() for (system_)metadata Currently, when we "get" a single instance from the database and we load metadata and system_metadata, we do so using a joinedload() which does JOINs with the respective tables. Because of the one-to-many relationship between an instance and (system_)metadata records, doing the database query this way can result in a large number of additional rows being returned unnecessarily and cause a large data transfer. This is similar to the problem addressed by change I0610fb16ccce2ee95c318589c8abcc30613a3fe9 which added separate queries for (system_)metadata when we "get" multiple instances. We don't, however, reuse the same code for this change because _instances_fill_metadata converts the instance database object to a dict, and some callers of _instance_get_by_uuid need to be able to access an instance database object attached to the session (example: instance_update_and_get_original). By using subqueryload() [1], we can perform the additional queries for (system_)metadata to solve the problem with a similar approach. Closes-Bug: #1799298 [1] https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#subquery-eager-loading Change-Id: I5c071f70f669966e9807b38e99077c1cae5b4606 (cherry picked from commit e728fe668a6de886455f2dbaf655c8a151462c8c) (cherry picked from commit 63d2e62c3a223f883ca810f4c66a2a236cf3d483) (cherry picked from commit e7a45e0335e4cf44fec7f7b8d2505f5b95445cf9) (cherry picked from commit 4350074029ffbc03ab238c442ec86fab6560e365) (cherry picked from commit ad7e4fb8f4ea6c458af00ec7aa0b321dc37c097c) (cherry picked from commit 68f80d201310dcaf688d8437a46c8d272ead317d) --- nova/db/sqlalchemy/api.py | 17 ++++++++++++++++- nova/tests/unit/db/test_db_api.py | 8 ++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index ac577b873eeb..c57112859182 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -49,6 +49,7 @@ from sqlalchemy.orm import contains_eager from sqlalchemy.orm import joinedload from sqlalchemy.orm import joinedload_all from sqlalchemy.orm import noload +from sqlalchemy.orm import subqueryload from sqlalchemy.orm import undefer from sqlalchemy.schema import Table from sqlalchemy import sql @@ -1919,13 +1920,27 @@ def _build_instance_get(context, columns_to_join=None): continue if 'extra.' in column: query = query.options(undefer(column)) + elif column in ['metadata', 'system_metadata']: + # NOTE(melwitt): We use subqueryload() instead of joinedload() for + # metadata and system_metadata because of the one-to-many + # relationship of the data. Directly joining these columns can + # result in a large number of additional rows being queried if an + # instance has a large number of (system_)metadata items, resulting + # in a large data transfer. Instead, the subqueryload() will + # perform additional queries to obtain metadata and system_metadata + # for the instance. + query = query.options(subqueryload(column)) else: query = query.options(joinedload(column)) # NOTE(alaski) Stop lazy loading of columns not needed. for col in ['metadata', 'system_metadata']: if col not in columns_to_join: query = query.options(noload(col)) - return query + # NOTE(melwitt): We need to use order_by() so that the + # additional queries emitted by subqueryload() include the same ordering as + # used by the parent query. + # https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#the-importance-of-ordering + return query.order_by(models.Instance.id) def _instances_fill_metadata(context, instances, manual_joins=None): diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 37d11880ac76..fe4452f835e3 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -2372,6 +2372,14 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin): sys_meta = utils.metadata_to_dict(inst['system_metadata']) self.assertEqual(sys_meta, self.sample_data['system_metadata']) + def test_instance_get_with_meta(self): + inst_id = self.create_instance_with_args().id + inst = db.instance_get(self.ctxt, inst_id) + meta = utils.metadata_to_dict(inst['metadata']) + self.assertEqual(meta, self.sample_data['metadata']) + sys_meta = utils.metadata_to_dict(inst['system_metadata']) + self.assertEqual(sys_meta, self.sample_data['system_metadata']) + def test_instance_update(self): instance = self.create_instance_with_args() metadata = {'host': 'bar', 'key2': 'wuff'}