From 63d2e62c3a223f883ca810f4c66a2a236cf3d483 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Tue, 20 Oct 2020 21:46:13 +0000 Subject: [PATCH] Use subqueryload() instead of joinedload() for (system_)metadata Currently, when we "get" a single instance from the database and we load metadata and system_metadata, we do so using a joinedload() which does JOINs with the respective tables. Because of the one-to-many relationship between an instance and (system_)metadata records, doing the database query this way can result in a large number of additional rows being returned unnecessarily and cause a large data transfer. This is similar to the problem addressed by change I0610fb16ccce2ee95c318589c8abcc30613a3fe9 which added separate queries for (system_)metadata when we "get" multiple instances. We don't, however, reuse the same code for this change because _instances_fill_metadata converts the instance database object to a dict, and some callers of _instance_get_by_uuid need to be able to access an instance database object attached to the session (example: instance_update_and_get_original). By using subqueryload() [1], we can perform the additional queries for (system_)metadata to solve the problem with a similar approach. Closes-Bug: #1799298 [1] https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#subquery-eager-loading Change-Id: I5c071f70f669966e9807b38e99077c1cae5b4606 (cherry picked from commit e728fe668a6de886455f2dbaf655c8a151462c8c) --- nova/db/sqlalchemy/api.py | 17 ++++++++++++++++- nova/tests/unit/db/test_db_api.py | 8 ++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 973622a8f217..e523f575a150 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -47,6 +47,7 @@ from sqlalchemy import or_ from sqlalchemy.orm import aliased from sqlalchemy.orm import joinedload from sqlalchemy.orm import noload +from sqlalchemy.orm import subqueryload from sqlalchemy.orm import undefer from sqlalchemy.schema import Table from sqlalchemy import sql @@ -1266,13 +1267,27 @@ def _build_instance_get(context, columns_to_join=None): continue if 'extra.' in column: query = query.options(undefer(column)) + elif column in ['metadata', 'system_metadata']: + # NOTE(melwitt): We use subqueryload() instead of joinedload() for + # metadata and system_metadata because of the one-to-many + # relationship of the data. Directly joining these columns can + # result in a large number of additional rows being queried if an + # instance has a large number of (system_)metadata items, resulting + # in a large data transfer. Instead, the subqueryload() will + # perform additional queries to obtain metadata and system_metadata + # for the instance. + query = query.options(subqueryload(column)) else: query = query.options(joinedload(column)) # NOTE(alaski) Stop lazy loading of columns not needed. for col in ['metadata', 'system_metadata']: if col not in columns_to_join: query = query.options(noload(col)) - return query + # NOTE(melwitt): We need to use order_by() so that the + # additional queries emitted by subqueryload() include the same ordering as + # used by the parent query. + # https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#the-importance-of-ordering + return query.order_by(models.Instance.id) def _instances_fill_metadata(context, instances, manual_joins=None): diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index cf662aad73eb..a9943f1c3711 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -1693,6 +1693,14 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin): sys_meta = utils.metadata_to_dict(inst['system_metadata']) self.assertEqual(sys_meta, self.sample_data['system_metadata']) + def test_instance_get_with_meta(self): + inst_id = self.create_instance_with_args().id + inst = db.instance_get(self.ctxt, inst_id) + meta = utils.metadata_to_dict(inst['metadata']) + self.assertEqual(meta, self.sample_data['metadata']) + sys_meta = utils.metadata_to_dict(inst['system_metadata']) + self.assertEqual(sys_meta, self.sample_data['system_metadata']) + def test_instance_update(self): instance = self.create_instance_with_args() metadata = {'host': 'bar', 'key2': 'wuff'}