Use subqueryload() instead of joinedload() for (system_)metadata

Currently, when we "get" a single instance from the database and load its metadata and system_metadata, we do so using joinedload(), which JOINs against the respective tables. Because of the one-to-many relationship between an instance and its (system_)metadata records, querying this way can return a large number of additional rows unnecessarily and cause a large data transfer.

This is similar to the problem addressed by change I0610fb16ccce2ee95c318589c8abcc30613a3fe9, which added separate queries for (system_)metadata when we "get" multiple instances. We don't, however, reuse the same code for this change because _instances_fill_metadata converts the instance database object to a dict, and some callers of _instance_get_by_uuid need to be able to access an instance database object attached to the session (example: instance_update_and_get_original).

By using subqueryload() [1], we can perform the additional queries for (system_)metadata to solve the problem with a similar approach.

Closes-Bug: #1799298

[1] https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#subquery-eager-loading

Change-Id: I5c071f70f669966e9807b38e99077c1cae5b4606
(cherry picked from commit e728fe668a)
(cherry picked from commit 63d2e62c3a)
(cherry picked from commit e7a45e0335)
(cherry picked from commit 4350074029)
(cherry picked from commit ad7e4fb8f4)
2020-10-20 21:46:13 +00:00 · 2020-10-20 21:46:13 +00:00 · 68f80d2013
parent ee813f733f
commit 68f80d2013
2 changed files with 24 additions and 1 deletions
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -49,6 +49,7 @@ from sqlalchemy.orm import contains_eager
 from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import joinedload_all
 from sqlalchemy.orm import noload
+from sqlalchemy.orm import subqueryload
 from sqlalchemy.orm import undefer
 from sqlalchemy.schema import Table
 from sqlalchemy import sql
@@ -1930,13 +1931,27 @@ def _build_instance_get(context, columns_to_join=None):
            continue
        if 'extra.' in column:
            query = query.options(undefer(column))
+        elif column in ['metadata', 'system_metadata']:
+            # NOTE(melwitt): We use subqueryload() instead of joinedload() for
+            # metadata and system_metadata because of the one-to-many
+            # relationship of the data. Directly joining these columns can
+            # result in a large number of additional rows being queried if an
+            # instance has a large number of (system_)metadata items, resulting
+            # in a large data transfer. Instead, the subqueryload() will
+            # perform additional queries to obtain metadata and system_metadata
+            # for the instance.
+            query = query.options(subqueryload(column))
        else:
            query = query.options(joinedload(column))
    # NOTE(alaski) Stop lazy loading of columns not needed.
    for col in ['metadata', 'system_metadata']:
        if col not in columns_to_join:
            query = query.options(noload(col))
-    return query
+    # NOTE(melwitt): We need to use order_by(<unique column>) so that the
+    # additional queries emitted by subqueryload() include the same ordering as
+    # used by the parent query.
+    # https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#the-importance-of-ordering
+    return query.order_by(models.Instance.id)


 def _instances_fill_metadata(context, instances, manual_joins=None):
--- a/nova/tests/unit/db/test_db_api.py
+++ b/nova/tests/unit/db/test_db_api.py
@@ -1943,6 +1943,14 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin):
            sys_meta = utils.metadata_to_dict(inst['system_metadata'])
            self.assertEqual(sys_meta, self.sample_data['system_metadata'])

+    def test_instance_get_with_meta(self):
+        inst_id = self.create_instance_with_args().id
+        inst = db.instance_get(self.ctxt, inst_id)
+        meta = utils.metadata_to_dict(inst['metadata'])
+        self.assertEqual(meta, self.sample_data['metadata'])
+        sys_meta = utils.metadata_to_dict(inst['system_metadata'])
+        self.assertEqual(sys_meta, self.sample_data['system_metadata'])
+
    def test_instance_update(self):
        instance = self.create_instance_with_args()
        metadata = {'host': 'bar', 'key2': 'wuff'}