Use subqueryload() instead of joinedload() for (system_)metadata

Currently, when we "get" a single instance from the database and we
load metadata and system_metadata, we do so using a joinedload() which
does JOINs with the respective tables. Because of the one-to-many
relationship between an instance and (system_)metadata records, doing
the database query this way can result in a large number of additional
rows being returned unnecessarily and cause a large data transfer.

This is similar to the problem addressed by change
I0610fb16ccce2ee95c318589c8abcc30613a3fe9 which added separate queries
for (system_)metadata when we "get" multiple instances. We don't,
however, reuse the same code for this change because
_instances_fill_metadata converts the instance database object to a
dict, and some callers of _instance_get_by_uuid need to be able to
access an instance database object attached to the session (example:
instance_update_and_get_original).

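By using subqueryload() [1], the (system_)metadata records are loaded
with additional, separate queries instead of JOINs. This solves the
problem with a similar approach while still returning an instance
database object attached to the session.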

Closes-Bug: #1799298

[1] https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#subquery-eager-loading

Change-Id: I5c071f70f669966e9807b38e99077c1cae5b4606
(cherry picked from commit e728fe668a)
(cherry picked from commit 63d2e62c3a)
(cherry picked from commit e7a45e0335)
(cherry picked from commit 4350074029)
(cherry picked from commit ad7e4fb8f4)
This commit is contained in:
melanie witt 2020-10-20 21:46:13 +00:00
parent ee813f733f
commit 68f80d2013
2 changed files with 24 additions and 1 deletions

View File

@ -49,6 +49,7 @@ from sqlalchemy.orm import contains_eager
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import joinedload_all
from sqlalchemy.orm import noload
from sqlalchemy.orm import subqueryload
from sqlalchemy.orm import undefer
from sqlalchemy.schema import Table
from sqlalchemy import sql
@ -1930,13 +1931,27 @@ def _build_instance_get(context, columns_to_join=None):
continue
if 'extra.' in column:
query = query.options(undefer(column))
elif column in ['metadata', 'system_metadata']:
# NOTE(melwitt): We use subqueryload() instead of joinedload() for
# metadata and system_metadata because of the one-to-many
# relationship of the data. Directly joining these columns can
# result in a large number of additional rows being queried if an
# instance has a large number of (system_)metadata items, resulting
# in a large data transfer. Instead, the subqueryload() will
# perform additional queries to obtain metadata and system_metadata
# for the instance.
query = query.options(subqueryload(column))
else:
query = query.options(joinedload(column))
# NOTE(alaski) Stop lazy loading of columns not needed.
for col in ['metadata', 'system_metadata']:
if col not in columns_to_join:
query = query.options(noload(col))
return query
# NOTE(melwitt): We need to use order_by(<unique column>) so that the
# additional queries emitted by subqueryload() include the same ordering as
# used by the parent query.
# https://docs.sqlalchemy.org/en/13/orm/loading_relationships.html#the-importance-of-ordering
return query.order_by(models.Instance.id)
def _instances_fill_metadata(context, instances, manual_joins=None):

View File

@ -1943,6 +1943,14 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin):
sys_meta = utils.metadata_to_dict(inst['system_metadata'])
self.assertEqual(sys_meta, self.sample_data['system_metadata'])
def test_instance_get_with_meta(self):
    """Fetch an instance by id and verify both metadata collections.

    Creates an instance via create_instance_with_args(), reads it back
    through db.instance_get() and compares its 'metadata' and
    'system_metadata' (converted with utils.metadata_to_dict) against
    the corresponding entries in self.sample_data.
    """
    created_id = self.create_instance_with_args().id
    fetched = db.instance_get(self.ctxt, created_id)
    # Check 'metadata' first, then 'system_metadata'.
    for field in ('metadata', 'system_metadata'):
        actual = utils.metadata_to_dict(fetched[field])
        self.assertEqual(actual, self.sample_data[field])
def test_instance_update(self):
instance = self.create_instance_with_args()
metadata = {'host': 'bar', 'key2': 'wuff'}