Use selectinload for all list queries

Since node objects *can* be large, and due to the nature of joined
record deduplication, we should avoid them for list operations to
speed the operation to completion by *instead* allowing the client
to execute three queries and reconcile versus deduplicate the
single node.

This *should* result in generally faster list interaction, and
preserves the behavior for nodes at this time in order to
minimize a drastic increase of SQL queries for individual nodes.

Change-Id: Iac457e483068f613ded2aeff60cf6d9fc64a7dac
This commit is contained in:
Julia Kreger 2021-06-21 10:52:09 -07:00 committed by Jay Faulkner
parent cb1a5a3c98
commit 831cd5a15e
2 changed files with 107 additions and 10 deletions

View File

@ -79,16 +79,98 @@ def _wrap_session(session):
return session return session
def _get_node_query_with_all(): def _get_node_query_with_all_for_single_node():
"""Return a query object for the Node joined with all relevant fields. """Return a query object for the Node joined with all relevant fields.
This method utilizes a joined load query which creates a result set
where corresponding traits, and tags, are joined together in the result
set.
This is more efficient from a Queries Per Second standpoint with the
backend database, as they are not separate distinct queries which
are being executed by the client.
The downside of this, is the relationship of tags and traits to nodes
is that there may be multiple tags and traits for each node. Ultimately
this style of query forces SQLAlchemy to de-duplicate the result set
because the database returns the nodes portion of the result set for
every trait, tag, or other table field the query is joined with.
This looks like:
node1, tag1, trait1
node1, tag1, trait2
node1, tag1, trait3
node1, tag2, trait1
Et cetera, to create:
node1, [tag1, tag2], [trait1, trait 2, trait3]
Where joins are super inefficient for Ironic is where nodes are being
enumerated, as the above result set pattern is not just for one node, but
potentially thousands of nodes. In that case, we should use the
_get_node_query_with_all_for_list helper to return a more appropriate
query object which will be more efficient for the end user.
:returns: a query object. :returns: a query object.
""" """
# NOTE(TheJulia): This *likely* ought to be selectinload, however
# it is a very common hit pattern for Ironic to query just the node.
# In those sorts of locations, the performance issues are less noticeable
# to end users. *IF/WHEN* we change it to be selectinload for nodes,
# the resulting DB load will see a queries per second increase, which
# we should be careful about.
# NOTE(TheJulia): Basic benchmark difference
# Test data creation: 67.202 seconds.
# 2.43 seconds to obtain all nodes from SQLAlchemy (10k nodes)
# 5.15 seconds to obtain all nodes *and* have node objects (10k nodes)
return (model_query(models.Node) return (model_query(models.Node)
.options(joinedload('tags')) .options(joinedload('tags'))
.options(joinedload('traits'))) .options(joinedload('traits')))
def _get_node_query_with_all_for_list():
"""Return a query object for the Node with queried extra fields.
This method returns a query object joining tags and traits in a pattern
where the result set is first built, and then the resulting associations
are queried separately and the objects are reconciled by SQLAlchemy to
build the composite objects based upon the associations.
This results in the following query pattern when the query is executed:
select $fields from nodes where x;
# SQLAlchemy creates a list of associated node IDs.
select $fields from tags where node_id in ('1', '3', '37268');
select $fields from traits where node_id in ('1', '3', '37268');
SQLAlchemy then returns a result set where the tags and traits are
composited together efficiently as opposed to having to deduplicate
the result set. This shifts additional load to the database which
was previously a high overhead operation within the conductor...
which results in a slower conductor.
:returns: a query object.
"""
# NOTE(TheJulia): When comparing CI runs *with* this being the default
# for all general list operations, at 10k nodes, this pattern appears
# to be on-par with a 5% variability between the two example benchmark
# tests. That being said, the test *does* not include tags or traits
in its test data set so client side deduplication is not measured.
# NOTE(TheJulia): Basic benchmark difference
# test data creation: 67.117 seconds
# 2.32 seconds to obtain all nodes from SQLAlchemy (10k nodes)
# 4.99 seconds to obtain all nodes *and* have node objects (10k nodes)
# If this holds true, the required record deduplication with joinedload
# may be basically the same amount of overhead as requesting the tags
# and traits separately.
return (model_query(models.Node)
.options(selectinload('tags'))
.options(selectinload('traits')))
def _get_deploy_template_query_with_steps(): def _get_deploy_template_query_with_steps():
"""Return a query object for the DeployTemplate joined with steps. """Return a query object for the DeployTemplate joined with steps.
@ -437,7 +519,7 @@ class Connection(api.Connection):
def get_node_list(self, filters=None, limit=None, marker=None, def get_node_list(self, filters=None, limit=None, marker=None,
sort_key=None, sort_dir=None, fields=None): sort_key=None, sort_dir=None, fields=None):
if not fields: if not fields:
query = _get_node_query_with_all() query = _get_node_query_with_all_for_list()
query = self._add_nodes_filters(query, filters) query = self._add_nodes_filters(query, filters)
return _paginate_query(models.Node, limit, marker, return _paginate_query(models.Node, limit, marker,
sort_key, sort_dir, query) sort_key, sort_dir, query)
@ -536,9 +618,8 @@ class Connection(api.Connection):
@oslo_db_api.retry_on_deadlock @oslo_db_api.retry_on_deadlock
def reserve_node(self, tag, node_id): def reserve_node(self, tag, node_id):
with _session_for_write(): with _session_for_write():
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = add_identity_filter(query, node_id) query = add_identity_filter(query, node_id)
# be optimistic and assume we usually create a reservation
count = query.filter_by(reservation=None).update( count = query.filter_by(reservation=None).update(
{'reservation': tag}, synchronize_session=False) {'reservation': tag}, synchronize_session=False)
try: try:
@ -614,7 +695,7 @@ class Connection(api.Connection):
return node return node
def get_node_by_id(self, node_id): def get_node_by_id(self, node_id):
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = query.filter_by(id=node_id) query = query.filter_by(id=node_id)
try: try:
return query.one() return query.one()
@ -622,7 +703,7 @@ class Connection(api.Connection):
raise exception.NodeNotFound(node=node_id) raise exception.NodeNotFound(node=node_id)
def get_node_by_uuid(self, node_uuid): def get_node_by_uuid(self, node_uuid):
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = query.filter_by(uuid=node_uuid) query = query.filter_by(uuid=node_uuid)
try: try:
return query.one() return query.one()
@ -630,7 +711,7 @@ class Connection(api.Connection):
raise exception.NodeNotFound(node=node_uuid) raise exception.NodeNotFound(node=node_uuid)
def get_node_by_name(self, node_name): def get_node_by_name(self, node_name):
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = query.filter_by(name=node_name) query = query.filter_by(name=node_name)
try: try:
return query.one() return query.one()
@ -641,7 +722,7 @@ class Connection(api.Connection):
if not uuidutils.is_uuid_like(instance): if not uuidutils.is_uuid_like(instance):
raise exception.InvalidUUID(uuid=instance) raise exception.InvalidUUID(uuid=instance)
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = query.filter_by(instance_uuid=instance) query = query.filter_by(instance_uuid=instance)
try: try:
@ -758,7 +839,7 @@ class Connection(api.Connection):
ref.update(values) ref.update(values)
# Return the updated node model joined with all relevant fields. # Return the updated node model joined with all relevant fields.
query = _get_node_query_with_all() query = _get_node_query_with_all_for_single_node()
query = add_identity_filter(query, node_id) query = add_identity_filter(query, node_id)
return query.one() return query.one()
@ -1294,7 +1375,7 @@ class Connection(api.Connection):
return model_query(q.exists()).scalar() return model_query(q.exists()).scalar()
def get_node_by_port_addresses(self, addresses): def get_node_by_port_addresses(self, addresses):
q = _get_node_query_with_all() q = _get_node_query_with_all_for_single_node()
q = q.distinct().join(models.Port) q = q.distinct().join(models.Port)
q = q.filter(models.Port.address.in_(addresses)) q = q.filter(models.Port.address.in_(addresses))

View File

@ -0,0 +1,16 @@
---
upgrade:
- |
The query pattern for the database when lists of nodes are retrieved has
been changed to a more efficient pattern at scale, where a list of nodes
is generated, and then additional queries are executed to composite this
data together. This is from a model where the database client in the
conductor was having to deduplicate the resulting data set which is
overall less efficient.
other:
- |
The default database query pattern has been changed which will result
in additional database queries when compositing lists of ``nodes``
by separately querying ``traits`` and ``tags``. Previously this was a
joined query which requires deduplication of the result set before
building composite objects.