Add statement timeouts to some web sql queries

The SQL queries are designed to be highly optimized and should return
in milliseconds even with millions of rows.  However, sometimes
query planners are misled by certain characteristics and can end
up performing suboptimally.

To protect the web server in case that happens, set a statement or
query timeout for the queries which list builds or buildsets.  This
will instruct mysql or postgresql to limit execution of the buildset
or build listing queries to 30 seconds -- but only if these queries
originate in zuul-web.  Other users (such as the admin tools) may
still run these queries without an explicit time limit (though the
server may still have one).

Unfortunately (or perhaps fortunately) the RDBMSs can occasionally
satisfy the queries we use in testing in less than 1ms, making a
functional test of this feature impractical (we are unable to set
the timeout to 0ms).

Change-Id: If2f01b33dc679ab7cf952a4fbf095a1f3b6e4faf
This commit is contained in:
James E. Blair 2023-03-13 14:48:41 -07:00
parent 0d35927e2c
commit 84c0420792
2 changed files with 51 additions and 5 deletions

View File

@ -13,6 +13,7 @@
# under the License.
import logging
import re
import time
from urllib.parse import quote_plus
@ -27,12 +28,29 @@ import sqlalchemy.pool
from zuul.connection import BaseConnection
from zuul.zk.locks import CONNECTION_LOCK_ROOT, locked, SessionAwareLock
BUILDSET_TABLE = 'zuul_buildset'
BUILD_TABLE = 'zuul_build'
BUILD_EVENTS_TABLE = 'zuul_build_event'
ARTIFACT_TABLE = 'zuul_artifact'
PROVIDES_TABLE = 'zuul_provides'
STATEMENT_TIMEOUT_RE = re.compile(r'/\* statement_timeout=(\d+) \*/')
# In Postgres we can set a per-transaction (which for us is
# effectively per-query) execution timeout by executing "SET LOCAL
# statement_timeout" before our query. There isn't a great way to do
# this using the SQLAlchemy query API, so instead, we add a comment as
# a hint, and here we parse that comment and execute the "SET". The
# comment remains in the query sent to the server, but that's okay --
# it may even help an operator in debugging.
@sa.event.listens_for(sa.Engine, "before_cursor_execute")
def _set_timeout(conn, cursor, stmt, params, context, executemany):
match = STATEMENT_TIMEOUT_RE.search(stmt)
if match:
cursor.execute("SET LOCAL statement_timeout=%s" % match.groups())
class DatabaseSession(object):
@ -76,7 +94,7 @@ class DatabaseSession(object):
result=None, provides=None, final=None, held=None,
complete=None, sort_by_buildset=False, limit=50,
offset=0, idx_min=None, idx_max=None,
exclude_result=None):
exclude_result=None, query_timeout=None):
build_table = self.connection.zuul_build_table
buildset_table = self.connection.zuul_buildset_table
@ -104,6 +122,17 @@ class DatabaseSession(object):
if not (project or change or uuid):
q = q.with_hint(build_table, 'USE INDEX (PRIMARY)', 'mysql')
if query_timeout:
# For MySQL, we can add a query hint directly.
q = q.prefix_with(
f'/*+ MAX_EXECUTION_TIME({query_timeout}) */',
dialect='mysql')
# For Postgres, we add a comment that we parse in our
# event handler.
q = q.with_statement_hint(
f'/* statement_timeout={query_timeout} */',
dialect_name='postgresql')
q = self.listFilter(q, buildset_table.c.tenant, tenant)
q = self.listFilter(q, buildset_table.c.project, project)
q = self.listFilter(q, buildset_table.c.pipeline, pipeline)
@ -185,7 +214,8 @@ class DatabaseSession(object):
change=None, branch=None, patchset=None, ref=None,
newrev=None, uuid=None, result=None, complete=None,
updated_max=None,
limit=50, offset=0, idx_min=None, idx_max=None):
limit=50, offset=0, idx_min=None, idx_max=None,
query_timeout=None):
buildset_table = self.connection.zuul_buildset_table
@ -194,6 +224,17 @@ class DatabaseSession(object):
if not (project or change or uuid):
q = q.with_hint(buildset_table, 'USE INDEX (PRIMARY)', 'mysql')
if query_timeout:
# For MySQL, we can add a query hint directly.
q = q.prefix_with(
f'/*+ MAX_EXECUTION_TIME({query_timeout}) */',
dialect='mysql')
# For Postgres, we add a comment that we parse in our
# event handler.
q = q.with_statement_hint(
f'/* statement_timeout={query_timeout} */',
dialect_name='postgresql')
q = self.listFilter(q, buildset_table.c.tenant, tenant)
q = self.listFilter(q, buildset_table.c.project, project)
q = self.listFilter(q, buildset_table.c.pipeline, pipeline)

View File

@ -463,6 +463,8 @@ class ZuulWebAPI(object):
self.cache_expiry = 1
self.static_cache_expiry = zuulweb.static_cache_expiry
# SQL build query timeout, in milliseconds:
self.query_timeout = 30000
@property
def log(self):
@ -1454,7 +1456,8 @@ class ZuulWebAPI(object):
newrev=newrev, uuid=uuid, job_name=job_name, voting=voting,
nodeset=nodeset, result=result, final=final, held=held,
complete=complete, limit=limit, offset=skip, idx_min=_idx_min,
idx_max=_idx_max, exclude_result=exclude_result)
idx_max=_idx_max, exclude_result=exclude_result,
query_timeout=self.query_timeout)
return [self.buildToDict(b, b.buildset) for b in builds]
@ -1510,7 +1513,8 @@ class ZuulWebAPI(object):
buildsets = connection.getBuildsets(
tenant=tenant_name, project=project, pipeline=pipeline,
branch=branch, complete=True, limit=1)
branch=branch, complete=True, limit=1,
query_timeout=self.query_timeout)
if not buildsets:
raise cherrypy.HTTPError(404, 'No buildset found')
@ -1551,7 +1555,8 @@ class ZuulWebAPI(object):
tenant=tenant_name, project=project, pipeline=pipeline,
change=change, branch=branch, patchset=patchset, ref=ref,
newrev=newrev, uuid=uuid, result=result, complete=complete,
limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max)
limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max,
query_timeout=self.query_timeout)
return [self.buildsetToDict(b) for b in buildsets]