From 4eb3b655c1c26790b4a5618cc53d5450737a2663 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Tue, 19 Nov 2024 15:20:25 -0800 Subject: [PATCH] Batch fake build database additions When skipping a job, we create a fake build entry in ZK and also a database record. When skipping a large number of jobs, we create many such builds and after each one, we rewrite the entire QueueItem object in ZooKeeper. To avoid excessive unecessary database queries, create all the fake build objects and then add them to the database in one transaction, with only one buildset query. Also, remove the "missing_buildset_okay" parameter and replace it with an exception handler in order to localize the logic around when it's okay for a buildset to be missing. Change-Id: Ibfb553a7d4d408c2c870fb9e9a653ef35db21fd8 --- zuul/driver/sql/sqlreporter.py | 118 +++++++++++++++++++-------------- zuul/exceptions.py | 4 ++ zuul/model.py | 27 ++++---- zuul/scheduler.py | 29 ++++---- 4 files changed, 101 insertions(+), 77 deletions(-) diff --git a/zuul/driver/sql/sqlreporter.py b/zuul/driver/sql/sqlreporter.py index 4b70e4eb64..3ce02c7ac5 100644 --- a/zuul/driver/sql/sqlreporter.py +++ b/zuul/driver/sql/sqlreporter.py @@ -21,6 +21,7 @@ import voluptuous as v import sqlalchemy.exc +from zuul.exceptions import MissingBuildsetError from zuul.lib.result_data import get_artifacts_from_result_data from zuul.reporter import BaseReporter @@ -147,52 +148,27 @@ class SQLReporter(BaseReporter): else: self.log.exception("Unable to create build") - def reportBuildEnd(self, build, tenant, final, - missing_buildset_okay=False): + def reportBuildEnd(self, build, tenant, final): + return self.reportBuildEnds([build], tenant, final) + + def reportBuildEnds(self, builds, tenant, final): for retry_count in range(self.retry_count): try: with self.connection.getSession() as db: - db_build = db.getBuild(tenant=tenant, uuid=build.uuid) - if not db_build: - db_build = self._createBuild(db, build) - if not db_build: - if missing_buildset_okay: - return - raise Exception("Unable to create build in DB") - - end_time = build.end_time or time.time() - end = datetime.datetime.fromtimestamp( - end_time, tz=datetime.timezone.utc) - - db_build.result = build.result - db_build.end_time = end - db_build.log_url = build.log_url - db_build.error_detail = build.error_detail - db_build.final = final - db_build.held = build.held - - for provides in build.job.provides: - db_build.createProvides(name=provides) - - for artifact in get_artifacts_from_result_data( - build.result_data, - logger=self.log): - if 'metadata' in artifact: - artifact['metadata'] = json.dumps( - artifact['metadata']) - db_build.createArtifact(**artifact) - - for event in build.events: - # Reformat the event_time so it's compatible to SQL. - # Don't update the event object in place, but only - # the generated dict representation to not alter the - # datastructure for other reporters. - ev = event.toDict() - ev["event_time"] = datetime.datetime.fromtimestamp( - event.event_time, tz=datetime.timezone.utc) - db_build.createBuildEvent(**ev) - - return db_build + buildset = builds[0].build_set + try: + db_buildset = self._getBuildset(db, builds[0]) + except MissingBuildsetError: + # let _createBuild handle if necessary + pass + for build in builds: + if build.build_set is not buildset: + raise Exception("All batch reported builds " + "must be for the same buildset") + self._reportBuildEnd( + db, db_buildset, build, tenant, final) + # Exit retry loop + return except sqlalchemy.exc.DBAPIError: if retry_count < self.retry_count - 1: self.log.error("Unable to update build, will retry") @@ -200,21 +176,65 @@ class SQLReporter(BaseReporter): else: self.log.exception("Unable to update build") - def _createBuild(self, db, build): - start_time = build.start_time or time.time() - start = datetime.datetime.fromtimestamp(start_time, - tz=datetime.timezone.utc) + def _reportBuildEnd(self, db, db_buildset, build, tenant, final): + db_build = db.getBuild(tenant=tenant, uuid=build.uuid) + if not db_build: + db_build = self._createBuild(db, build, db_buildset=db_buildset) + end_time = build.end_time or time.time() + end = datetime.datetime.fromtimestamp( + end_time, tz=datetime.timezone.utc) + + db_build.result = build.result + db_build.end_time = end + db_build.log_url = build.log_url + db_build.error_detail = build.error_detail + db_build.final = final + db_build.held = build.held + + for provides in build.job.provides: + db_build.createProvides(name=provides) + + for artifact in get_artifacts_from_result_data( + build.result_data, + logger=self.log): + if 'metadata' in artifact: + artifact['metadata'] = json.dumps( + artifact['metadata']) + db_build.createArtifact(**artifact) + + for event in build.events: + # Reformat the event_time so it's compatible to SQL. + # Don't update the event object in place, but only + # the generated dict representation to not alter the + # datastructure for other reporters. + ev = event.toDict() + ev["event_time"] = datetime.datetime.fromtimestamp( + event.event_time, tz=datetime.timezone.utc) + db_build.createBuildEvent(**ev) + return db_build + + def _getBuildset(self, db, build): buildset = build.build_set if not buildset: - return + return None db_buildset = db.getBuildset( tenant=buildset.item.pipeline.tenant.name, uuid=buildset.uuid) if not db_buildset: self.log.warning("Creating missing buildset %s", buildset.uuid) db_buildset = self._createBuildset(db, buildset) + return db_buildset + + def _createBuild(self, db, build, db_buildset=None): + start_time = build.start_time or time.time() + start = datetime.datetime.fromtimestamp(start_time, + tz=datetime.timezone.utc) + if db_buildset is None: + db_buildset = self._getBuildset(db, build) + if db_buildset is None: + raise MissingBuildsetError() if db_buildset.first_build_start_time is None: db_buildset.first_build_start_time = start - item = buildset.item + item = build.build_set.item change = item.getChangeForJob(build.job) ref = db.getOrCreateRef( project=change.project.name, diff --git a/zuul/exceptions.py b/zuul/exceptions.py index 81abf65b5a..cd8ea04e58 100644 --- a/zuul/exceptions.py +++ b/zuul/exceptions.py @@ -42,6 +42,10 @@ class MergeFailure(Exception): pass +class MissingBuildsetError(Exception): + pass + + class ConfigurationError(Exception): pass diff --git a/zuul/model.py b/zuul/model.py index 131eec242d..8421ee625a 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -6678,11 +6678,10 @@ class QueueItem(zkobject.ZKObject): result='SKIPPED')) if fake_builds: self.addBuilds(fake_builds) - for fakebuild in fake_builds: - self.pipeline.manager.sched.reportBuildEnd( - fakebuild, - tenant=self.pipeline.tenant.name, - final=True) + self.pipeline.manager.sched.reportBuildEnds( + fake_builds, + tenant=self.pipeline.tenant.name, + final=True) def setNodeRequestFailure(self, job, error): fakebuild = Build.new( @@ -6753,11 +6752,10 @@ class QueueItem(zkobject.ZKObject): error_detail=msg, result='SKIPPED')) if fake_builds: self.addBuilds(fake_builds) - for fakebuild in fake_builds: - self.pipeline.manager.sched.reportBuildEnd( - fakebuild, - tenant=self.pipeline.tenant.name, - final=True) + self.pipeline.manager.sched.reportBuildEnds( + fake_builds, + tenant=self.pipeline.tenant.name, + final=True) def _setMissingJobsSkipped(self, msg): fake_builds = [] @@ -6772,11 +6770,10 @@ class QueueItem(zkobject.ZKObject): error_detail=msg, result='SKIPPED')) if fake_builds: self.addBuilds(fake_builds) - for fakebuild in fake_builds: - self.pipeline.manager.sched.reportBuildEnd( - fakebuild, - tenant=self.pipeline.tenant.name, - final=True) + self.pipeline.manager.sched.reportBuildEnds( + fake_builds, + tenant=self.pipeline.tenant.name, + final=True) def formatUrlPattern(self, url_pattern, job=None, build=None): url = None diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 73e0c6160e..e848addcfa 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -908,6 +908,11 @@ class Scheduler(threading.Thread): event, needs_result=False ) + def reportBuildEnds(self, builds, tenant, final): + self.sql.reportBuildEnds(builds, tenant=tenant, final=final) + for build in builds: + self._reportBuildStats(build) + def reportBuildEnd(self, build, tenant, final): self.sql.reportBuildEnd(build, tenant=tenant, final=final) self._reportBuildStats(build) @@ -2949,22 +2954,20 @@ class Scheduler(threading.Thread): ) self._cleanupCompletedBuild(build) - # If we have not reported start for this build, we don't - # need to report end. If we haven't reported start, we - # won't have a build record in the DB, so we would - # normally create one and attach it to a buildset, but we - # don't know the buildset, and we don't have enough - # information to construct a buildset record from scratch. - # Indicate that is acceptable in this situation, so we - # don't throw an exception. In other words: if we don't - # have a build record in the DB here, the - # "missing_buildset_okay" flag will cause reportBuildEnd - # to do nothing. try: self.sql.reportBuildEnd( build, tenant=pipeline.tenant.name, - final=True, - missing_buildset_okay=True) + final=True) + except exceptions.MissingBuildsetError: + # If we have not reported start for this build, we + # don't need to report end. If we haven't reported + # start, we won't have a build record in the DB, so we + # would normally create one and attach it to a + # buildset, but we don't know the buildset, and we + # don't have enough information to construct a + # buildset record from scratch. That's okay in this + # situation. + pass except Exception: log.exception("Error reporting build completion to DB:")