From a703f42c245c6ba8352994951bcb36b5f1258ca1 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Sun, 26 May 2019 18:50:43 +0200 Subject: [PATCH] Annotate logs around build states It's useful to have logs around build states annotated with the event ids that caused it. Change-Id: Ib2173210516823cfe16e34710e8d01319aba8cce --- zuul/executor/client.py | 22 +++++++++++---------- zuul/executor/server.py | 42 ++++++++++++++++++++++++---------------- zuul/manager/__init__.py | 6 ++++-- zuul/scheduler.py | 16 +++++++-------- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/zuul/executor/client.py b/zuul/executor/client.py index 079214fb98..d4e9fb31af 100644 --- a/zuul/executor/client.py +++ b/zuul/executor/client.py @@ -503,12 +503,12 @@ class ExecutorClient(object): self.onBuildCompleted(job, 'LOST') def cancelJobInQueue(self, build): + log = get_annotated_logger(self.log, build.zuul_event_id) job = build.__gearman_job req = gear.CancelJobAdminRequest(job.handle) job.connection.sendAdminRequest(req, timeout=300) - self.log.debug("Response to cancel build %s request: %s" % - (build, req.response.strip())) + log.debug("Response to cancel build request: %s", req.response.strip()) if req.response.startswith(b"OK"): try: del self.builds[job.unique] @@ -522,29 +522,31 @@ class ExecutorClient(object): return False def cancelRunningBuild(self, build): + log = get_annotated_logger(self.log, build.zuul_event_id) if not build.__gearman_worker: - self.log.error("Build %s has no manager while canceling" % - (build,)) + log.error("Build %s has no manager while canceling", build) stop_uuid = str(uuid4().hex) - data = dict(uuid=build.__gearman_job.unique) + data = dict(uuid=build.__gearman_job.unique, + zuul_event_id=build.zuul_event_id) stop_job = gear.TextJob("executor:stop:%s" % build.__gearman_worker, json_dumps(data), unique=stop_uuid) self.meta_jobs[stop_uuid] = stop_job - self.log.debug("Submitting stop job: %s", stop_job) + log.debug("Submitting stop job: %s", stop_job) self.gearman.submitJob(stop_job, precedence=gear.PRECEDENCE_HIGH, timeout=300) return True def resumeBuild(self, build): + log = get_annotated_logger(self.log, build.zuul_event_id) if not build.__gearman_worker: - self.log.error("Build %s has no manager while resuming" % - (build,)) + log.error("Build %s has no manager while resuming", build) resume_uuid = str(uuid4().hex) - data = dict(uuid=build.__gearman_job.unique) + data = dict(uuid=build.__gearman_job.unique, + zuul_event_id=build.zuul_event_id) stop_job = gear.TextJob("executor:resume:%s" % build.__gearman_worker, json_dumps(data), unique=resume_uuid) self.meta_jobs[resume_uuid] = stop_job - self.log.debug("Submitting resume job: %s", stop_job) + log.debug("Submitting resume job: %s", stop_job) self.gearman.submitJob(stop_job, precedence=gear.PRECEDENCE_HIGH, timeout=300) diff --git a/zuul/executor/server.py b/zuul/executor/server.py index bc5f1ddf05..29f19eb7b3 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -2618,21 +2618,25 @@ class ExecutorServer(object): if not self._running: job.sendWorkFail() return + + args = json.loads(job.arguments) + zuul_event_id = args.get('zuul_event_id') + log = get_annotated_logger(self.log, zuul_event_id) + function_name = 'executor:execute' if self.zone: function_name += ':%s' % self.zone if job.name == (function_name): - self.log.debug("Got %s job: %s" % - (function_name, job.unique)) + log.debug("Got %s job: %s", function_name, job.unique) self.executeJob(job) elif job.name.startswith('executor:resume'): - self.log.debug("Got resume job: %s" % job.unique) + log.debug("Got resume job: %s", job.unique) self.resumeJob(job) elif job.name.startswith('executor:stop'): - self.log.debug("Got stop job: %s" % job.unique) + log.debug("Got stop job: %s", job.unique) self.stopJob(job) else: - self.log.error("Unable to handle job %s" % job.name) + log.error("Unable to handle job %s", job.name) job.sendWorkFail() def executeJob(self, job): @@ -2698,42 +2702,46 @@ class ExecutorServer(object): def resumeJob(self, job): try: args = json.loads(job.arguments) - self.log.debug("Resume job with arguments: %s" % (args,)) + zuul_event_id = args.get('zuul_event_id') + log = get_annotated_logger(self.log, zuul_event_id) + log.debug("Resume job with arguments: %s", args) unique = args['uuid'] - self.resumeJobByUnique(unique) + self.resumeJobByUnique(unique, zuul_event_id=zuul_event_id) finally: job.sendWorkComplete() def stopJob(self, job): try: args = json.loads(job.arguments) - self.log.debug("Stop job with arguments: %s" % (args,)) + zuul_event_id = args.get('zuul_event_id') + log = get_annotated_logger(self.log, zuul_event_id) + log.debug("Stop job with arguments: %s", args) unique = args['uuid'] - self.stopJobByUnique(unique) + self.stopJobByUnique(unique, zuul_event_id=zuul_event_id) finally: job.sendWorkComplete() - def resumeJobByUnique(self, unique): + def resumeJobByUnique(self, unique, zuul_event_id=None): + log = get_annotated_logger(self.log, zuul_event_id) job_worker = self.job_workers.get(unique) if not job_worker: - self.log.debug("Unable to find worker for job %s" % (unique,)) + log.debug("Unable to find worker for job %s", unique) return try: job_worker.resume() except Exception: - self.log.exception("Exception sending resume command " - "to worker:") + log.exception("Exception sending resume command to worker:") - def stopJobByUnique(self, unique, reason=None): + def stopJobByUnique(self, unique, reason=None, zuul_event_id=None): + log = get_annotated_logger(self.log, zuul_event_id) job_worker = self.job_workers.get(unique) if not job_worker: - self.log.debug("Unable to find worker for job %s" % (unique,)) + log.debug("Unable to find worker for job %s", unique) return try: job_worker.stop(reason) except Exception: - self.log.exception("Exception sending stop command " - "to worker:") + log.exception("Exception sending stop command to worker:") def cat(self, job): args = json.loads(job.arguments) diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index 6ca02a5295..7d7ec38192 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -825,12 +825,14 @@ class PipelineManager(object): return changed def onBuildStarted(self, build): - self.log.debug("Build %s started" % build) + log = get_annotated_logger(self.log, build.zuul_event_id) + log.debug("Build %s started", build) return True def onBuildPaused(self, build): + log = get_annotated_logger(self.log, build.zuul_event_id) item = build.build_set.item - self.log.debug("Build %s of %s paused", build, item.change) + log.debug("Build %s of %s paused", build, item.change) item.setResult(build) # We need to resume builds because we could either have no children diff --git a/zuul/scheduler.py b/zuul/scheduler.py index a2c6dad6af..cd32c1eaa6 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -1163,32 +1163,30 @@ class Scheduler(threading.Thread): def _doBuildStartedEvent(self, event): build = event.build + log = get_annotated_logger(self.log, build.zuul_event_id) if build.build_set is not build.build_set.item.current_build_set: - self.log.warning("Build %s is not in the current build set" % - (build,)) + log.warning("Build %s is not in the current build set", build) return pipeline = build.build_set.item.pipeline if not pipeline: - self.log.warning("Build %s is not associated with a pipeline" % - (build,)) + log.warning("Build %s is not associated with a pipeline", build) return try: build.estimated_time = float(self.time_database.getEstimatedTime( build)) except Exception: - self.log.exception("Exception estimating build time:") + log.exception("Exception estimating build time:") pipeline.manager.onBuildStarted(event.build) def _doBuildPausedEvent(self, event): build = event.build + log = get_annotated_logger(self.log, build.zuul_event_id) if build.build_set is not build.build_set.item.current_build_set: - self.log.warning("Build %s is not in the current build set" % - (build,)) + log.warning("Build %s is not in the current build set", build) return pipeline = build.build_set.item.pipeline if not pipeline: - self.log.warning("Build %s is not associated with a pipeline" % - (build,)) + log.warning("Build %s is not associated with a pipeline", build) return pipeline.manager.onBuildPaused(event.build)