From 313dcebb9f8c511d26d6c2eadd416be16811f1c9 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Fri, 13 Mar 2020 13:43:56 +0100 Subject: [PATCH] Defer setting build pause to event queue We currently set the paused flag on the build object directly when we receive the notification from the executor and afterwards enqueue a BuildPaused event to let the pipeline process the pause. As soon a the build is marked as paused the pipeline processor will request resources for the child jobs. However during processing of the BuildPaused event it can be that the paused job requested to skip some jobs. In this case we have a race between requesting the resources and setting the child job results to SKIPPED. The result is that if the resources are requested earlier they will be leaked because they will never be released by the scheduler as no builds will be enqueued for them. This can be solved by deferring to set the paused flag to the event queue which eliminates the race. This is similar to the deferred result setting in I0631b97e6ae2bf7d883c6c6a189e9d41d99ece5d. Change-Id: Ie0fa8a2613fca65884d54b59273769f4984552cd --- zuul/executor/client.py | 8 +++----- zuul/scheduler.py | 5 +++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/zuul/executor/client.py b/zuul/executor/client.py index d80b90a3be..8fbfe0138f 100644 --- a/zuul/executor/client.py +++ b/zuul/executor/client.py @@ -487,11 +487,9 @@ class ExecutorClient(object): build.worker.updateFromData(data) build.__gearman_worker = build.worker.name - if 'paused' in data and build.paused != data['paused']: - build.paused = data['paused'] - if build.paused: - result_data = data.get('data', {}) - self.sched.onBuildPaused(build, result_data) + if 'paused' in data: + result_data = data.get('data', {}) + self.sched.onBuildPaused(build, result_data) if not started: self.log.info("Build %s started" % job) diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 8e0738236b..187c29d745 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -1429,6 +1429,11 @@ class Scheduler(threading.Thread): def _doBuildPausedEvent(self, event): build = event.build + + # Setting paused is deferred to event processing stage to avoid a race + # with child job skipping. + build.paused = True + log = get_annotated_logger(self.log, build.zuul_event_id) if build.build_set is not build.build_set.item.current_build_set: log.warning("Build %s is not in the current build set", build)