From 130708b43c5b544cf72aafa57e0059c6db289638 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Tue, 11 Feb 2020 17:21:40 +0100 Subject: [PATCH] Support pausing merge jobs Currently an executor still executes merge jobs even when it's paused. This is surprising to the user and an operational problem when having a misbehaving executor for some reason. Further the merger now also can be paused explicitly. Change-Id: I7ebf2df9d6648789e6bb2d797edd5b67a0925cfc --- doc/source/discussion/components.rst | 9 +++++++-- .../notes/merger-pause-ac57e3e7421545b4.yaml | 5 +++++ tests/unit/test_scheduler.py | 3 +++ zuul/executor/server.py | 10 ++++++++-- zuul/lib/gearworker.py | 11 +++++++++-- zuul/merger/server.py | 15 +++++++++++++-- 6 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/merger-pause-ac57e3e7421545b4.yaml diff --git a/doc/source/discussion/components.rst b/doc/source/discussion/components.rst index c2ed7e4c49..5d93bd9327 100644 --- a/doc/source/discussion/components.rst +++ b/doc/source/discussion/components.rst @@ -479,8 +479,13 @@ The following section of ``zuul.conf`` is used by the merger: Operation ~~~~~~~~~ -To start the merger, run ``zuul-merger``. To stop it, kill the -PID which was saved in the pidfile specified in the configuration. +To start the merger, run ``zuul-merger``. + +In order to stop the merger and under normal circumstances it is +best to pause and wait for all currently running tasks to finish +before stopping it. To do so run ``zuul-merger pause``. + +To stop the merger immediately, run ``zuul-merger stop``. .. _executor: diff --git a/releasenotes/notes/merger-pause-ac57e3e7421545b4.yaml b/releasenotes/notes/merger-pause-ac57e3e7421545b4.yaml new file mode 100644 index 0000000000..646990177b --- /dev/null +++ b/releasenotes/notes/merger-pause-ac57e3e7421545b4.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + The zuul-executor now also pauses all merger related tasks when it's paused. + Further the zuul-merger can also be paused by running ``zuul-merger pause``. diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py index bb4c949e58..749099ac03 100644 --- a/tests/unit/test_scheduler.py +++ b/tests/unit/test_scheduler.py @@ -7364,6 +7364,9 @@ class TestSemaphore(ZuulTestCase): # Pause the executor so it doesn't take any jobs. self.executor_server.pause() + # Start merger as the paused executor won't take merge jobs. + self._startMerger() + # Pause nodepool so we can wait on the node requests and fulfill them # in a controlled manner. self.fake_nodepool.paused = True diff --git a/zuul/executor/server.py b/zuul/executor/server.py index cf8c74374e..69a2dc1a4d 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -2518,7 +2518,7 @@ class ExecutorServer(object): } self.merger_gearworker = ZuulGearWorker( 'Zuul Executor Merger', - 'zuul.ExecutorServer', + 'zuul.ExecutorServer.MergeWorker', 'merger', self.config, self.merger_jobs, @@ -2539,7 +2539,7 @@ class ExecutorServer(object): self.executor_gearworker = ZuulGearWorker( 'Zuul Executor Server', - 'zuul.ExecutorServer', + 'zuul.ExecutorServer.ExecuteWorker', 'executor', self.config, self.executor_jobs, @@ -2678,10 +2678,16 @@ class ExecutorServer(object): self.executor_gearworker.join() def pause(self): + self.log.debug('Pausing') self.pause_sensor.pause = True + if self.merger_gearworker is not None: + self.merger_gearworker.unregister() def unpause(self): + self.log.debug('Resuming') self.pause_sensor.pause = False + if self.merger_gearworker is not None: + self.merger_gearworker.register() def graceful(self): # TODOv3: implement diff --git a/zuul/lib/gearworker.py b/zuul/lib/gearworker.py index da7fa82a0e..1bffbd4dea 100644 --- a/zuul/lib/gearworker.py +++ b/zuul/lib/gearworker.py @@ -53,11 +53,18 @@ class ZuulGearWorker: tcp_keepintvl=30, tcp_keepcnt=5) self.log.debug('Waiting for server') self.gearman.waitForServer() + self.register() + self.thread.start() - self.log.debug('Registering') + def register(self): + self.log.debug('Registering jobs') for job in self.jobs: self.gearman.registerFunction(job) - self.thread.start() + + def unregister(self): + self.log.debug('Unregistering jobs') + for job in self.jobs: + self.gearman.unRegisterFunction(job) def stop(self): self._running = False diff --git a/zuul/merger/server.py b/zuul/merger/server.py index 04674f9f3a..fd05cda711 100644 --- a/zuul/merger/server.py +++ b/zuul/merger/server.py @@ -22,7 +22,7 @@ from zuul.lib.gearworker import ZuulGearWorker from zuul.merger import merger -COMMANDS = ['stop'] +COMMANDS = ['stop', 'pause', 'unpause'] class MergeServer(object): @@ -44,7 +44,10 @@ class MergeServer(object): merge_root, connections, merge_email, merge_name, speed_limit, speed_time, git_timeout=git_timeout) self.command_map = dict( - stop=self.stop) + stop=self.stop, + pause=self.pause, + unpause=self.unpause, + ) command_socket = get_default( self.config, 'merger', 'command_socket', '/var/lib/zuul/merger.socket') @@ -85,6 +88,14 @@ class MergeServer(object): def join(self): self.gearworker.join() + def pause(self): + self.log.debug('Pausing') + self.gearworker.unregister() + + def unpause(self): + self.log.debug('Resuming') + self.gearworker.register() + def runCommand(self): while self._command_running: try: