From a8c0dbd5d49372ed6b9e849ac71b9b2742b9436d Mon Sep 17 00:00:00 2001 From: Thomas Bechtold Date: Thu, 10 Dec 2015 07:16:54 +0100 Subject: [PATCH 01/16] Fix documentation example The verified parameter needs to be set instead of "my_gerrit: -1" . Change-Id: Ife9111266c79b3d9c0eee3879f49e36940cfbf46 --- doc/source/zuul.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/zuul.rst b/doc/source/zuul.rst index 74ce3607e7..ad8ec2e103 100644 --- a/doc/source/zuul.rst +++ b/doc/source/zuul.rst @@ -568,8 +568,8 @@ file. The first is called a *check* pipeline:: my_gerrit: verified: 1 failure: - gerrit: - my_gerrit: -1 + my_gerrit: + verified: -1 This will trigger jobs each time a new patchset (or change) is uploaded to Gerrit, and report +/-1 values to Gerrit in the From 802c03ccc09ef9766e30920ab1a9983adc18d653 Mon Sep 17 00:00:00 2001 From: JP Sullivan Date: Thu, 10 Dec 2015 16:54:16 +0000 Subject: [PATCH 02/16] Add vim swap files to .gitignore Change-Id: I8db056a61e76b58ba0ca98c7b411f656c76352b6 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9703f16c92..b59cb77233 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.sw? *.egg *.egg-info *.pyc From ccef05dede36d3645939199f79f8e7e198f03abe Mon Sep 17 00:00:00 2001 From: Evgeny Antyshev Date: Thu, 10 Dec 2015 15:05:33 +0000 Subject: [PATCH 03/16] Connection names for legacy configs It has been set like that: connections = {'_legacy_gerrit': Connection(name='gerrit')} This leads to that in merger.py it creates ".ssh_wrapper_gerrit" file, basing it on key from connections dict, but tries to use ".ssh_wrapper__legacy_gerrit", basing on connection name. I propose to set it to "gerrit" everywhere (and the same for smtp). Change-Id: I2c2d8bee7bcf98c5f12809f77b34e206d07b8438 --- zuul/lib/connections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zuul/lib/connections.py b/zuul/lib/connections.py index cb26ba509c..92ddb0fd25 100644 --- a/zuul/lib/connections.py +++ b/zuul/lib/connections.py @@ -56,11 +56,11 @@ def configure_connections(config): if 'gerrit' in config.sections(): connections['gerrit'] = \ zuul.connection.gerrit.GerritConnection( - '_legacy_gerrit', dict(config.items('gerrit'))) + 'gerrit', dict(config.items('gerrit'))) if 'smtp' in config.sections(): connections['smtp'] = \ zuul.connection.smtp.SMTPConnection( - '_legacy_smtp', dict(config.items('smtp'))) + 'smtp', dict(config.items('smtp'))) return connections From 932f1f81fed034719be3c788b4d4615e9bc325dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Nov=C3=BD?= Date: Fri, 11 Dec 2015 21:06:13 +0100 Subject: [PATCH 04/16] Deprecated tox -downloadcache option removed Caching is enabled by default from pip version 6.0 More info: https://testrun.org/tox/latest/config.html#confval-downloadcache=path https://pip.pypa.io/en/stable/reference/pip_install/#caching Change-Id: I40e4e60383ce6824aa0d73db887e9d38bbaf3e58 --- tox.ini | 3 --- 1 file changed, 3 deletions(-) diff --git a/tox.ini b/tox.ini index 0f8254a813..a6ad6a04a8 100644 --- a/tox.ini +++ b/tox.ini @@ -16,9 +16,6 @@ deps = -r{toxinidir}/requirements.txt commands = python setup.py testr --slowest --testr-args='{posargs}' -[tox:jenkins] -downloadcache = ~/cache/pip - [testenv:pep8] commands = flake8 {posargs} From 13cb40c3a076ba78edc5e35729c6bd7ef38ff9bb Mon Sep 17 00:00:00 2001 From: Richard Hedlind Date: Fri, 18 Dec 2015 13:45:58 -0700 Subject: [PATCH 05/16] Add exception handler to updateBuildDescriptions Adding a exception handler to updateBuildDescriptions to handle exceptions from launcher (Gearman). This addresses the corner case seen in: https://storyboard.openstack.org/#!/story/2000445 In the above mentioned corner case, Gearman throws a communication error exception. Since updateBuildDescriptions is called after the build status has been reported to Gerrit, we want to make sure that we return back to caller so that the build is properly marked as reported in reportItem to avoid an infinite loop. Change-Id: I166d4a2e1d45e97a9ed0b6addb7270e1bf92d6f7 --- zuul/scheduler.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/zuul/scheduler.py b/zuul/scheduler.py index f8321d14e2..151aae14e6 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -1535,13 +1535,23 @@ class BasePipelineManager(object): def updateBuildDescriptions(self, build_set): for build in build_set.getBuilds(): - desc = self.formatDescription(build) - self.sched.launcher.setBuildDescription(build, desc) + try: + desc = self.formatDescription(build) + self.sched.launcher.setBuildDescription(build, desc) + except: + # Log the failure and let loop continue + self.log.error("Failed to update description for build %s" % + (build)) if build_set.previous_build_set: for build in build_set.previous_build_set.getBuilds(): - desc = self.formatDescription(build) - self.sched.launcher.setBuildDescription(build, desc) + try: + desc = self.formatDescription(build) + self.sched.launcher.setBuildDescription(build, desc) + except: + # Log the failure and let loop continue + self.log.error("Failed to update description for " + "build %s in previous build set" % (build)) def onBuildStarted(self, build): self.log.debug("Build %s started" % build) From 811e2e933440a66324be7bc37db36bf94395ab30 Mon Sep 17 00:00:00 2001 From: Joshua Hesketh Date: Tue, 8 Dec 2015 09:55:05 +1100 Subject: [PATCH 06/16] Fix regression in change tracking Make sure we update the referenced change object on a new gerrit event rather than waiting to remake the queue item. This was a performance regression in the connection changes. Change-Id: I2a967f0347352a7674deb550e34fb94d1d903e89 --- zuul/connection/__init__.py | 11 +++++++++++ zuul/connection/gerrit.py | 23 ++++++++++++++++++----- zuul/model.py | 3 --- zuul/scheduler.py | 3 +++ zuul/source/gerrit.py | 3 --- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/zuul/connection/__init__.py b/zuul/connection/__init__.py index f2ea47a790..402528f084 100644 --- a/zuul/connection/__init__.py +++ b/zuul/connection/__init__.py @@ -43,6 +43,14 @@ class BaseConnection(object): self.connection_name = connection_name self.connection_config = connection_config + # Keep track of the sources, triggers and reporters using this + # connection + self.attached_to = { + 'source': [], + 'trigger': [], + 'reporter': [], + } + def onLoad(self): pass @@ -51,3 +59,6 @@ class BaseConnection(object): def registerScheduler(self, sched): self.sched = sched + + def registerUse(self, what, instance): + self.attached_to[what].append(instance) diff --git a/zuul/connection/gerrit.py b/zuul/connection/gerrit.py index c408d7d25c..f8e5add617 100644 --- a/zuul/connection/gerrit.py +++ b/zuul/connection/gerrit.py @@ -47,7 +47,6 @@ class GerritEventConnector(threading.Thread): def _handleEvent(self): ts, data = self.connection.getEvent() if self._stopped: - self.connection.eventDone() return # Gerrit can produce inconsistent data immediately after an # event, So ensure that we do not deliver the event to Zuul @@ -101,11 +100,23 @@ class GerritEventConnector(threading.Thread): if (event.change_number and self.connection.sched.getProject(event.project_name)): - # Mark the change as needing a refresh in the cache - event._needs_refresh = True - + # Call _getChange for the side effect of updating the + # cache. Note that this modifies Change objects outside + # the main thread. + # NOTE(jhesketh): Ideally we'd just remove the change from the + # cache to denote that it needs updating. However the change + # object is already used by Item's and hence BuildSet's etc. and + # we need to update those objects by reference so that they have + # the correct/new information and also avoid hitting gerrit + # multiple times. + if self.connection.attached_to['source']: + self.connection.attached_to['source'][0]._getChange( + event.change_number, event.patch_number, refresh=True) + # We only need to do this once since the connection maintains + # the cache (which is shared between all the sources) + # NOTE(jhesketh): We may couple sources and connections again + # at which point this becomes more sensible. self.connection.sched.addEvent(event) - self.connection.eventDone() def run(self): while True: @@ -115,6 +126,8 @@ class GerritEventConnector(threading.Thread): self._handleEvent() except: self.log.exception("Exception moving Gerrit event:") + finally: + self.connection.eventDone() class GerritWatcher(threading.Thread): diff --git a/zuul/model.py b/zuul/model.py index 54f776cf67..c555561148 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -1005,9 +1005,6 @@ class TriggerEvent(object): # an admin command, etc): self.forced_pipeline = None - # Internal mechanism to track if the change needs a refresh from cache - self._needs_refresh = False - def __repr__(self): ret = ' Date: Tue, 22 Dec 2015 12:11:55 -0500 Subject: [PATCH 07/16] Bump APScheduler to >=3.0 This patch upgrades zuul to support APScheduler 3.0. For the most part, 3.0 was a rewrite but our changes seem to be limited. Change-Id: I0c66b5998122c3f59ed06e3e7b3ab3199f94f478 Signed-off-by: Paul Belanger --- requirements.txt | 2 +- zuul/trigger/timer.py | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6318a593ee..8da177af12 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ extras statsd>=1.0.0,<3.0 voluptuous>=0.7 gear>=0.5.7,<1.0.0 -apscheduler>=2.1.1,<3.0 +apscheduler>=3.0 PrettyTable>=0.6,<0.8 babel>=1.0 six>=1.6.0 diff --git a/zuul/trigger/timer.py b/zuul/trigger/timer.py index c93a638279..d42e3db296 100644 --- a/zuul/trigger/timer.py +++ b/zuul/trigger/timer.py @@ -13,7 +13,8 @@ # License for the specific language governing permissions and limitations # under the License. -import apscheduler.scheduler +from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.triggers.cron import CronTrigger import logging import voluptuous as v from zuul.model import EventFilter, TriggerEvent @@ -26,7 +27,7 @@ class TimerTrigger(BaseTrigger): def __init__(self, trigger_config={}, sched=None, connection=None): super(TimerTrigger, self).__init__(trigger_config, sched, connection) - self.apsched = apscheduler.scheduler.Scheduler() + self.apsched = BackgroundScheduler() self.apsched.start() def _onTrigger(self, pipeline_name, timespec): @@ -62,7 +63,7 @@ class TimerTrigger(BaseTrigger): def postConfig(self): for job in self.apsched.get_jobs(): - self.apsched.unschedule_job(job) + job.remove() for pipeline in self.sched.layout.pipelines.values(): for ef in pipeline.manager.event_filters: if ef.trigger != self: @@ -81,14 +82,11 @@ class TimerTrigger(BaseTrigger): second = parts[5] else: second = None - self.apsched.add_cron_job(self._onTrigger, - day=dom, - day_of_week=dow, - hour=hour, - minute=minute, - second=second, - args=(pipeline.name, - timespec,)) + trigger = CronTrigger(day=dom, day_of_week=dow, hour=hour, + minute=minute, second=second) + + self.apsched.add_job(self._onTrigger, trigger=trigger, + args=(pipeline.name, timespec,)) def getSchema(): From 271297de9063769acfd71c8f18e31ae661c4b80c Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Wed, 23 Dec 2015 12:03:31 -0500 Subject: [PATCH 08/16] Remove webob requirements cap Looking at the library, and openstack requirements, there doesn't appear to be a reason to cap webob. So, remove the cap so we can use newer versions. Change-Id: Id19c297b540e9081bcf733dff3a334a4b0f477d8 Signed-off-by: Paul Belanger --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6318a593ee..02f17f5b32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ pbr>=0.5.21,<1.0 argparse PyYAML>=3.1.0 Paste -WebOb>=1.2.3,<1.3 +WebOb>=1.2.3 paramiko>=1.8.0 GitPython>=0.3.3 ordereddict From 2055dcd93d991473c961fe5e04a3d74908252796 Mon Sep 17 00:00:00 2001 From: Andreas Jaeger Date: Tue, 29 Dec 2015 21:47:21 +0100 Subject: [PATCH 09/16] Use git.openstack.org everywhere Our official git master is at git.openstack.org, update places that use github instead. Change-Id: Idf745ced59028dbada05acf7ae0be05e967a999d --- doc/source/launchers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/launchers.rst b/doc/source/launchers.rst index 0a1e0e7743..c61cea8724 100644 --- a/doc/source/launchers.rst +++ b/doc/source/launchers.rst @@ -239,7 +239,7 @@ the Git plugin to prepare them, or you may chose to use a shell script instead. As an example, the OpenStack project uses the following script to prepare the workspace for its integration testing: - https://github.com/openstack-infra/devstack-gate/blob/master/devstack-vm-gate-wrap.sh + https://git.openstack.org/cgit/openstack-infra/devstack-gate/tree/devstack-vm-gate-wrap.sh Turbo Hipster Worker ~~~~~~~~~~~~~~~~~~~~ From 9ba7c060e6ffd335894d1b0ab70ae24656f7a3c5 Mon Sep 17 00:00:00 2001 From: Doug Wiegley Date: Wed, 30 Dec 2015 11:35:05 -0700 Subject: [PATCH 10/16] Bump pbr minimum version, to avoid testrepository requirement Change-Id: I89154466cb8a0a3f777df2cea48ae1812f3de881 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6318a593ee..6e8e1c19e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -pbr>=0.5.21,<1.0 +pbr>=1.1.0 argparse PyYAML>=3.1.0 From b078bb4850e956c2d0b5d5c51397b9096de4abe0 Mon Sep 17 00:00:00 2001 From: Evgeny Antyshev Date: Mon, 25 Jan 2016 14:44:21 +0000 Subject: [PATCH 11/16] Don't require 'commit' attribute in merge event Problem occurs when merge events for periodic pipeline are processed: first, a trivial mistake in type checking introduced in review 243493, second, merge events generated for NullChange's are repo updates, and don't provide 'commit' attribute. I don't see the necessity to enforce 'build_set.commit' to be set (which is given to job env ZUUL_COMMIT if 'ref' or 'refspec' are set, but that's not the case for NullChange) Change-Id: Ib5da4ba987898d37d8e5082f4b8e2a5c31910323 --- zuul/scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 05eb6febfd..9f370b9087 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -1577,9 +1577,9 @@ class BasePipelineManager(object): if event.merged: build_set.commit = event.commit elif event.updated: - if not isinstance(item, NullChange): + if not isinstance(item.change, NullChange): build_set.commit = item.change.newrev - if not build_set.commit: + if not build_set.commit and not isinstance(item.change, NullChange): self.log.info("Unable to merge change %s" % item.change) self.pipeline.setUnableToMerge(item) From 90b61dbde89402971411a63f7596719db63f6155 Mon Sep 17 00:00:00 2001 From: Joshua Hesketh Date: Wed, 3 Feb 2016 14:22:15 +1100 Subject: [PATCH 12/16] Fix memory leak reloading triggers Because the triggers are loaded to the scheduler and not an object of a pipeline they aren't reset when reloading. They are reused by the new pipeline, however any previously loaded triggers will still have an old connection object that should no longer be used. Instead reset the triggers when reloading causing new triggers to be created by the pipeline configuration against the new connection objects. The connection objects that were hanging around for old triggers were keeping their change cache and hence using up a lot of memory. It appears that maintainTriggerCache is only called when reloading, so the cache would have a habit of growing out of hand and, in particular, if you don't ever reload it will not be maintained. A followup to run the cache at sensible times will come. Change-Id: I81ee47524cda71a500c55a95a2280f491b1b63d9 --- zuul/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 05eb6febfd..f93eca92b6 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -263,6 +263,7 @@ class Scheduler(threading.Thread): def _unloadDrivers(self): for trigger in self.triggers.values(): trigger.stop() + self.triggers = {} for pipeline in self.layout.pipelines.values(): pipeline.source.stop() for action in self._reporter_actions.values(): From 2a6cc76d18f8a1b0c68da158589eea7ed343a669 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Wed, 3 Feb 2016 14:32:05 -0800 Subject: [PATCH 13/16] Pass ZUUL_TEST_ROOT through tox This is so that one might, for example, run the zuul tests out of a tmpfs. Change-Id: I8e1d5b11c83e5f782f92d95b0d09632312bf310d --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index a6ad6a04a8..79ea939b11 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ setenv = STATSD_HOST=127.0.0.1 STATSD_PORT=8125 VIRTUAL_ENV={envdir} OS_TEST_TIMEOUT=30 +passenv = ZUUL_TEST_ROOT usedevelop = True install_command = pip install {opts} {packages} deps = -r{toxinidir}/requirements.txt From 995fc0fc5bef915c3d8560fa25bd6944e2813713 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 4 Feb 2016 16:48:31 -0800 Subject: [PATCH 14/16] Try to make test_idle less racy Reconfiguration should be synchronous, but just in case, wait afterwords. The second wait may be more important -- we might still have some outstanding timer events from the previous configuration so we should make sure that everything really has stopped before we release builds. Change-Id: I52a3b0c309dc87fc6a553e690286d5dae093e085 --- tests/test_scheduler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 4f52911096..ead8c6ef5a 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -2742,11 +2742,11 @@ class TestScheduler(ZuulTestCase): 'tests/fixtures/layout-idle.yaml') self.sched.reconfigure(self.config) self.registerJobs() + self.waitUntilSettled() # The pipeline triggers every second, so we should have seen # several by now. time.sleep(5) - self.waitUntilSettled() # Stop queuing timer triggered jobs so that the assertions # below don't race against more jobs being queued. @@ -2754,6 +2754,7 @@ class TestScheduler(ZuulTestCase): 'tests/fixtures/layout-no-timer.yaml') self.sched.reconfigure(self.config) self.registerJobs() + self.waitUntilSettled() self.assertEqual(len(self.builds), 2) self.worker.release('.*') From af17a978c43ad1909b189d0fcaaac0757e91f518 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Wed, 3 Feb 2016 15:07:18 -0800 Subject: [PATCH 15/16] Add job mutex support This is so that jobs that interact with external resources can be mutexed. Change-Id: I94365e258cae30c5fe61981eccc879f400b02f7f --- doc/source/zuul.rst | 5 ++ tests/fixtures/layout-mutex.yaml | 25 ++++++++++ tests/fixtures/layout.yaml | 4 ++ tests/test_scheduler.py | 64 ++++++++++++++++++++++++ zuul/layoutvalidator.py | 1 + zuul/model.py | 16 ++++-- zuul/scheduler.py | 83 +++++++++++++++++++++++++++++--- 7 files changed, 186 insertions(+), 12 deletions(-) create mode 100644 tests/fixtures/layout-mutex.yaml diff --git a/doc/source/zuul.rst b/doc/source/zuul.rst index ad8ec2e103..b5b8d7bf19 100644 --- a/doc/source/zuul.rst +++ b/doc/source/zuul.rst @@ -704,6 +704,11 @@ each job as it builds a list from the project specification. would largely defeat the parallelization of dependent change testing that is the main feature of Zuul. Default: ``false``. +**mutex (optional)** + This is a string that names a mutex that should be observed by this + job. Only one build of any job that references the same named mutex + will be enqueued at a time. This applies across all pipelines. + **branch (optional)** This job should only be run on matching branches. This field is treated as a regular expression and multiple branches may be diff --git a/tests/fixtures/layout-mutex.yaml b/tests/fixtures/layout-mutex.yaml new file mode 100644 index 0000000000..fcd052973c --- /dev/null +++ b/tests/fixtures/layout-mutex.yaml @@ -0,0 +1,25 @@ +pipelines: + - name: check + manager: IndependentPipelineManager + trigger: + gerrit: + - event: patchset-created + success: + gerrit: + verified: 1 + failure: + gerrit: + verified: -1 + +jobs: + - name: mutex-one + mutex: test-mutex + - name: mutex-two + mutex: test-mutex + +projects: + - name: org/project + check: + - project-test1 + - mutex-one + - mutex-two diff --git a/tests/fixtures/layout.yaml b/tests/fixtures/layout.yaml index 1d2344370a..e8f035e5f4 100644 --- a/tests/fixtures/layout.yaml +++ b/tests/fixtures/layout.yaml @@ -116,6 +116,10 @@ jobs: parameter-function: select_debian_node - name: project1-project2-integration queue-name: integration + - name: mutex-one + mutex: test-mutex + - name: mutex-two + mutex: test-mutex project-templates: - name: test-one-and-two diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index ead8c6ef5a..8960e3af75 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -2280,6 +2280,70 @@ class TestScheduler(ZuulTestCase): self.sched.reconfigure(self.config) self.assertEqual(len(self.sched.layout.pipelines['gate'].queues), 1) + def test_mutex(self): + "Test job mutexes" + self.config.set('zuul', 'layout_config', + 'tests/fixtures/layout-mutex.yaml') + self.sched.reconfigure(self.config) + + self.worker.hold_jobs_in_build = True + A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') + B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B') + self.assertFalse('test-mutex' in self.sched.mutex.mutexes) + + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + self.assertEqual(len(self.builds), 3) + self.assertEqual(self.builds[0].name, 'project-test1') + self.assertEqual(self.builds[1].name, 'mutex-one') + self.assertEqual(self.builds[2].name, 'project-test1') + + self.worker.release('mutex-one') + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 3) + self.assertEqual(self.builds[0].name, 'project-test1') + self.assertEqual(self.builds[1].name, 'project-test1') + self.assertEqual(self.builds[2].name, 'mutex-two') + self.assertTrue('test-mutex' in self.sched.mutex.mutexes) + + self.worker.release('mutex-two') + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 3) + self.assertEqual(self.builds[0].name, 'project-test1') + self.assertEqual(self.builds[1].name, 'project-test1') + self.assertEqual(self.builds[2].name, 'mutex-one') + self.assertTrue('test-mutex' in self.sched.mutex.mutexes) + + self.worker.release('mutex-one') + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 3) + self.assertEqual(self.builds[0].name, 'project-test1') + self.assertEqual(self.builds[1].name, 'project-test1') + self.assertEqual(self.builds[2].name, 'mutex-two') + self.assertTrue('test-mutex' in self.sched.mutex.mutexes) + + self.worker.release('mutex-two') + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 2) + self.assertEqual(self.builds[0].name, 'project-test1') + self.assertEqual(self.builds[1].name, 'project-test1') + self.assertFalse('test-mutex' in self.sched.mutex.mutexes) + + self.worker.hold_jobs_in_build = False + self.worker.release() + + self.waitUntilSettled() + self.assertEqual(len(self.builds), 0) + + self.assertEqual(A.reported, 1) + self.assertEqual(B.reported, 1) + self.assertFalse('test-mutex' in self.sched.mutex.mutexes) + def test_node_label(self): "Test that a job runs on a specific node label" self.worker.registerFunction('build:node-project-test1:debian') diff --git a/zuul/layoutvalidator.py b/zuul/layoutvalidator.py index ba96ab7949..a01eed3e76 100644 --- a/zuul/layoutvalidator.py +++ b/zuul/layoutvalidator.py @@ -103,6 +103,7 @@ class LayoutSchema(object): 'success-pattern': str, 'hold-following-changes': bool, 'voting': bool, + 'mutex': str, 'parameter-function': str, 'branch': toList(str), 'files': toList(str), diff --git a/zuul/model.py b/zuul/model.py index c555561148..75f727dfc3 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -134,7 +134,7 @@ class Pipeline(object): return [] return item.change.filterJobs(tree.getJobs()) - def _findJobsToRun(self, job_trees, item): + def _findJobsToRun(self, job_trees, item, mutex): torun = [] if item.item_ahead: # Only run jobs if any 'hold' jobs on the change ahead @@ -153,20 +153,23 @@ class Pipeline(object): else: # There is no build for the root of this job tree, # so we should run it. - torun.append(job) + if mutex.acquire(item, job): + # If this job needs a mutex, either acquire it or make + # sure that we have it before running the job. + torun.append(job) # If there is no job, this is a null job tree, and we should # run all of its jobs. if result == 'SUCCESS' or not job: - torun.extend(self._findJobsToRun(tree.job_trees, item)) + torun.extend(self._findJobsToRun(tree.job_trees, item, mutex)) return torun - def findJobsToRun(self, item): + def findJobsToRun(self, item, mutex): if not item.live: return [] tree = self.getJobTree(item.change.project) if not tree: return [] - return self._findJobsToRun(tree.job_trees, item) + return self._findJobsToRun(tree.job_trees, item, mutex) def haveAllJobsStarted(self, item): for job in self.getJobs(item): @@ -441,6 +444,7 @@ class Job(object): self.failure_pattern = None self.success_pattern = None self.parameter_function = None + self.mutex = None # A metajob should only supply values for attributes that have # been explicitly provided, so avoid setting boolean defaults. if self.is_metajob: @@ -487,6 +491,8 @@ class Job(object): self.skip_if_matcher = other.skip_if_matcher.copy() if other.swift: self.swift.update(other.swift) + if other.mutex: + self.mutex = other.mutex # Only non-None values should be copied for boolean attributes. if other.hold_following_changes is not None: self.hold_following_changes = other.hold_following_changes diff --git a/zuul/scheduler.py b/zuul/scheduler.py index f93eca92b6..0e3fea0eeb 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -59,6 +59,68 @@ def deep_format(obj, paramdict): return ret +class MutexHandler(object): + log = logging.getLogger("zuul.MutexHandler") + + def __init__(self): + self.mutexes = {} + + def acquire(self, item, job): + if not job.mutex: + return True + mutex_name = job.mutex + m = self.mutexes.get(mutex_name) + if not m: + # The mutex is not held, acquire it + self._acquire(mutex_name, item, job.name) + return True + held_item, held_job_name = m + if held_item is item and held_job_name == job.name: + # This item already holds the mutex + return True + held_build = held_item.current_build_set.getBuild(held_job_name) + if held_build and held_build.result: + # The build that held the mutex is complete, release it + # and let the new item have it. + self.log.error("Held mutex %s being released because " + "the build that holds it is complete" % + (mutex_name,)) + self._release(mutex_name, item, job.name) + self._acquire(mutex_name, item, job.name) + return True + return False + + def release(self, item, job): + if not job.mutex: + return + mutex_name = job.mutex + m = self.mutexes.get(mutex_name) + if not m: + # The mutex is not held, nothing to do + self.log.error("Mutex can not be released for %s " + "because the mutex is not held" % + (item,)) + return + held_item, held_job_name = m + if held_item is item and held_job_name == job.name: + # This item holds the mutex + self._release(mutex_name, item, job.name) + return + self.log.error("Mutex can not be released for %s " + "which does not hold it" % + (item,)) + + def _acquire(self, mutex_name, item, job_name): + self.log.debug("Job %s of item %s acquiring mutex %s" % + (job_name, item, mutex_name)) + self.mutexes[mutex_name] = (item, job_name) + + def _release(self, mutex_name, item, job_name): + self.log.debug("Job %s of item %s releasing mutex %s" % + (job_name, item, mutex_name)) + del self.mutexes[mutex_name] + + class ManagementEvent(object): """An event that should be processed within the main queue run loop""" def __init__(self): @@ -185,6 +247,7 @@ class Scheduler(threading.Thread): self._stopped = False self.launcher = None self.merger = None + self.mutex = MutexHandler() self.connections = dict() # Despite triggers being part of the pipeline, there is one trigger set # per scheduler. The pipeline handles the trigger filters but since @@ -461,6 +524,9 @@ class Scheduler(threading.Thread): m = config_job.get('voting', None) if m is not None: job.voting = m + m = config_job.get('mutex', None) + if m is not None: + job.mutex = m fname = config_job.get('parameter-function', None) if fname: func = config_env.get(fname, None) @@ -1086,14 +1152,16 @@ class BasePipelineManager(object): efilters += str(tree.job.skip_if_matcher) if efilters: efilters = ' ' + efilters - hold = '' + tags = [] if tree.job.hold_following_changes: - hold = ' [hold]' - voting = '' + tags.append('[hold]') if not tree.job.voting: - voting = ' [nonvoting]' - self.log.info("%s%s%s%s%s" % (istr, repr(tree.job), - efilters, hold, voting)) + tags.append('[nonvoting]') + if tree.job.mutex: + tags.append('[mutex: %s]' % tree.job.mutex) + tags = ' '.join(tags) + self.log.info("%s%s%s %s" % (istr, repr(tree.job), + efilters, tags)) for x in tree.job_trees: log_jobs(x, indent + 2) @@ -1410,7 +1478,7 @@ class BasePipelineManager(object): "for change %s:" % (job, item.change)) def launchJobs(self, item): - jobs = self.pipeline.findJobsToRun(item) + jobs = self.pipeline.findJobsToRun(item, self.sched.mutex) if jobs: self._launchJobs(item, jobs) @@ -1566,6 +1634,7 @@ class BasePipelineManager(object): item = build.build_set.item self.pipeline.setResult(item, build) + self.sched.mutex.release(item, build.job) self.log.debug("Item %s status is now:\n %s" % (item, item.formatStatus())) return True From f760f0e49f0d45291afc6e5bee0041e87b17e865 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Tue, 9 Feb 2016 08:44:52 -0800 Subject: [PATCH 16/16] Fix default merge failure reports Commit 385d11e2eddad010dd8de2989668092249acddd3 moved the logic that performs report formatting into the reporters themselves. But it also contained a logic change to the formatting. Previously, if an item was not mergeable, it was reported without a job list, regardless if the merge-failure or standard failure reporter was used. With that change, if a pipeline specified a merge-failure message reporter, it would not format the job list, but if no merge-failure reporter was supplied, and the standard failure reporter was used, the standard failure reporter would not check whether a merge-failure happened and instead always try to format the job list. Change-Id: If65d4f64d6558a544d3d0c2cc0b32ad7786a6bcd --- tests/test_scheduler.py | 25 +++++++++++++++++++++++++ zuul/reporter/__init__.py | 2 ++ 2 files changed, 27 insertions(+) diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 8960e3af75..81c294824d 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -3477,6 +3477,31 @@ For CI problems and help debugging, contact ci@example.org""" self.assertEqual('The merge failed! For more information...', self.smtp_messages[0]['body']) + def test_default_merge_failure_reports(self): + """Check that the default merge failure reports are correct.""" + + # A should report success, B should report merge failure. + A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') + A.addPatchset(['conflict']) + B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B') + B.addPatchset(['conflict']) + A.addApproval('CRVW', 2) + B.addApproval('CRVW', 2) + self.fake_gerrit.addEvent(A.addApproval('APRV', 1)) + self.fake_gerrit.addEvent(B.addApproval('APRV', 1)) + self.waitUntilSettled() + + self.assertEqual(3, len(self.history)) # A jobs + self.assertEqual(A.reported, 2) + self.assertEqual(B.reported, 2) + self.assertEqual(A.data['status'], 'MERGED') + self.assertEqual(B.data['status'], 'NEW') + self.assertIn('Build succeeded', A.messages[1]) + self.assertIn('Merge Failed', B.messages[1]) + self.assertIn('automatically merged', B.messages[1]) + self.assertNotIn('logs.example.com', B.messages[1]) + self.assertNotIn('SKIPPED', B.messages[1]) + def test_swift_instructions(self): "Test that the correct swift instructions are sent to the workers" self.config.set('zuul', 'layout_config', diff --git a/zuul/reporter/__init__.py b/zuul/reporter/__init__.py index e29f9a776f..fd7917400e 100644 --- a/zuul/reporter/__init__.py +++ b/zuul/reporter/__init__.py @@ -84,6 +84,8 @@ class BaseReporter(object): def _formatItemReportFailure(self, pipeline, item): if item.dequeued_needing_change: msg = 'This change depends on a change that failed to merge.\n' + elif not pipeline.didMergerSucceed(item): + msg = pipeline.merge_failure_message else: msg = (pipeline.failure_message + '\n\n' + self._formatItemReportJobs(pipeline, item))