From e22d5a11bb6af0dffc9391294e218e88314eb823 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Tue, 7 Jul 2020 12:58:14 +0200 Subject: [PATCH] Resume jobs after reenqueue of an item Currently it can happen that a buildset cannot proceed when there is a paused job and an in-progress build is removed due to a reconfiguration. The reason for this is that there will never be a resume event for the paused job if there are no other in-progress builds. This can be fixed by resuming jobs if necessary when the reenqueue is finished. Change-Id: Id470a8db310bb7833a7103b6fe287fd9b132e55f --- .../git/org_project6/zuul-reconfigure.yaml | 4 ++ .../job-pause/git/org_project6/zuul.yaml | 7 +++ tests/fixtures/config/job-pause/main.yaml | 1 + tests/unit/test_v3.py | 43 +++++++++++++++++++ zuul/manager/__init__.py | 5 +++ 5 files changed, 60 insertions(+) create mode 100644 tests/fixtures/config/job-pause/git/org_project6/zuul-reconfigure.yaml create mode 100644 tests/fixtures/config/job-pause/git/org_project6/zuul.yaml diff --git a/tests/fixtures/config/job-pause/git/org_project6/zuul-reconfigure.yaml b/tests/fixtures/config/job-pause/git/org_project6/zuul-reconfigure.yaml new file mode 100644 index 0000000000..df5e757e70 --- /dev/null +++ b/tests/fixtures/config/job-pause/git/org_project6/zuul-reconfigure.yaml @@ -0,0 +1,4 @@ +- project: + check: + jobs: + - compile diff --git a/tests/fixtures/config/job-pause/git/org_project6/zuul.yaml b/tests/fixtures/config/job-pause/git/org_project6/zuul.yaml new file mode 100644 index 0000000000..235d609794 --- /dev/null +++ b/tests/fixtures/config/job-pause/git/org_project6/zuul.yaml @@ -0,0 +1,7 @@ +- project: + check: + jobs: + - compile + - test: + dependencies: + - compile diff --git a/tests/fixtures/config/job-pause/main.yaml b/tests/fixtures/config/job-pause/main.yaml index 7deb4c65f6..cf06a05cac 100644 --- a/tests/fixtures/config/job-pause/main.yaml +++ b/tests/fixtures/config/job-pause/main.yaml @@ -9,3 +9,4 @@ - org/project3 - 
org/project4 - org/project5 + - org/project6 diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py index 3d3408029f..dc8e8f6a14 100644 --- a/tests/unit/test_v3.py +++ b/tests/unit/test_v3.py @@ -5575,6 +5575,49 @@ class TestJobPause(AnsibleZuulTestCase): dict(name='just-pause', result='SUCCESS', changes='1,1'), ], ordered=False) + def test_job_reconfigure_resume(self): + """ + Tests that a paused job is resumed after reconfiguration + + Tests that a paused job is resumed after a reconfiguration removed the + last job which is in progress. + """ + self.wait_timeout = 120 + + # Hold jobs in build so the test controls when each one is released. + self.executor_server.hold_jobs_in_build = True + + A = self.fake_gerrit.addFakeChange('org/project6', 'master', 'A') + + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 1, 'compile in progress') + self.executor_server.release('compile') + self.waitUntilSettled() + + self.assertEqual(len(self.builds), 2, 'compile and test in progress') + + # Remove the test job. + self.commitConfigUpdate( + 'org/project6', + 'config/job-pause/git/org_project6/zuul-reconfigure.yaml') + self.scheds.execute(lambda app: app.sched.reconfigure(app.config)) + self.waitUntilSettled() + + # The "compile" job might be paused during the waitUntilSettled + # call and appear settled; it should automatically resume + # though, so just wait for it. + for x in iterate_timeout(60, 'job compile finished'): + if not self.builds: + break + self.waitUntilSettled() + + self.assertHistory([ + dict(name='compile', result='SUCCESS', changes='1,1'), + dict(name='test', result='ABORTED', changes='1,1'), + ]) + def test_job_pause_skipped_child(self): """ Tests that a paused job is resumed with externally skipped jobs. 
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index 7e387f86e2..ddb5c05439 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -294,6 +294,11 @@ class PipelineManager(metaclass=ABCMeta): if item.dequeued_needing_change: item.setDequeuedNeedingChange() + # It can happen that all in-flight builds have been removed + # which would lead to paused parent jobs not being resumed. + # To prevent that, resume parent jobs if necessary. + self._resumeBuilds(item.current_build_set) + self.reportStats(item) return True else: