Merge "Support fail-fast in project pipelines"
This commit is contained in:
commit
bb6078da0b
|
@ -1419,6 +1419,19 @@ pipeline.
|
||||||
difficult to determine why Zuul did or did not run a certain
|
difficult to determine why Zuul did or did not run a certain
|
||||||
job, the additional information this provides may help.
|
job, the additional information this provides may help.
|
||||||
|
|
||||||
|
.. attr:: fail-fast
|
||||||
|
:default: false
|
||||||
|
|
||||||
|
If this is set to `true`, Zuul will report a build failure
|
||||||
|
immediately and abort all still running builds. This can be used
|
||||||
|
to save resources in resource-constrained environments at the cost
|
||||||
|
of potentially requiring multiple attempts if more than one problem
|
||||||
|
is present.
|
||||||
|
|
||||||
|
Once this is defined it cannot be overridden afterwards. So this
|
||||||
|
can be forced to a specific value by e.g. defining it in a config
|
||||||
|
repo.
|
||||||
|
|
||||||
.. _project-template:
|
.. _project-template:
|
||||||
|
|
||||||
Project Template
|
Project Template
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Zuul now supports :attr:`project.<pipeline>.fail-fast` to immediately
|
||||||
|
report and cancel builds on the first failure in a buildset.
|
|
@ -0,0 +1,2 @@
|
||||||
|
- hosts: all
|
||||||
|
tasks: []
|
|
@ -0,0 +1,63 @@
|
||||||
|
- pipeline:
|
||||||
|
name: check
|
||||||
|
manager: independent
|
||||||
|
trigger:
|
||||||
|
gerrit:
|
||||||
|
- event: patchset-created
|
||||||
|
success:
|
||||||
|
gerrit:
|
||||||
|
Verified: 1
|
||||||
|
failure:
|
||||||
|
gerrit:
|
||||||
|
Verified: -1
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: base
|
||||||
|
parent: null
|
||||||
|
run: playbooks/run.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-merge
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test1
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test2
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test3
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test4
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test5
|
||||||
|
nodeset:
|
||||||
|
nodes:
|
||||||
|
- name: controller
|
||||||
|
label: label1
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test6
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project
|
||||||
|
check:
|
||||||
|
fail-fast: true
|
||||||
|
jobs:
|
||||||
|
- project-merge
|
||||||
|
- project-test1:
|
||||||
|
dependencies: project-merge
|
||||||
|
- project-test2:
|
||||||
|
dependencies: project-merge
|
||||||
|
- project-test3:
|
||||||
|
dependencies:
|
||||||
|
- name: project-test2
|
||||||
|
soft: true
|
||||||
|
- project-test4:
|
||||||
|
dependencies: project-test2
|
||||||
|
- project-test5
|
||||||
|
- project-test6:
|
||||||
|
dependencies: project-merge
|
||||||
|
voting: false
|
|
@ -0,0 +1 @@
|
||||||
|
test
|
|
@ -0,0 +1,5 @@
|
||||||
|
# This tries to unset fail-fast which should not be possible because it's
|
||||||
|
# already set to true in common-config.
|
||||||
|
- project:
|
||||||
|
check:
|
||||||
|
fail-fast: false
|
|
@ -0,0 +1,8 @@
|
||||||
|
- tenant:
|
||||||
|
name: tenant-one
|
||||||
|
source:
|
||||||
|
gerrit:
|
||||||
|
config-projects:
|
||||||
|
- common-config
|
||||||
|
untrusted-projects:
|
||||||
|
- org/project
|
|
@ -7163,3 +7163,92 @@ class TestSchedulerBranchMatcher(ZuulTestCase):
|
||||||
"A should report start and success")
|
"A should report start and success")
|
||||||
self.assertIn('gate', A.messages[1],
|
self.assertIn('gate', A.messages[1],
|
||||||
"A should transit gate")
|
"A should transit gate")
|
||||||
|
|
||||||
|
|
||||||
|
class TestSchedulerFailFast(ZuulTestCase):
|
||||||
|
tenant_config_file = 'config/fail-fast/main.yaml'
|
||||||
|
|
||||||
|
def test_fail_fast(self):
|
||||||
|
"""
|
||||||
|
Tests that a pipeline that is flagged with fail-fast
|
||||||
|
aborts jobs early.
|
||||||
|
"""
|
||||||
|
self.executor_server.hold_jobs_in_build = True
|
||||||
|
self.fake_nodepool.pause()
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
self.executor_server.failJob('project-test1', A)
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.waitUntilSettled()
|
||||||
|
self.assertEqual(len(self.builds), 1)
|
||||||
|
self.assertEqual(self.builds[0].name, 'project-merge')
|
||||||
|
self.executor_server.release('project-merge')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Now project-test1, project-test2 and project-test6
|
||||||
|
# should be running
|
||||||
|
self.assertEqual(len(self.builds), 3)
|
||||||
|
|
||||||
|
# Release project-test1 which will fail
|
||||||
|
self.executor_server.release('project-test1')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.fake_nodepool.unpause()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Now project-test2 and project-test6 must be aborted
|
||||||
|
self.assertEqual(len(self.builds), 0)
|
||||||
|
self.assertEqual(A.reported, 1)
|
||||||
|
self.assertHistory([
|
||||||
|
dict(name='project-merge', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test1', result='FAILURE', changes='1,1'),
|
||||||
|
dict(name='project-test2', result='ABORTED', changes='1,1'),
|
||||||
|
dict(name='project-test6', result='ABORTED', changes='1,1'),
|
||||||
|
], ordered=False)
|
||||||
|
|
||||||
|
def test_fail_fast_nonvoting(self):
|
||||||
|
"""
|
||||||
|
Tests that a pipeline that is flagged with fail-fast
|
||||||
|
doesn't abort jobs due to a non-voting job.
|
||||||
|
"""
|
||||||
|
self.executor_server.hold_jobs_in_build = True
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
self.executor_server.failJob('project-test6', A)
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.waitUntilSettled()
|
||||||
|
self.assertEqual(len(self.builds), 2)
|
||||||
|
self.assertEqual(self.builds[0].name, 'project-merge')
|
||||||
|
self.executor_server.release('project-merge')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Now project-test1, project-test2, project-test5 and project-test6
|
||||||
|
# should be running
|
||||||
|
self.assertEqual(len(self.builds), 4)
|
||||||
|
|
||||||
|
# Release project-test6 which will fail
|
||||||
|
self.executor_server.release('project-test6')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Now project-test1, project-test2 and project-test5 should be running
|
||||||
|
self.assertEqual(len(self.builds), 3)
|
||||||
|
|
||||||
|
self.executor_server.hold_jobs_in_build = False
|
||||||
|
self.executor_server.release()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.assertEqual(len(self.builds), 0)
|
||||||
|
self.assertEqual(A.reported, 1)
|
||||||
|
self.assertHistory([
|
||||||
|
dict(name='project-merge', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test1', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test2', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test3', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test4', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test5', result='SUCCESS', changes='1,1'),
|
||||||
|
dict(name='project-test6', result='FAILURE', changes='1,1'),
|
||||||
|
], ordered=False)
|
||||||
|
|
|
@ -926,6 +926,7 @@ class ProjectTemplateParser(object):
|
||||||
pipeline_contents = {
|
pipeline_contents = {
|
||||||
'queue': str,
|
'queue': str,
|
||||||
'debug': bool,
|
'debug': bool,
|
||||||
|
'fail-fast': bool,
|
||||||
'jobs': job_list
|
'jobs': job_list
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -955,6 +956,8 @@ class ProjectTemplateParser(object):
|
||||||
project_template.pipelines[pipeline_name] = project_pipeline
|
project_template.pipelines[pipeline_name] = project_pipeline
|
||||||
project_pipeline.queue_name = conf_pipeline.get('queue')
|
project_pipeline.queue_name = conf_pipeline.get('queue')
|
||||||
project_pipeline.debug = conf_pipeline.get('debug')
|
project_pipeline.debug = conf_pipeline.get('debug')
|
||||||
|
project_pipeline.fail_fast = conf_pipeline.get(
|
||||||
|
'fail-fast')
|
||||||
self.parseJobList(
|
self.parseJobList(
|
||||||
conf_pipeline.get('jobs', []),
|
conf_pipeline.get('jobs', []),
|
||||||
source_context, start_mark, project_pipeline.job_list)
|
source_context, start_mark, project_pipeline.job_list)
|
||||||
|
@ -1007,6 +1010,7 @@ class ProjectParser(object):
|
||||||
pipeline_contents = {
|
pipeline_contents = {
|
||||||
'queue': str,
|
'queue': str,
|
||||||
'debug': bool,
|
'debug': bool,
|
||||||
|
'fail-fast': bool,
|
||||||
'jobs': job_list
|
'jobs': job_list
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -449,6 +449,13 @@ class ExecutorClient(object):
|
||||||
# track of which results are non-final.
|
# track of which results are non-final.
|
||||||
if build.retry:
|
if build.retry:
|
||||||
result = None
|
result = None
|
||||||
|
|
||||||
|
# If the build was canceled, we actively canceled the job, so
|
||||||
|
# don't overwrite the result and don't retry.
|
||||||
|
if build.canceled:
|
||||||
|
result = build.result
|
||||||
|
build.retry = False
|
||||||
|
|
||||||
self.sched.onBuildCompleted(build, result, result_data, warnings)
|
self.sched.onBuildCompleted(build, result, result_data, warnings)
|
||||||
# The test suite expects the build to be removed from the
|
# The test suite expects the build to be removed from the
|
||||||
# internal dict after it's added to the report queue.
|
# internal dict after it's added to the report queue.
|
||||||
|
|
|
@ -852,6 +852,13 @@ class PipelineManager(object):
|
||||||
if build:
|
if build:
|
||||||
build_set.removeBuild(build)
|
build_set.removeBuild(build)
|
||||||
|
|
||||||
|
def _cancelRunningBuilds(self, build_set):
|
||||||
|
item = build_set.item
|
||||||
|
for job in item.getJobs():
|
||||||
|
build = build_set.getBuild(job.name)
|
||||||
|
if not build or not build.result:
|
||||||
|
self.sched.cancelJob(build_set, job, final=True)
|
||||||
|
|
||||||
def onBuildCompleted(self, build):
|
def onBuildCompleted(self, build):
|
||||||
item = build.build_set.item
|
item = build.build_set.item
|
||||||
|
|
||||||
|
@ -870,6 +877,15 @@ class PipelineManager(object):
|
||||||
self._resetDependentBuilds(build.build_set, build)
|
self._resetDependentBuilds(build.build_set, build)
|
||||||
|
|
||||||
self._resumeBuilds(build.build_set)
|
self._resumeBuilds(build.build_set)
|
||||||
|
|
||||||
|
if (item.project_pipeline_config.fail_fast and
|
||||||
|
build.failed and build.job.voting):
|
||||||
|
# If fail-fast is set and a voting build has failed,
|
||||||
|
# cancel all remaining jobs.
|
||||||
|
self.log.debug("Build %s failed and fail-fast enabled, canceling "
|
||||||
|
"running builds", build)
|
||||||
|
self._cancelRunningBuilds(build.build_set)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def onFilesChangesCompleted(self, event):
|
def onFilesChangesCompleted(self, event):
|
||||||
|
|
|
@ -1830,6 +1830,12 @@ class Build(object):
|
||||||
return ('<Build %s of %s voting:%s on %s>' %
|
return ('<Build %s of %s voting:%s on %s>' %
|
||||||
(self.uuid, self.job.name, self.job.voting, self.worker))
|
(self.uuid, self.job.name, self.job.voting, self.worker))
|
||||||
|
|
||||||
|
@property
|
||||||
|
def failed(self):
|
||||||
|
if self.result and self.result not in ['SUCCESS', 'SKIPPED']:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pipeline(self):
|
def pipeline(self):
|
||||||
return self.build_set.item.pipeline
|
return self.build_set.item.pipeline
|
||||||
|
@ -2478,7 +2484,7 @@ class QueueItem(object):
|
||||||
build = build_set.getBuild(job.name)
|
build = build_set.getBuild(job.name)
|
||||||
if build and (build.result == 'SUCCESS' or build.paused):
|
if build and (build.result == 'SUCCESS' or build.paused):
|
||||||
successful_job_names.add(job.name)
|
successful_job_names.add(job.name)
|
||||||
elif build and build.result in ('SKIPPED', 'FAILURE'):
|
elif build and build.result in ('SKIPPED', 'FAILURE', 'CANCELED'):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
nodeset = build_set.getJobNodeSet(job.name)
|
nodeset = build_set.getJobNodeSet(job.name)
|
||||||
|
@ -3151,6 +3157,7 @@ class ProjectPipelineConfig(ConfigObject):
|
||||||
self.queue_name = None
|
self.queue_name = None
|
||||||
self.debug = False
|
self.debug = False
|
||||||
self.debug_messages = []
|
self.debug_messages = []
|
||||||
|
self.fail_fast = None
|
||||||
self.variables = {}
|
self.variables = {}
|
||||||
|
|
||||||
def addDebug(self, msg):
|
def addDebug(self, msg):
|
||||||
|
@ -3163,6 +3170,8 @@ class ProjectPipelineConfig(ConfigObject):
|
||||||
self.queue_name = other.queue_name
|
self.queue_name = other.queue_name
|
||||||
if other.debug:
|
if other.debug:
|
||||||
self.debug = other.debug
|
self.debug = other.debug
|
||||||
|
if self.fail_fast is None:
|
||||||
|
self.fail_fast = other.fail_fast
|
||||||
self.job_list.inheritFrom(other.job_list)
|
self.job_list.inheritFrom(other.job_list)
|
||||||
|
|
||||||
def updateVariables(self, other):
|
def updateVariables(self, other):
|
||||||
|
|
|
@ -37,6 +37,7 @@ from zuul.lib.config import get_default
|
||||||
from zuul.lib.gear_utils import getGearmanFunctions
|
from zuul.lib.gear_utils import getGearmanFunctions
|
||||||
from zuul.lib.statsd import get_statsd
|
from zuul.lib.statsd import get_statsd
|
||||||
import zuul.lib.queue
|
import zuul.lib.queue
|
||||||
|
from zuul.model import Build
|
||||||
|
|
||||||
COMMANDS = ['full-reconfigure', 'stop']
|
COMMANDS = ['full-reconfigure', 'stop']
|
||||||
|
|
||||||
|
@ -1419,7 +1420,7 @@ class Scheduler(threading.Thread):
|
||||||
other_change.refresh_deps = True
|
other_change.refresh_deps = True
|
||||||
change.refresh_deps = True
|
change.refresh_deps = True
|
||||||
|
|
||||||
def cancelJob(self, buildset, job, build=None):
|
def cancelJob(self, buildset, job, build=None, final=False):
|
||||||
item = buildset.item
|
item = buildset.item
|
||||||
job_name = job.name
|
job_name = job.name
|
||||||
try:
|
try:
|
||||||
|
@ -1459,6 +1460,13 @@ class Scheduler(threading.Thread):
|
||||||
nodeset = buildset.getJobNodeSet(job_name)
|
nodeset = buildset.getJobNodeSet(job_name)
|
||||||
if nodeset:
|
if nodeset:
|
||||||
self.nodepool.returnNodeSet(nodeset)
|
self.nodepool.returnNodeSet(nodeset)
|
||||||
|
|
||||||
|
if final:
|
||||||
|
# If final is set make sure that the job is not resurrected
|
||||||
|
# later by re-requesting nodes.
|
||||||
|
fakebuild = Build(job, None)
|
||||||
|
fakebuild.result = 'CANCELED'
|
||||||
|
buildset.addBuild(fakebuild)
|
||||||
finally:
|
finally:
|
||||||
# Release the semaphore in any case
|
# Release the semaphore in any case
|
||||||
tenant = buildset.item.pipeline.tenant
|
tenant = buildset.item.pipeline.tenant
|
||||||
|
|
Loading…
Reference in New Issue