Add attempts logic for jobs
Today, if a job is aborted, zuul will relaunch said job until it succeeds or fails. If the job continues to abort, it will loop forever. As a result, we have now added the ability to limit this. By default we'll try to launch an aborted job a total of 3 times before RETRY_LIMIT is returned as the result. Change-Id: Ie26fdc29c07430ebfb3df8be8ac1786d63d7e0fe Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
parent
38ce39fe58
commit
71d9817406
@ -803,6 +803,11 @@ each job as it builds a list from the project specification.
|
||||
Boolean value (``true`` or ``false``) that indicates whether
|
||||
a job is voting or not. Default: ``true``.
|
||||
|
||||
**attempts (optional)**
|
||||
Number of times zuul will attempt to launch a job. Once the limit is reached, zuul will report
|
||||
RETRY_LIMIT as the job result.
|
||||
Defaults to 3.
|
||||
|
||||
**tags (optional)**
|
||||
A list of arbitrary strings which will be associated with the job.
|
||||
Can be used by the parameter-function to alter behavior based on
|
||||
|
@ -540,6 +540,7 @@ class FakeBuild(threading.Thread):
|
||||
self.wait_condition = threading.Condition()
|
||||
self.waiting = False
|
||||
self.aborted = False
|
||||
self.requeue = False
|
||||
self.created = time.time()
|
||||
self.description = ''
|
||||
self.run_error = False
|
||||
@ -602,6 +603,8 @@ class FakeBuild(threading.Thread):
|
||||
result = 'FAILURE'
|
||||
if self.aborted:
|
||||
result = 'ABORTED'
|
||||
if self.requeue:
|
||||
result = None
|
||||
|
||||
if self.run_error:
|
||||
work_fail = True
|
||||
|
30
tests/fixtures/layout-abort-attempts.yaml
vendored
Normal file
30
tests/fixtures/layout-abort-attempts.yaml
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
pipelines:
|
||||
- name: check
|
||||
manager: IndependentPipelineManager
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: patchset-created
|
||||
success:
|
||||
gerrit:
|
||||
verified: 1
|
||||
failure:
|
||||
gerrit:
|
||||
verified: -1
|
||||
|
||||
- name: post
|
||||
manager: IndependentPipelineManager
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: ref-updated
|
||||
ref: ^(?!refs/).*$
|
||||
|
||||
jobs:
|
||||
- name: project-test1
|
||||
attempts: 4
|
||||
|
||||
projects:
|
||||
- name: org/project
|
||||
check:
|
||||
- project-merge:
|
||||
- project-test1
|
||||
- project-test2
|
@ -4481,3 +4481,36 @@ For CI problems and help debugging, contact ci@example.org"""
|
||||
self.assertIn(
|
||||
'- docs-draft-test2 https://server/job/docs-draft-test2/1/',
|
||||
body[3])
|
||||
|
||||
def test_rerun_on_abort(self):
    """Test that a requeued job is relaunched until RETRY_LIMIT is reported.

    Loads the ``layout-abort-attempts.yaml`` fixture, repeatedly marks the
    held test build for requeue, and finally checks that the change is
    reported once with ``RETRY_LIMIT`` in the message.
    """
    self.config.set('zuul', 'layout_config',
                    'tests/fixtures/layout-abort-attempts.yaml')
    self.sched.reconfigure(self.config)
    self.worker.hold_jobs_in_build = True
    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    self.worker.release('.*-merge')
    self.waitUntilSettled()

    # Two builds are expected to be held once the merge job is released.
    # NOTE(review): builds[0] is presumably project-test1, the job the
    # fixture configures with ``attempts: 4`` -- confirm the ordering.
    self.assertEqual(len(self.builds), 2)
    self.builds[0].requeue = True
    # '.*-test.*' (not '.*-test*', where '*' would only quantify the final
    # 't') so the pattern says what is meant: release every test job.
    self.worker.release('.*-test.*')
    self.waitUntilSettled()

    # Only one build should remain held on each pass; requeue it again.
    for _ in range(3):
        self.assertEqual(len(self.builds), 1)
        self.builds[0].requeue = True
        self.worker.release('.*-test1')
        self.waitUntilSettled()

    self.worker.hold_jobs_in_build = False
    self.worker.release()
    self.waitUntilSettled()
    self.assertEqual(len(self.history), 6)
    self.assertEqual(self.countJobResults(self.history, 'SUCCESS'), 2)
    self.assertEqual(A.reported, 1)
    self.assertIn('RETRY_LIMIT', A.messages[0])
|
||||
|
@ -367,6 +367,12 @@ class Gearman(object):
|
||||
self.onBuildCompleted(gearman_job, 'NOT_REGISTERED')
|
||||
return build
|
||||
|
||||
# NOTE(pabelanger): Rather than looping forever, check to see if job
|
||||
# has passed attempts limit.
|
||||
if item.current_build_set.getTries(job.name) > job.attempts:
|
||||
self.onBuildCompleted(gearman_job, 'RETRY_LIMIT')
|
||||
return build
|
||||
|
||||
if pipeline.precedence == zuul.model.PRECEDENCE_NORMAL:
|
||||
precedence = gear.PRECEDENCE_NORMAL
|
||||
elif pipeline.precedence == zuul.model.PRECEDENCE_HIGH:
|
||||
|
@ -103,6 +103,7 @@ class LayoutSchema(object):
|
||||
'success-pattern': str,
|
||||
'hold-following-changes': bool,
|
||||
'voting': bool,
|
||||
'attempts': int,
|
||||
'mutex': str,
|
||||
'tags': toList(str),
|
||||
'parameter-function': str,
|
||||
|
@ -466,6 +466,8 @@ class Job(object):
|
||||
self._files = []
|
||||
self.skip_if_matcher = None
|
||||
self.swift = {}
|
||||
# Number of attempts to launch a job before giving up.
|
||||
self.attempts = 3
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
@ -646,6 +648,7 @@ class BuildSet(object):
|
||||
self.unable_to_merge = False
|
||||
self.failing_reasons = []
|
||||
self.merge_state = self.NEW
|
||||
self.tries = {}
|
||||
|
||||
def __repr__(self):
|
||||
return '<BuildSet item: %s #builds: %s merge state: %s>' % (
|
||||
@ -671,9 +674,12 @@ class BuildSet(object):
|
||||
|
||||
def addBuild(self, build):
    """Register *build* in this build set under its job name.

    Stores the build in ``self.builds`` keyed by ``build.job.name``,
    replacing any build already stored under that name. Seeds the job's
    ``self.tries`` counter at 1 the first time the job is seen, and points
    ``build.build_set`` back at this build set.
    """
    job_name = build.job.name
    self.builds[job_name] = build
    # setdefault keeps an existing counter, so re-adding a build for a job
    # that was already tried does not reset its count.
    self.tries.setdefault(job_name, 1)
    build.build_set = self
|
||||
|
||||
def removeBuild(self, build):
    """Remove *build* from this build set and count one more try.

    Deletes the entry for ``build.job.name`` from ``self.builds`` and
    increments that job's ``self.tries`` counter. If no build is registered
    under that job name the call is a no-op.
    """
    job_name = build.job.name
    if job_name not in self.builds:
        # Guard first: bumping the counter and then failing on the delete
        # would raise KeyError and leave the try count inflated.
        return
    # Default of 1 mirrors the value addBuild seeds for a new job.
    self.tries[job_name] = self.tries.get(job_name, 1) + 1
    del self.builds[job_name]
|
||||
|
||||
def getBuild(self, job_name):
|
||||
@ -684,6 +690,9 @@ class BuildSet(object):
|
||||
keys.sort()
|
||||
return [self.builds.get(x) for x in keys]
|
||||
|
||||
def getTries(self, job_name):
    """Return the tries counter stored for *job_name*.

    Returns 0 when the job has no entry in ``self.tries``.
    """
    # Default to 0 rather than None: the result is compared numerically
    # against a job's attempts limit, and ``None > int`` raises TypeError
    # on Python 3.
    return self.tries.get(job_name, 0)
|
||||
|
||||
|
||||
class QueueItem(object):
|
||||
"""A changish inside of a Pipeline queue"""
|
||||
|
@ -529,6 +529,7 @@ class Scheduler(threading.Thread):
|
||||
m = config_job.get('hold-following-changes', False)
|
||||
if m:
|
||||
job.hold_following_changes = True
|
||||
job.attempts = config_job.get('attempts', 3)
|
||||
m = config_job.get('voting', None)
|
||||
if m is not None:
|
||||
job.voting = m
|
||||
|
Loading…
Reference in New Issue
Block a user