Fix deadlock with nonexistent jobs.
When Zuul tried to launch a non-existent jenkins job, the path to mark the job as LOST involves calling back into the scheduler from the launcher while holding the queue lock. That deadlocks, so instead, spawn a thread to report the job as lost. Change-Id: I6a5661638e7edba1c56eb0b3d6384283b3ecc4ed Reviewed-on: https://review.openstack.org/12889 Approved: James E. Blair <corvus@inaugust.com> Reviewed-by: James E. Blair <corvus@inaugust.com> Tested-by: Jenkins
This commit is contained in:
parent
05fed6074e
commit
7ee88a225d
|
@ -460,6 +460,7 @@ class FakeJenkins(object):
|
|||
self.hold_jobs_in_queue = False
|
||||
self.hold_jobs_in_build = False
|
||||
self.fail_tests = {}
|
||||
self.nonexistent_jobs = []
|
||||
|
||||
def fakeEnqueue(self, job):
|
||||
self.queue.append(job)
|
||||
|
@ -505,6 +506,8 @@ class FakeJenkins(object):
|
|||
return False
|
||||
|
||||
def build_job(self, name, parameters):
|
||||
if name in self.nonexistent_jobs:
|
||||
raise Exception("Job does not exist")
|
||||
count = self.job_counter.get(name, 0)
|
||||
count += 1
|
||||
self.job_counter[name] = count
|
||||
|
@ -1530,3 +1533,39 @@ class testScheduler(unittest.TestCase):
|
|||
assert A.data['status'] == 'MERGED'
|
||||
assert A.reported == 2
|
||||
self.assertEmptyQueues()
|
||||
|
||||
def test_nonexistent_job(self):
|
||||
"Test launching a job that doesn't exist"
|
||||
self.fake_jenkins.nonexistent_jobs.append('project-merge')
|
||||
self.jenkins.launch_retry_timeout = 0.1
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
A.addApproval('CRVW', 2)
|
||||
self.fake_gerrit.addEvent(A.addApproval('APRV', 1))
|
||||
# There may be a thread about to report a lost change
|
||||
while A.reported < 2:
|
||||
self.waitUntilSettled()
|
||||
jobs = self.fake_jenkins.job_history
|
||||
job_names = [x['name'] for x in jobs]
|
||||
assert not job_names
|
||||
assert A.data['status'] == 'NEW'
|
||||
assert A.reported == 2
|
||||
self.assertEmptyQueues()
|
||||
|
||||
# Make sure things still work:
|
||||
self.fake_jenkins.nonexistent_jobs = []
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
A.addApproval('CRVW', 2)
|
||||
self.fake_gerrit.addEvent(A.addApproval('APRV', 1))
|
||||
self.waitUntilSettled()
|
||||
jobs = self.fake_jenkins.job_history
|
||||
job_names = [x['name'] for x in jobs]
|
||||
assert 'project-merge' in job_names
|
||||
assert 'project-test1' in job_names
|
||||
assert 'project-test2' in job_names
|
||||
assert jobs[0]['result'] == 'SUCCESS'
|
||||
assert jobs[1]['result'] == 'SUCCESS'
|
||||
assert jobs[2]['result'] == 'SUCCESS'
|
||||
assert A.data['status'] == 'MERGED'
|
||||
assert A.reported == 2
|
||||
self.assertEmptyQueues()
|
||||
|
|
|
@ -190,6 +190,7 @@ class ExtendedJenkins(jenkins.Jenkins):
|
|||
|
||||
class Jenkins(object):
|
||||
log = logging.getLogger("zuul.Jenkins")
|
||||
launch_retry_timeout = 5
|
||||
|
||||
def __init__(self, config, sched):
|
||||
self.sched = sched
|
||||
|
@ -267,7 +268,7 @@ class Jenkins(object):
|
|||
self.log.exception("Exception launching build %s for "
|
||||
"job %s for change %s (will retry):" %
|
||||
(build, job, change))
|
||||
time.sleep(5)
|
||||
time.sleep(self.launch_retry_timeout)
|
||||
|
||||
if errored:
|
||||
if launched:
|
||||
|
@ -277,10 +278,16 @@ class Jenkins(object):
|
|||
"declaring lost" % build)
|
||||
# To keep the queue moving, declare this as a lost build
|
||||
# so that the change will get dropped.
|
||||
self.onBuildCompleted(build.uuid, 'LOST', None, None)
|
||||
|
||||
t = threading.Thread(target=self.declareBuildLost,
|
||||
args=(build,))
|
||||
t.start()
|
||||
return build
|
||||
|
||||
def declareBuildLost(self, build):
|
||||
# Call this from a new thread to invoke onBuildCompleted from
|
||||
# a thread that has the queue lock.
|
||||
self.onBuildCompleted(build.uuid, 'LOST', None, None)
|
||||
|
||||
def findBuildInQueue(self, build):
|
||||
for item in self.jenkins.get_queue_info():
|
||||
if 'actions' not in item:
|
||||
|
|
|
@ -707,6 +707,7 @@ class BasePipelineManager(object):
|
|||
result = job.success_message
|
||||
elif result == 'FAILURE' and job.failure_message:
|
||||
result = job.failure_message
|
||||
url = None
|
||||
if build.url:
|
||||
if pattern:
|
||||
url = pattern.format(change=changeish,
|
||||
|
|
Loading…
Reference in New Issue