Fix deadlock with nonexistent jobs.

When Zuul tried to launch a non-existent jenkins job, the path
to mark the job as LOST involves calling back into the scheduler
from the launcher while holding the queue lock.  That deadlocks,
so instead, spawn a thread to report the job as lost.

Change-Id: I6a5661638e7edba1c56eb0b3d6384283b3ecc4ed
Reviewed-on: https://review.openstack.org/12889
Approved: James E. Blair <corvus@inaugust.com>
Reviewed-by: James E. Blair <corvus@inaugust.com>
Tested-by: Jenkins
This commit is contained in:
James E. Blair 2012-09-12 18:59:31 +02:00 committed by Jenkins
parent 05fed6074e
commit 7ee88a225d
3 changed files with 50 additions and 3 deletions

View File

@ -460,6 +460,7 @@ class FakeJenkins(object):
self.hold_jobs_in_queue = False
self.hold_jobs_in_build = False
self.fail_tests = {}
self.nonexistent_jobs = []
def fakeEnqueue(self, job):
self.queue.append(job)
@ -505,6 +506,8 @@ class FakeJenkins(object):
return False
def build_job(self, name, parameters):
if name in self.nonexistent_jobs:
raise Exception("Job does not exist")
count = self.job_counter.get(name, 0)
count += 1
self.job_counter[name] = count
@ -1530,3 +1533,39 @@ class testScheduler(unittest.TestCase):
assert A.data['status'] == 'MERGED'
assert A.reported == 2
self.assertEmptyQueues()
def test_nonexistent_job(self):
"Test launching a job that doesn't exist"
self.fake_jenkins.nonexistent_jobs.append('project-merge')
self.jenkins.launch_retry_timeout = 0.1
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('CRVW', 2)
self.fake_gerrit.addEvent(A.addApproval('APRV', 1))
# There may be a thread about to report a lost change
while A.reported < 2:
self.waitUntilSettled()
jobs = self.fake_jenkins.job_history
job_names = [x['name'] for x in jobs]
assert not job_names
assert A.data['status'] == 'NEW'
assert A.reported == 2
self.assertEmptyQueues()
# Make sure things still work:
self.fake_jenkins.nonexistent_jobs = []
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('CRVW', 2)
self.fake_gerrit.addEvent(A.addApproval('APRV', 1))
self.waitUntilSettled()
jobs = self.fake_jenkins.job_history
job_names = [x['name'] for x in jobs]
assert 'project-merge' in job_names
assert 'project-test1' in job_names
assert 'project-test2' in job_names
assert jobs[0]['result'] == 'SUCCESS'
assert jobs[1]['result'] == 'SUCCESS'
assert jobs[2]['result'] == 'SUCCESS'
assert A.data['status'] == 'MERGED'
assert A.reported == 2
self.assertEmptyQueues()

View File

@ -190,6 +190,7 @@ class ExtendedJenkins(jenkins.Jenkins):
class Jenkins(object):
log = logging.getLogger("zuul.Jenkins")
launch_retry_timeout = 5
def __init__(self, config, sched):
self.sched = sched
@ -267,7 +268,7 @@ class Jenkins(object):
self.log.exception("Exception launching build %s for "
"job %s for change %s (will retry):" %
(build, job, change))
time.sleep(5)
time.sleep(self.launch_retry_timeout)
if errored:
if launched:
@ -277,10 +278,16 @@ class Jenkins(object):
"declaring lost" % build)
# To keep the queue moving, declare this as a lost build
# so that the change will get dropped.
self.onBuildCompleted(build.uuid, 'LOST', None, None)
t = threading.Thread(target=self.declareBuildLost,
args=(build,))
t.start()
return build
def declareBuildLost(self, build):
# Call this from a new thread to invoke onBuildCompleted from
# a thread that has the queue lock.
self.onBuildCompleted(build.uuid, 'LOST', None, None)
def findBuildInQueue(self, build):
for item in self.jenkins.get_queue_info():
if 'actions' not in item:

View File

@ -707,6 +707,7 @@ class BasePipelineManager(object):
result = job.success_message
elif result == 'FAILURE' and job.failure_message:
result = job.failure_message
url = None
if build.url:
if pattern:
url = pattern.format(change=changeish,