Fix exception calculating run time for lost builds.

When a build is reported as lost, it has no start_time, so the
code that calculated the run-time for statsd threw an exception.

Correct that by only calculating the run time for jobs that have
really completed.

Add debug lines to onBuild* that would have helped diagnose this.

Keep track of the throwaway threads that are used to report LOST
builds so that the test suite can more correctly know when the
system is settled.

Add a test case for a single non-existent post job.

Add STATSD_* env variables in tox to make sure that the statsd
code paths are executed by tests.

Change-Id: I339443cbad7b0858d70e9180d63dfb701b8eaee9
Reviewed-on: https://review.openstack.org/18792
Reviewed-by: Clark Boylan <clark.boylan@gmail.com>
Approved: Jeremy Stanley <fungi@yuggoth.org>
Reviewed-by: Jeremy Stanley <fungi@yuggoth.org>
Tested-by: Jenkins
This commit is contained in:
James E. Blair 2012-12-31 17:01:50 -08:00 committed by Jenkins
parent 5f1104216a
commit f62d428f3b
4 changed files with 39 additions and 2 deletions

View File

@ -696,6 +696,7 @@ class testScheduler(unittest.TestCase):
if (self.sched.trigger_event_queue.empty() and
self.sched.result_event_queue.empty() and
self.fake_gerrit.event_queue.empty() and
len(self.jenkins.lost_threads) == 0 and
self.fake_jenkins.fakeAllWaiting()):
self.sched.queue_lock.release()
self.log.debug("...settled.")
@ -712,7 +713,8 @@ class testScheduler(unittest.TestCase):
for pipeline in self.sched.pipelines.values():
for queue in pipeline.queues:
if len(queue.queue) != 0:
print 'queue', queue.queue
print 'pipeline %s queue %s contents %s' % (
pipeline.name, queue.name, queue.queue)
assert len(queue.queue) == 0
if len(queue.severed_heads) != 0:
print 'heads', queue.severed_heads
@ -1635,3 +1637,27 @@ class testScheduler(unittest.TestCase):
assert A.data['status'] == 'MERGED'
assert A.reported == 2
self.assertEmptyQueues()
def test_single_nonexistent_post_job(self):
"Test launching a single post job that doesn't exist"
self.fake_jenkins.nonexistent_jobs.append('project-post')
self.jenkins.launch_retry_timeout = 0.1
e = {
"type": "ref-updated",
"submitter": {
"name": "User Name",
},
"refUpdate": {
"oldRev": "90f173846e3af9154517b88543ffbd1691f31366",
"newRev": "d479a0bfcb34da57a31adb2a595c0cf687812543",
"refName": "master",
"project": "org/project",
}
}
self.fake_gerrit.addEvent(e)
self.waitUntilSettled()
jobs = self.fake_jenkins.job_history
assert len(jobs) == 0
self.assertEmptyQueues()

View File

@ -2,6 +2,9 @@
envlist = pep8, pyflakes, py27
[testenv]
# Set STATSD env variables so that statsd code paths are tested.
setenv = STATSD_HOST=localhost
STATSD_PORT=8125
deps = -r{toxinidir}/tools/pip-requires
-r{toxinidir}/tools/test-requires
commands = nosetests {posargs}

View File

@ -219,6 +219,9 @@ class Jenkins(object):
self.callback_thread.start()
self.cleanup_thread = JenkinsCleanup(self)
self.cleanup_thread.start()
# Keep track of threads that will report a lost build in the future,
# in aid of testing
self.lost_threads = []
def stop(self):
self.cleanup_thread.stop()
@ -326,6 +329,7 @@ class Jenkins(object):
# so that the change will get dropped.
t = threading.Thread(target=self.declareBuildLost,
args=(build,))
self.lost_threads.append(t)
t.start()
return build
@ -333,6 +337,7 @@ class Jenkins(object):
# Call this from a new thread to invoke onBuildCompleted from
# a thread that has the queue lock.
self.onBuildCompleted(build.uuid, 'LOST', None, None)
self.lost_threads.remove(threading.currentThread())
def findBuildInQueue(self, build):
for item in self.jenkins.get_queue_info():

View File

@ -224,6 +224,7 @@ class Scheduler(threading.Thread):
self.trigger_event_queue.put(event)
self.queue_lock.release()
self.wake_event.set()
self.log.debug("Done adding trigger event: %s" % event)
def onBuildStarted(self, build):
self.log.debug("Adding start event for build: %s" % build)
@ -232,20 +233,22 @@ class Scheduler(threading.Thread):
self.result_event_queue.put(('started', build))
self.queue_lock.release()
self.wake_event.set()
self.log.debug("Done adding start event for build: %s" % build)
def onBuildCompleted(self, build):
self.log.debug("Adding complete event for build: %s" % build)
build.end_time = time.time()
if statsd:
dt = int((build.end_time - build.start_time) * 1000)
key = 'zuul.job.%s' % build.job.name
if build.result in ['SUCCESS', 'FAILURE']:
dt = int((build.end_time - build.start_time) * 1000)
statsd.timing(key, dt)
statsd.incr(key)
self.queue_lock.acquire()
self.result_event_queue.put(('completed', build))
self.queue_lock.release()
self.wake_event.set()
self.log.debug("Done adding complete event for build: %s" % build)
def reconfigure(self, config):
self.log.debug("Prepare to reconfigure")