Report the per-job build wait time to graphite
Knowing how long a job waits to build in aggregate can give useful insights into the performance and capacity of the build system. This change also uses the node labels sent back from the gearman worker to submit metrics within that context. Change-Id: I173251c8b1569755124b7cb1a48b6274bf38c94b Depends-On: Ibca938fcf8a65facd7e39dab4eb994dfc637722a
This commit is contained in:
parent
f8e892567c
commit
b2332081d8
|
@ -598,6 +598,8 @@ class FakeBuild(threading.Thread):
|
|||
result = 'RUN_ERROR'
|
||||
else:
|
||||
data['result'] = result
|
||||
data['node_labels'] = ['bare-necessities']
|
||||
data['node_name'] = 'foo'
|
||||
work_fail = False
|
||||
|
||||
changes = None
|
||||
|
|
|
@ -232,6 +232,12 @@ class Client(zuul.cmd.ZuulApp):
|
|||
'number': {
|
||||
'title': 'Number'
|
||||
},
|
||||
'node_labels': {
|
||||
'title': 'Node Labels'
|
||||
},
|
||||
'node_name': {
|
||||
'title': 'Node Name'
|
||||
},
|
||||
'worker.name': {
|
||||
'title': 'Worker'
|
||||
},
|
||||
|
@ -245,7 +251,7 @@ class Client(zuul.cmd.ZuulApp):
|
|||
'worker.fqdn': {
|
||||
'title': 'Worker Domain'
|
||||
},
|
||||
'worker.progam': {
|
||||
'worker.program': {
|
||||
'title': 'Worker Program'
|
||||
},
|
||||
'worker.version': {
|
||||
|
|
|
@ -423,9 +423,11 @@ class Gearman(object):
|
|||
|
||||
build = self.builds.get(job.unique)
|
||||
if build:
|
||||
data = getJobData(job)
|
||||
build.node_labels = data.get('node_labels')
|
||||
build.node_name = data.get('node_name')
|
||||
if not build.canceled:
|
||||
if result is None:
|
||||
data = getJobData(job)
|
||||
result = data.get('result')
|
||||
if result is None:
|
||||
build.retry = True
|
||||
|
|
|
@ -578,6 +578,8 @@ class Build(object):
|
|||
self.retry = False
|
||||
self.parameters = {}
|
||||
self.worker = Worker()
|
||||
self.node_labels = []
|
||||
self.node_name = None
|
||||
|
||||
def __repr__(self):
|
||||
return ('<Build %s of %s on %s>' %
|
||||
|
@ -799,7 +801,9 @@ class QueueItem(object):
|
|||
'canceled': build.canceled if build else None,
|
||||
'retry': build.retry if build else None,
|
||||
'number': build.number if build else None,
|
||||
'worker': worker
|
||||
'node_labels': build.node_labels if build else [],
|
||||
'node_name': build.node_name if build else None,
|
||||
'worker': worker,
|
||||
})
|
||||
|
||||
if self.pipeline.haveAllJobsStarted(self):
|
||||
|
|
|
@ -533,6 +533,16 @@ class Scheduler(threading.Thread):
|
|||
def onBuildStarted(self, build):
|
||||
self.log.debug("Adding start event for build: %s" % build)
|
||||
build.start_time = time.time()
|
||||
try:
|
||||
if statsd and build.pipeline:
|
||||
jobname = build.job.name.replace('.', '_')
|
||||
key = 'zuul.pipeline.%s.job.%s.wait_time' % (
|
||||
build.pipeline.name, jobname)
|
||||
dt = int((build.start_time - build.launch_time) * 1000)
|
||||
statsd.timing(key, dt)
|
||||
statsd.incr(key)
|
||||
except:
|
||||
self.log.exception("Exception reporting runtime stats")
|
||||
event = BuildStartedEvent(build)
|
||||
self.result_event_queue.put(event)
|
||||
self.wake_event.set()
|
||||
|
@ -544,14 +554,24 @@ class Scheduler(threading.Thread):
|
|||
try:
|
||||
if statsd and build.pipeline:
|
||||
jobname = build.job.name.replace('.', '_')
|
||||
key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name
|
||||
for label in build.node_labels:
|
||||
# Jenkins includes the node name in its list of labels, so
|
||||
# we filter it out here, since that is not statistically
|
||||
# interesting.
|
||||
if label == build.node_name:
|
||||
continue
|
||||
dt = int((build.start_time - build.launch_time) * 1000)
|
||||
key = 'zuul.node_type.%s.job.%s.wait_time' % (
|
||||
label, jobname)
|
||||
statsd.timing(key, dt)
|
||||
statsd.incr(key)
|
||||
key = 'zuul.pipeline.%s.job.%s.%s' % (build.pipeline.name,
|
||||
jobname, build.result)
|
||||
if build.result in ['SUCCESS', 'FAILURE'] and build.start_time:
|
||||
dt = int((build.end_time - build.start_time) * 1000)
|
||||
statsd.timing(key, dt)
|
||||
statsd.incr(key)
|
||||
key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name
|
||||
statsd.incr(key)
|
||||
except:
|
||||
self.log.exception("Exception reporting runtime stats")
|
||||
event = BuildCompletedEvent(build)
|
||||
|
|
Loading…
Reference in New Issue