Update statsd output for tenants

Update the statsd output to account for tenants and other v3 changes.

Change-Id: I984e1930ab63d9a551cf33be922bac447ad0df9d
This commit is contained in:
James E. Blair 2017-10-09 07:02:40 -07:00
parent bccdfcf63d
commit 80ac158acd
4 changed files with 118 additions and 85 deletions

View File

@ -33,17 +33,13 @@ Metrics
These metrics are emitted by the Zuul :ref:`scheduler`: These metrics are emitted by the Zuul :ref:`scheduler`:
.. stat:: gerrit.event.<type> .. stat:: zuul.event.<driver>.<type>
:type: counter :type: counter
Gerrit emits different kinds of messages over its `stream-events` Zuul will report counters for each type of event it receives from
interface. Zuul will report counters for each type of event it each of its configured drivers.
receives from Gerrit.
Refer to your Gerrit installation documentation for a complete .. stat:: zuul.tenant.<tenant>.pipeline
list of Gerrit event types.
.. stat:: zuul.pipeline
Holds metrics specific to jobs. This hierarchy includes: Holds metrics specific to jobs. This hierarchy includes:
@ -63,22 +59,60 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
The number of items currently being processed by this The number of items currently being processed by this
pipeline. pipeline.
.. stat:: job .. stat:: project
Subtree detailing per jobs statistics: This hierarchy holds more specific metrics for each project
participating in the pipeline.
.. stat:: <jobname> .. stat:: <canonical_hostname>
The triggered job name. The canonical hostname for the triggering project.
Embedded ``.`` characters will be translated to ``_``.
.. stat:: <result> .. stat:: <project>
:type: counter, timer
A counter for each type of result (e.g., ``SUCCESS`` or The name of the triggering project. Embedded ``/`` or
``FAILURE``, ``ERROR``, etc.) for the job. If the ``.`` characters will be translated to ``_``.
result is ``SUCCESS`` or ``FAILURE``, Zuul will
additionally report the duration of the build as a .. stat:: <branch>
timer.
The name of the triggering branch. Embedded ``/`` or
``.`` characters will be translated to ``_``.
.. stat:: job
Subtree detailing per-project job statistics:
.. stat:: <jobname>
The triggered job name.
.. stat:: <result>
:type: counter, timer
A counter for each type of result (e.g., ``SUCCESS`` or
``FAILURE``, ``ERROR``, etc.) for the job. If the
result is ``SUCCESS`` or ``FAILURE``, Zuul will
additionally report the duration of the build as a
timer.
.. stat:: current_changes
:type: gauge
The number of items of this project currently being
processed by this pipeline.
.. stat:: resident_time
:type: timer
A timer metric reporting how long each item for this
project has been in the pipeline.
.. stat:: total_changes
:type: counter
The number of changes for this project processed by the
pipeline since Zuul started.
.. stat:: resident_time .. stat:: resident_time
:type: timer :type: timer
@ -98,34 +132,12 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
How long each item spent in the pipeline before its first job How long each item spent in the pipeline before its first job
started. started.
.. stat:: <project>
This hierarchy holds more specific metrics for each project As an example, given a job named `myjob` in `mytenant` triggered by a
participating in the pipeline. If the project name contains change to `myproject` (hosted on `example.com`) on the `master` branch in the `gate` pipeline
a ``/`` character, it will be replaced with a ``.``. which took 40 seconds to build, the Zuul scheduler will emit the
following statsd events:
.. stat:: current_changes * ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` +1
:type: gauge * ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` 40 seconds
* ``zuul.tenant.mytenant.pipeline.gate.all_jobs`` +1
The number of items of this project currently being
processed by this pipeline.
.. stat:: resident_time
:type: timer
A timer metric reporting how long each item for this
project has been in the pipeline.
.. stat:: total_changes
:type: counter
The number of changes for this project processed by the
pipeline since Zuul started.
As an example, given a job named `myjob` triggered by the `gate` pipeline
which took 40 seconds to build, the Zuul scheduler will emit the following
statsd events:
* ``zuul.pipeline.gate.job.myjob.SUCCESS`` +1
* ``zuul.pipeline.gate.job.myjob`` 40 seconds
* ``zuul.pipeline.gate.all_jobs`` +1

View File

@ -89,25 +89,34 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(self.getJobFromHistory('project-test2').node, self.assertEqual(self.getJobFromHistory('project-test2').node,
'label1') 'label1')
for stat in self.statsd.stats:
k, v = stat.decode('utf-8').split(':')
self.log.debug('stat %s:%s', k, v)
# TODOv3(jeblair): we may want to report stats by tenant (also?). # TODOv3(jeblair): we may want to report stats by tenant (also?).
# Per-driver # Per-driver
self.assertReportedStat('zuul.event.gerrit.comment-added', value='1|c') self.assertReportedStat('zuul.event.gerrit.comment-added', value='1|c')
# Per-driver per-connection # Per-driver per-connection
self.assertReportedStat('zuul.event.gerrit.gerrit.comment-added', self.assertReportedStat('zuul.event.gerrit.gerrit.comment-added',
value='1|c') value='1|c')
self.assertReportedStat('zuul.pipeline.gate.current_changes',
value='1|g')
self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
kind='ms')
self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
value='1|c')
self.assertReportedStat('zuul.pipeline.gate.resident_time', kind='ms')
self.assertReportedStat('zuul.pipeline.gate.total_changes',
value='1|c')
self.assertReportedStat( self.assertReportedStat(
'zuul.pipeline.gate.org.project.resident_time', kind='ms') 'zuul.tenant.tenant-one.pipeline.gate.current_changes',
value='1|g')
self.assertReportedStat( self.assertReportedStat(
'zuul.pipeline.gate.org.project.total_changes', value='1|c') 'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.job.project-merge.SUCCESS', kind='ms')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.job.project-merge.SUCCESS', value='1|c')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.resident_time', kind='ms')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.total_changes', value='1|c')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.resident_time', kind='ms')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.total_changes', value='1|c')
for build in self.history: for build in self.history:
self.assertTrue(build.parameters['zuul']['voting']) self.assertTrue(build.parameters['zuul']['voting'])

View File

@ -820,19 +820,28 @@ class PipelineManager(object):
dt = None dt = None
items = len(self.pipeline.getAllItems()) items = len(self.pipeline.getAllItems())
# stats.timers.zuul.pipeline.NAME.resident_time tenant = self.pipeline.layout.tenant
# stats_counts.zuul.pipeline.NAME.total_changes basekey = 'zuul.tenant.%s' % tenant.name
# stats.gauges.zuul.pipeline.NAME.current_changes key = '%s.pipeline.%s' % (basekey, self.pipeline.name)
key = 'zuul.pipeline.%s' % self.pipeline.name # stats.timers.zuul.tenant.<tenant>.pipeline.<pipeline>.resident_time
# stats_counts.zuul.tenant.<tenant>.pipeline.<pipeline>.total_changes
# stats.gauges.zuul.tenant.<tenant>.pipeline.<pipeline>.current_changes
self.sched.statsd.gauge(key + '.current_changes', items) self.sched.statsd.gauge(key + '.current_changes', items)
if dt: if dt:
self.sched.statsd.timing(key + '.resident_time', dt) self.sched.statsd.timing(key + '.resident_time', dt)
self.sched.statsd.incr(key + '.total_changes') self.sched.statsd.incr(key + '.total_changes')
# stats.timers.zuul.pipeline.NAME.ORG.PROJECT.resident_time hostname = (item.change.project.canonical_hostname.
# stats_counts.zuul.pipeline.NAME.ORG.PROJECT.total_changes replace('.', '_'))
project_name = item.change.project.name.replace('/', '.') projectname = (item.change.project.name.
key += '.%s' % project_name replace('.', '_').replace('/', '.'))
projectname = projectname.replace('.', '_').replace('/', '.')
branchname = item.change.branch.replace('.', '_').replace('/', '.')
# stats.timers.zuul.tenant.<tenant>.pipeline.<pipeline>.
# project.<host>.<project>.<branch>.resident_time
# stats_counts.zuul.tenant.<tenant>.pipeline.<pipeline>.
# project.<host>.<project>.<branch>.total_changes
key += '.project.%s.%s.%s' % (hostname, projectname, branchname)
if dt: if dt:
self.sched.statsd.timing(key + '.resident_time', dt) self.sched.statsd.timing(key + '.resident_time', dt)
self.sched.statsd.incr(key + '.total_changes') self.sched.statsd.incr(key + '.total_changes')

View File

@ -282,31 +282,34 @@ class Scheduler(threading.Thread):
build.result = result build.result = result
try: try:
if self.statsd and build.pipeline: if self.statsd and build.pipeline:
jobname = build.job.name.replace('.', '_') tenant = build.pipeline.layout.tenant
key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name jobname = build.job.name.replace('.', '_').replace('/', '_')
hostname = (build.build_set.item.change.project.
canonical_hostname.replace('.', '_'))
projectname = (build.build_set.item.change.project.name.
replace('.', '_').replace('/', '_'))
branchname = (build.build_set.item.change.branch.
replace('.', '_').replace('/', '_'))
basekey = 'zuul.tenant.%s' % tenant.name
pipekey = '%s.pipeline.%s' % (basekey, build.pipeline.name)
# zuul.tenant.<tenant>.pipeline.<pipeline>.all_jobs
key = '%s.all_jobs' % pipekey
self.statsd.incr(key) self.statsd.incr(key)
for label in build.node_labels: jobkey = '%s.project.%s.%s.%s.job.%s' % (
# Jenkins includes the node name in its list of labels, so pipekey, hostname, projectname, branchname, jobname)
# we filter it out here, since that is not statistically # zuul.tenant.<tenant>.pipeline.<pipeline>.project.
# interesting. # <host>.<project>.<branch>.job.<job>.<result>
if label == build.node_name: key = '%s.%s' % (jobkey, build.result)
continue
dt = int((build.start_time - build.execute_time) * 1000)
key = 'zuul.pipeline.%s.label.%s.wait_time' % (
build.pipeline.name, label)
self.statsd.timing(key, dt)
key = 'zuul.pipeline.%s.job.%s.%s' % (build.pipeline.name,
jobname, build.result)
if build.result in ['SUCCESS', 'FAILURE'] and build.start_time: if build.result in ['SUCCESS', 'FAILURE'] and build.start_time:
dt = int((build.end_time - build.start_time) * 1000) dt = int((build.end_time - build.start_time) * 1000)
self.statsd.timing(key, dt) self.statsd.timing(key, dt)
self.statsd.incr(key) self.statsd.incr(key)
# zuul.tenant.<tenant>.pipeline.<pipeline>.project.
key = 'zuul.pipeline.%s.job.%s.wait_time' % ( # <host>.<project>.<branch>.job.<job>.wait_time
build.pipeline.name, jobname) key = '%s.wait_time' % jobkey
dt = int((build.start_time - build.execute_time) * 1000) dt = int((build.start_time - build.execute_time) * 1000)
self.statsd.timing(key, dt) self.statsd.timing(key, dt)
except: except Exception:
self.log.exception("Exception reporting runtime stats") self.log.exception("Exception reporting runtime stats")
event = BuildCompletedEvent(build) event = BuildCompletedEvent(build)
self.result_event_queue.put(event) self.result_event_queue.put(event)