Browse Source

Update statsd output for tenants

Update the statsd output to account for tenants and other v3 changes.

Change-Id: I984e1930ab63d9a551cf33be922bac447ad0df9d
changes/80/510580/1
James E. Blair 5 years ago
parent
commit
80ac158acd
  1. 108
      doc/source/admin/monitoring.rst
  2. 31
      tests/unit/test_scheduler.py
  3. 25
      zuul/manager/__init__.py
  4. 39
      zuul/scheduler.py

108
doc/source/admin/monitoring.rst

@ -33,17 +33,13 @@ Metrics
These metrics are emitted by the Zuul :ref:`scheduler`:
.. stat:: gerrit.event.<type>
.. stat:: zuul.event.<driver>.<type>
:type: counter
Gerrit emits different kinds of messages over its `stream-events`
interface. Zuul will report counters for each type of event it
receives from Gerrit.
Zuul will report counters for each type of event it receives from
each of its configured drivers.
Refer to your Gerrit installation documentation for a complete
list of Gerrit event types.
.. stat:: zuul.pipeline
.. stat:: zuul.tenant.<tenant>.pipeline
Holds metrics specific to jobs. This hierarchy includes:
@ -63,22 +59,60 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
The number of items currently being processed by this
pipeline.
.. stat:: job
.. stat:: project
This hierarchy holds more specific metrics for each project
participating in the pipeline.
.. stat:: <canonical_hostname>
The canonical hostname for the triggering project.
Embedded ``.`` characters will be translated to ``_``.
.. stat:: <project>
The name of the triggering project. Embedded ``/`` or
``.`` characters will be translated to ``_``.
.. stat:: <branch>
The name of the triggering branch. Embedded ``/`` or
``.`` characters will be translated to ``_``.
.. stat:: job
Subtree detailing per-project job statistics:
.. stat:: <jobname>
Subtree detailing per-job statistics:
The triggered job name.
.. stat:: <jobname>
.. stat:: <result>
:type: counter, timer
The triggered job name.
A counter for each type of result (e.g., ``SUCCESS``,
``FAILURE``, ``ERROR``, etc.) for the job. If the
result is ``SUCCESS`` or ``FAILURE``, Zuul will
additionally report the duration of the build as a
timer.
.. stat:: <result>
:type: counter, timer
.. stat:: current_changes
:type: gauge
A counter for each type of result (e.g., ``SUCCESS``,
``FAILURE``, ``ERROR``, etc.) for the job. If the
result is ``SUCCESS`` or ``FAILURE``, Zuul will
additionally report the duration of the build as a
timer.
The number of items of this project currently being
processed by this pipeline.
.. stat:: resident_time
:type: timer
A timer metric reporting how long each item for this
project has been in the pipeline.
.. stat:: total_changes
:type: counter
The number of changes for this project processed by the
pipeline since Zuul started.
.. stat:: resident_time
:type: timer
@ -98,34 +132,12 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
How long each item spent in the pipeline before its first job
started.
.. stat:: <project>
This hierarchy holds more specific metrics for each project
participating in the pipeline. If the project name contains
a ``/`` character, it will be replaced with a ``.``.
.. stat:: current_changes
:type: gauge
The number of items of this project currently being
processed by this pipeline.
.. stat:: resident_time
:type: timer
A timer metric reporting how long each item for this
project has been in the pipeline.
.. stat:: total_changes
:type: counter
The number of changes for this project processed by the
pipeline since Zuul started.
As an example, given a job named `myjob` triggered by the `gate` pipeline
which took 40 seconds to build, the Zuul scheduler will emit the following
statsd events:
As an example, given a job named `myjob` in `mytenant` triggered by a
change to `myproject` (hosted on `example.com`) on the `master` branch
in the `gate` pipeline which took 40 seconds to build, the Zuul
scheduler will emit the following statsd events:
* ``zuul.pipeline.gate.job.myjob.SUCCESS`` +1
* ``zuul.pipeline.gate.job.myjob`` 40 seconds
* ``zuul.pipeline.gate.all_jobs`` +1
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` +1
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` 40 seconds
* ``zuul.tenant.mytenant.pipeline.gate.all_jobs`` +1

31
tests/unit/test_scheduler.py

@ -89,25 +89,34 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(self.getJobFromHistory('project-test2').node,
'label1')
for stat in self.statsd.stats:
k, v = stat.decode('utf-8').split(':')
self.log.debug('stat %s:%s', k, v)
# TODOv3(jeblair): we may want to report stats by tenant (also?).
# Per-driver
self.assertReportedStat('zuul.event.gerrit.comment-added', value='1|c')
# Per-driver per-connection
self.assertReportedStat('zuul.event.gerrit.gerrit.comment-added',
value='1|c')
self.assertReportedStat('zuul.pipeline.gate.current_changes',
value='1|g')
self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
kind='ms')
self.assertReportedStat('zuul.pipeline.gate.job.project-merge.SUCCESS',
value='1|c')
self.assertReportedStat('zuul.pipeline.gate.resident_time', kind='ms')
self.assertReportedStat('zuul.pipeline.gate.total_changes',
value='1|c')
self.assertReportedStat(
'zuul.pipeline.gate.org.project.resident_time', kind='ms')
'zuul.tenant.tenant-one.pipeline.gate.current_changes',
value='1|g')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.job.project-merge.SUCCESS', kind='ms')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.job.project-merge.SUCCESS', value='1|c')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.resident_time', kind='ms')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.total_changes', value='1|c')
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.resident_time', kind='ms')
self.assertReportedStat(
'zuul.pipeline.gate.org.project.total_changes', value='1|c')
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.total_changes', value='1|c')
for build in self.history:
self.assertTrue(build.parameters['zuul']['voting'])

25
zuul/manager/__init__.py

@ -820,19 +820,28 @@ class PipelineManager(object):
dt = None
items = len(self.pipeline.getAllItems())
# stats.timers.zuul.pipeline.NAME.resident_time
# stats_counts.zuul.pipeline.NAME.total_changes
# stats.gauges.zuul.pipeline.NAME.current_changes
key = 'zuul.pipeline.%s' % self.pipeline.name
tenant = self.pipeline.layout.tenant
basekey = 'zuul.tenant.%s' % tenant.name
key = '%s.pipeline.%s' % (basekey, self.pipeline.name)
# stats.timers.zuul.tenant.<tenant>.pipeline.<pipeline>.resident_time
# stats_counts.zuul.tenant.<tenant>.pipeline.<pipeline>.total_changes
# stats.gauges.zuul.tenant.<tenant>.pipeline.<pipeline>.current_changes
self.sched.statsd.gauge(key + '.current_changes', items)
if dt:
self.sched.statsd.timing(key + '.resident_time', dt)
self.sched.statsd.incr(key + '.total_changes')
# stats.timers.zuul.pipeline.NAME.ORG.PROJECT.resident_time
# stats_counts.zuul.pipeline.NAME.ORG.PROJECT.total_changes
project_name = item.change.project.name.replace('/', '.')
key += '.%s' % project_name
hostname = (item.change.project.canonical_hostname.
replace('.', '_'))
projectname = (item.change.project.name.
replace('.', '_').replace('/', '.'))
projectname = projectname.replace('.', '_').replace('/', '.')
branchname = item.change.branch.replace('.', '_').replace('/', '.')
# stats.timers.zuul.tenant.<tenant>.pipeline.<pipeline>.
# project.<host>.<project>.<branch>.resident_time
# stats_counts.zuul.tenant.<tenant>.pipeline.<pipeline>.
# project.<host>.<project>.<branch>.total_changes
key += '.project.%s.%s.%s' % (hostname, projectname, branchname)
if dt:
self.sched.statsd.timing(key + '.resident_time', dt)
self.sched.statsd.incr(key + '.total_changes')

39
zuul/scheduler.py

@ -282,31 +282,34 @@ class Scheduler(threading.Thread):
build.result = result
try:
if self.statsd and build.pipeline:
jobname = build.job.name.replace('.', '_')
key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name
tenant = build.pipeline.layout.tenant
jobname = build.job.name.replace('.', '_').replace('/', '_')
hostname = (build.build_set.item.change.project.
canonical_hostname.replace('.', '_'))
projectname = (build.build_set.item.change.project.name.
replace('.', '_').replace('/', '_'))
branchname = (build.build_set.item.change.branch.
replace('.', '_').replace('/', '_'))
basekey = 'zuul.tenant.%s' % tenant.name
pipekey = '%s.pipeline.%s' % (basekey, build.pipeline.name)
# zuul.tenant.<tenant>.pipeline.<pipeline>.all_jobs
key = '%s.all_jobs' % pipekey
self.statsd.incr(key)
for label in build.node_labels:
# Jenkins includes the node name in its list of labels, so
# we filter it out here, since that is not statistically
# interesting.
if label == build.node_name:
continue
dt = int((build.start_time - build.execute_time) * 1000)
key = 'zuul.pipeline.%s.label.%s.wait_time' % (
build.pipeline.name, label)
self.statsd.timing(key, dt)
key = 'zuul.pipeline.%s.job.%s.%s' % (build.pipeline.name,
jobname, build.result)
jobkey = '%s.project.%s.%s.%s.job.%s' % (
pipekey, hostname, projectname, branchname, jobname)
# zuul.tenant.<tenant>.pipeline.<pipeline>.project.
# <host>.<project>.<branch>.job.<job>.<result>
key = '%s.%s' % (jobkey, build.result)
if build.result in ['SUCCESS', 'FAILURE'] and build.start_time:
dt = int((build.end_time - build.start_time) * 1000)
self.statsd.timing(key, dt)
self.statsd.incr(key)
key = 'zuul.pipeline.%s.job.%s.wait_time' % (
build.pipeline.name, jobname)
# zuul.tenant.<tenant>.pipeline.<pipeline>.project.
# <host>.<project>.<branch>.job.<job>.wait_time
key = '%s.wait_time' % jobkey
dt = int((build.start_time - build.execute_time) * 1000)
self.statsd.timing(key, dt)
except:
except Exception:
self.log.exception("Exception reporting runtime stats")
event = BuildCompletedEvent(build)
self.result_event_queue.put(event)

Loading…
Cancel
Save