Emit some stats from executor

Emit the load average, a counter for builds, and a gauge for
running builds.

Change-Id: I8541724f1322b8257b623b3b2cfd8f3e6b95574d
This commit is contained in:
James E. Blair 2017-10-10 15:42:26 -07:00
parent 107bb255f1
commit faf8198f2a
4 changed files with 45 additions and 4 deletions

View File

@ -32,7 +32,7 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
Zuul will report counters for each type of event it receives from Zuul will report counters for each type of event it receives from
each of its configured drivers. each of its configured drivers.
.. stat:: zuul.<tenant>.pipeline .. stat:: zuul.tenant.<tenant>.pipeline
Holds metrics specific to jobs. This hierarchy includes: Holds metrics specific to jobs. This hierarchy includes:
@ -125,6 +125,27 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
How long each item spent in the pipeline before its first job How long each item spent in the pipeline before its first job
started. started.
.. stat:: zuul.executor.<executor>
Holds metrics emitted by individual executors. The ``<executor>``
component of the key will be replaced with the hostname of the
executor.
.. stat:: builds
:type: counter
Incremented each time the executor starts a build.
.. stat:: running_builds
:type: gauge
The number of builds currently running on this executor.
.. stat:: load_average
:type: gauge
The one-minute load average of this executor, multiplied by 100.
As an example, given a job named `myjob` in `mytenant` triggered by a As an example, given a job named `myjob` in `mytenant` triggered by a
change to `myproject` on the `master` branch in the `gate` pipeline change to `myproject` on the `master` branch in the `gate` pipeline

View File

@ -1429,6 +1429,9 @@ class RecordingExecutorServer(zuul.executor.server.ExecutorServer):
be explicitly released. be explicitly released.
""" """
_job_class = RecordingAnsibleJob
def __init__(self, *args, **kw): def __init__(self, *args, **kw):
self._run_ansible = kw.pop('_run_ansible', False) self._run_ansible = kw.pop('_run_ansible', False)
self._test_root = kw.pop('_test_root', False) self._test_root = kw.pop('_test_root', False)
@ -1483,8 +1486,7 @@ class RecordingExecutorServer(zuul.executor.server.ExecutorServer):
args = json.loads(job.arguments) args = json.loads(job.arguments)
args['zuul']['_test'] = dict(test_root=self._test_root) args['zuul']['_test'] = dict(test_root=self._test_root)
job.arguments = json.dumps(args) job.arguments = json.dumps(args)
self.job_workers[job.unique] = RecordingAnsibleJob(self, job) super(RecordingExecutorServer, self).executeJob(job)
self.job_workers[job.unique].run()
def stopJob(self, job): def stopJob(self, job):
self.log.debug("handle stop") self.log.debug("handle stop")

View File

@ -117,6 +117,8 @@ class TestScheduler(ZuulTestCase):
self.assertReportedStat( self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.' 'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.total_changes', value='1|c') 'org_project.master.total_changes', value='1|c')
exec_key = 'zuul.executor.%s' % self.executor_server.hostname
self.assertReportedStat(exec_key + '.builds', value='1|c')
for build in self.history: for build in self.history:
self.assertTrue(build.parameters['zuul']['voting']) self.assertTrue(build.parameters['zuul']['voting'])

View File

@ -29,6 +29,7 @@ import time
import traceback import traceback
from zuul.lib.yamlutil import yaml from zuul.lib.yamlutil import yaml
from zuul.lib.config import get_default from zuul.lib.config import get_default
from zuul.lib.statsd import get_statsd
try: try:
import ara.plugins.callbacks as ara_callbacks import ara.plugins.callbacks as ara_callbacks
@ -1483,6 +1484,7 @@ class ExecutorExecuteWorker(gear.TextWorker):
class ExecutorServer(object): class ExecutorServer(object):
log = logging.getLogger("zuul.ExecutorServer") log = logging.getLogger("zuul.ExecutorServer")
_job_class = AnsibleJob
def __init__(self, config, connections={}, jobdir_root=None, def __init__(self, config, connections={}, jobdir_root=None,
keep_jobdir=False, log_streaming_port=DEFAULT_FINGER_PORT): keep_jobdir=False, log_streaming_port=DEFAULT_FINGER_PORT):
@ -1506,6 +1508,7 @@ class ExecutorServer(object):
nokeep=self.nokeep, nokeep=self.nokeep,
) )
self.statsd = get_statsd(config)
self.merge_root = get_default(self.config, 'executor', 'git_dir', self.merge_root = get_default(self.config, 'executor', 'git_dir',
'/var/lib/zuul/executor-git') '/var/lib/zuul/executor-git')
self.default_username = get_default(self.config, 'executor', self.default_username = get_default(self.config, 'executor',
@ -1652,6 +1655,10 @@ class ExecutorServer(object):
"to worker:") "to worker:")
self.merger_worker.shutdown() self.merger_worker.shutdown()
self.executor_worker.shutdown() self.executor_worker.shutdown()
if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
self.statsd.gauge(base_key + '.load_average', 0)
self.statsd.gauge(base_key + '.running_builds', 0)
self.log.debug("Stopped") self.log.debug("Stopped")
def pause(self): def pause(self):
@ -1776,7 +1783,10 @@ class ExecutorServer(object):
self.manageLoad() self.manageLoad()
def executeJob(self, job): def executeJob(self, job):
self.job_workers[job.unique] = AnsibleJob(self, job) if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
self.statsd.incr(base_key + '.builds')
self.job_workers[job.unique] = self._job_class(self, job)
self.job_workers[job.unique].run() self.job_workers[job.unique].run()
def manageLoad(self): def manageLoad(self):
@ -1795,6 +1805,12 @@ class ExecutorServer(object):
"Re-registering as load is within limits {} <= {}".format( "Re-registering as load is within limits {} <= {}".format(
load_avg, self.max_load_avg)) load_avg, self.max_load_avg))
self.register_work() self.register_work()
if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
self.statsd.gauge(base_key + '.load_average',
int(load_avg * 100))
self.statsd.gauge(base_key + '.running_builds',
len(self.job_workers))
def finishJob(self, unique): def finishJob(self, unique):
del(self.job_workers[unique]) del(self.job_workers[unique])