diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst index d43fd035c8..1c17c28361 100644 --- a/doc/source/admin/monitoring.rst +++ b/doc/source/admin/monitoring.rst @@ -131,6 +131,14 @@ These metrics are emitted by the Zuul :ref:`scheduler`: component of the key will be replaced with the hostname of the executor. + .. stat:: merger.<result> + :type: counter + + Incremented to represent the status of a Zuul executor's merger + operations. ``<result>`` can be either ``SUCCESS`` or ``FAILURE``. + A failed merge operation which would be accounted for as a ``FAILURE`` + is what ends up being returned by Zuul as a ``MERGER_FAILURE``. + .. stat:: builds :type: counter @@ -148,6 +156,27 @@ These metrics are emitted by the Zuul :ref:`scheduler`: The number of builds currently running on this executor. This includes starting builds. + .. stat:: phase + + Subtree detailing per-phase execution statistics: + + .. stat:: <phase> + + ``<phase>`` represents a phase in the execution of a job. + This can be an *internal* phase (such as ``setup`` or ``cleanup``) as + well as *job* phases such as ``pre``, ``run`` or ``post``. + + .. stat:: <result> + :type: counter + + A counter for each type of result. + These results do not, by themselves, determine the status of a build + but are indicators of the exit status provided by Ansible for the + execution of a particular phase. + + Examples of possible counters for each phase are: ``RESULT_NORMAL``, + ``RESULT_TIMED_OUT``, ``RESULT_UNREACHABLE``, ``RESULT_ABORTED``. + ..
stat:: load_average :type: gauge diff --git a/zuul/executor/server.py b/zuul/executor/server.py index a2a9b42cdd..a831a53efb 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -780,8 +780,17 @@ class AnsibleJob(object): ret = merger.mergeChanges(items, repo_state=repo_state) if not ret: # merge conflict result = dict(result='MERGER_FAILURE') + if self.executor_server.statsd: + base_key = ("zuul.executor.%s.merger" % + self.executor_server.hostname) + self.executor_server.statsd.incr(base_key + ".FAILURE") self.job.sendWorkComplete(json.dumps(result)) return False + + if self.executor_server.statsd: + base_key = ("zuul.executor.%s.merger" % + self.executor_server.hostname) + self.executor_server.statsd.incr(base_key + ".SUCCESS") recent = ret[3] for key, commit in recent.items(): (connection, project, branch) = key @@ -1465,6 +1474,11 @@ class AnsibleJob(object): wrapped=False) self.log.debug("Ansible complete, result %s code %s" % ( self.RESULT_MAP[result], code)) + if self.executor_server.statsd: + base_key = ("zuul.executor.%s.phase.setup" % + self.executor_server.hostname) + self.executor_server.statsd.incr(base_key + ".%s" % + self.RESULT_MAP[result]) return result, code def runAnsibleCleanup(self, playbook): @@ -1485,6 +1499,11 @@ class AnsibleJob(object): wrapped=False) self.log.debug("Ansible complete, result %s code %s" % ( self.RESULT_MAP[result], code)) + if self.executor_server.statsd: + base_key = ("zuul.executor.%s.phase.cleanup" % + self.executor_server.hostname) + self.executor_server.statsd.incr(base_key + ".%s" % + self.RESULT_MAP[result]) return result, code def emitPlaybookBanner(self, playbook, step, phase, result=None): @@ -1554,6 +1573,11 @@ class AnsibleJob(object): cmd=cmd, timeout=timeout, playbook=playbook) self.log.debug("Ansible complete, result %s code %s" % ( self.RESULT_MAP[result], code)) + if self.executor_server.statsd: + base_key = ("zuul.executor.%s.phase.%s" % + (self.executor_server.hostname, phase or 
'unknown')) + self.executor_server.statsd.incr(base_key + ".%s" % + self.RESULT_MAP[result]) self.emitPlaybookBanner(playbook, 'END', phase, result=result) return result, code