Normalize hostname for statsd

When doing the statsd reporting we need to normalize the
hostname. Otherwise if the hostname is an ip address we get scary
metrics like:

  zuul.executor.10.130.3.21.running_builds:0|g

So we need to normalize these and replace the '.' and ':' characters
with '_', as is already done in nodepool.

Change-Id: Ide5517fb4a851f801c7ff0816d5e4f5876cc6401
This commit is contained in:
Tobias Henkel 2018-02-23 08:35:42 +01:00
parent 94c489437a
commit f176bbda24
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2
3 changed files with 69 additions and 18 deletions

View File

@ -2437,6 +2437,16 @@ class TestScheduler(ZuulTestCase):
self.assertReportedStat('test-timing', '3|ms')
self.assertReportedStat('test-gauge', '12|g')
# test key normalization
statsd.extra_keys = {'hostname': '1_2_3_4'}
statsd.incr('test-incr.{hostname}.{fake}', fake='1:2')
statsd.timing('test-timing.{hostname}.{fake}', 3, fake='1:2')
statsd.gauge('test-gauge.{hostname}.{fake}', 12, fake='1:2')
self.assertReportedStat('test-incr.1_2_3_4.1_2', '1|c')
self.assertReportedStat('test-timing.1_2_3_4.1_2', '3|ms')
self.assertReportedStat('test-gauge.1_2_3_4.1_2', '12|g')
def test_stuck_job_cleanup(self):
"Test that pending jobs are cleaned up if removed from layout"

View File

@ -796,15 +796,13 @@ class AnsibleJob(object):
if not ret: # merge conflict
result = dict(result='MERGER_FAILURE')
if self.executor_server.statsd:
base_key = ("zuul.executor.%s.merger" %
self.executor_server.hostname)
base_key = "zuul.executor.{hostname}.merger"
self.executor_server.statsd.incr(base_key + ".FAILURE")
self.job.sendWorkComplete(json.dumps(result))
return False
if self.executor_server.statsd:
base_key = ("zuul.executor.%s.merger" %
self.executor_server.hostname)
base_key = "zuul.executor.{hostname}.merger"
self.executor_server.statsd.incr(base_key + ".SUCCESS")
recent = ret[3]
for key, commit in recent.items():
@ -1507,8 +1505,7 @@ class AnsibleJob(object):
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
if self.executor_server.statsd:
base_key = ("zuul.executor.%s.phase.setup" %
self.executor_server.hostname)
base_key = "zuul.executor.{hostname}.phase.setup"
self.executor_server.statsd.incr(base_key + ".%s" %
self.RESULT_MAP[result])
return result, code
@ -1532,8 +1529,7 @@ class AnsibleJob(object):
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
if self.executor_server.statsd:
base_key = ("zuul.executor.%s.phase.cleanup" %
self.executor_server.hostname)
base_key = "zuul.executor.{hostname}.phase.cleanup"
self.executor_server.statsd.incr(base_key + ".%s" %
self.RESULT_MAP[result])
return result, code
@ -1606,10 +1602,11 @@ class AnsibleJob(object):
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
if self.executor_server.statsd:
base_key = ("zuul.executor.%s.phase.%s" %
(self.executor_server.hostname, phase or 'unknown'))
self.executor_server.statsd.incr(base_key + ".%s" %
self.RESULT_MAP[result])
base_key = "zuul.executor.{hostname}.phase.{phase}"
self.executor_server.statsd.incr(
base_key + ".{result}",
result=self.RESULT_MAP[result],
phase=phase or 'unknown')
self.emitPlaybookBanner(playbook, 'END', phase, result=result)
return result, code
@ -1673,7 +1670,8 @@ class ExecutorServer(object):
nokeep=self.nokeep,
)
self.statsd = get_statsd(config)
statsd_extra_keys = {'hostname': self.hostname}
self.statsd = get_statsd(config, statsd_extra_keys)
self.merge_root = get_default(self.config, 'executor', 'git_dir',
'/var/lib/zuul/executor-git')
self.default_username = get_default(self.config, 'executor',
@ -1863,7 +1861,7 @@ class ExecutorServer(object):
self.executor_worker.shutdown()
if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
base_key = 'zuul.executor.{hostname}'
self.statsd.gauge(base_key + '.load_average', 0)
self.statsd.gauge(base_key + '.pct_used_ram', 0)
self.statsd.gauge(base_key + '.running_builds', 0)
@ -2004,7 +2002,7 @@ class ExecutorServer(object):
def executeJob(self, job):
if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
base_key = 'zuul.executor.{hostname}'
self.statsd.incr(base_key + '.builds')
self.job_workers[job.unique] = self._job_class(self, job)
self.job_workers[job.unique].run()
@ -2061,7 +2059,7 @@ class ExecutorServer(object):
starting_builds, max_starting_builds))
self.register_work()
if self.statsd:
base_key = 'zuul.executor.%s' % self.hostname
base_key = 'zuul.executor.{hostname}'
self.statsd.gauge(base_key + '.load_average',
int(load_avg * 100))
self.statsd.gauge(base_key + '.pct_used_ram',

View File

@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import copy
from zuul.lib.config import get_default
@ -22,9 +23,51 @@ def get_statsd_config(config):
return (statsd_host, statsd_port, statsd_prefix)
def get_statsd(config):
def normalize_statsd_name(name):
    """Return *name* made safe for use as a statsd metric component.

    Statsd uses '.' as the hierarchy separator and ':' as the value
    separator, so both are mapped to '_' (e.g. an IP address such as
    '10.130.3.21' becomes '10_130_3_21').
    """
    # Single C-level pass over the string instead of chained replace().
    return name.translate(str.maketrans('.:', '__'))
def get_statsd(config, extra_keys=None):
    """Return a statsd client configured from *config*, or None.

    :param config: configparser object consulted by get_statsd_config()
        for the statsd host, port and prefix.
    :param extra_keys: optional dict of substitution values (for example
        ``{'hostname': ...}``) that are normalized once and expanded into
        every ``{placeholder}``-style stat name.
    :returns: a CustomStatsClient, or None when no statsd host is
        configured.
    """
    (statsd_host, statsd_port, statsd_prefix) = get_statsd_config(config)
    if statsd_host is None:
        return None
    # Imported lazily so statsd remains an optional dependency.
    import statsd

    class CustomStatsClient(statsd.StatsClient):
        """StatsClient that expands and normalizes stat-name placeholders.

        Stat names may contain ``{key}`` placeholders which are filled
        from per-call keyword arguments and from the client-wide
        ``extra_keys``; every substituted value is run through
        normalize_statsd_name() so '.' and ':' cannot corrupt the
        statsd metric hierarchy.
        """

        def __init__(self, host, port, prefix, extra=None):
            # Copy so we never mutate the caller's dict while
            # normalizing the values.
            self.extra_keys = copy.copy(extra) or {}
            for key in self.extra_keys:
                self.extra_keys[key] = normalize_statsd_name(
                    self.extra_keys[key])
            super().__init__(host, port, prefix)

        def _format_stat(self, name, **format_keys):
            # **format_keys is already a fresh dict, so it is safe to
            # rebuild in place; normalize every per-call value before
            # substitution.
            normalized = {key: normalize_statsd_name(value)
                          for key, value in format_keys.items()}
            # Client-wide keys (e.g. hostname) take precedence and are
            # pre-normalized in __init__.
            normalized.update(self.extra_keys)
            return name.format(**normalized)

        def gauge(self, stat, value, rate=1, delta=False, **format_keys):
            stat = self._format_stat(stat, **format_keys)
            super().gauge(stat, value, rate, delta)

        def incr(self, stat, count=1, rate=1, **format_keys):
            stat = self._format_stat(stat, **format_keys)
            super().incr(stat, count, rate)

        def timing(self, stat, delta, rate=1, **format_keys):
            stat = self._format_stat(stat, **format_keys)
            super().timing(stat, delta, rate)

    return CustomStatsClient(
        statsd_host, statsd_port, statsd_prefix, extra_keys)