Add zookeeper-statsd

This adds a program, zookeeper-statsd, which monitors ZooKeeper
metrics and reports them to statsd, along with a container image
to run that program.  The container is run on each of the
ZooKeeper quorum members.  The graphite host is updated to allow
statsd traffic from quorum members, and the 4-letter-word
whitelist is updated to allow the mntr command (which is used to
gather metrics) to be issued.

Change-Id: I298f0b13a05cc615d8496edd4622438507fc5423
This commit is contained in:
James E. Blair 2021-03-17 12:57:21 -07:00
parent b2b1a9062d
commit 96bac7b486
10 changed files with 257 additions and 7 deletions

View File

@ -0,0 +1,21 @@
# Copyright (c) 2019 Red Hat, Inc.
# Copyright (c) 2021 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build on the shared opendevorg python-base 3.7 image.
FROM docker.io/opendevorg/python-base:3.7
# Install the monitoring script at the path that CMD below executes.
# The trailing slash makes it explicit that the destination is a directory.
COPY zookeeper-statsd.py /usr/local/bin/
# The statsd client is the script's only non-stdlib import.  Skip pip's
# download cache so it is not baked into the image layer.
RUN pip install --no-cache-dir statsd
CMD ["/usr/local/bin/zookeeper-statsd.py"]

View File

@ -0,0 +1,142 @@
#!/usr/bin/env python3
# Copyright (C) 2015 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2021 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
import socket
import time
from statsd.defaults.env import statsd
# Number of seconds slept before each poll of the ZooKeeper server.
INTERVAL = 10

# "mntr" keys whose current value is forwarded to statsd as a gauge.
GAUGES = [
    "zk_avg_latency",
    "zk_min_latency",
    "zk_max_latency",
    "zk_outstanding_requests",
    "zk_znode_count",
    "zk_followers",
    "zk_synced_followers",
    "zk_pending_syncs",
    "zk_watch_count",
    "zk_ephemerals_count",
    "zk_approximate_data_size",
    "zk_open_file_descriptor_count",
    "zk_max_file_descriptor_count",
]

# "mntr" keys that only ever grow; the change since the previous poll is
# forwarded to statsd as a counter increment.
COUNTERS = [
    "zk_packets_received",
    "zk_packets_sent",
]
class Socket:
    """Context manager that opens one TCP connection per ``with`` block.

    Entering the context connects to ``(host, port)`` and yields the raw
    ``socket.socket``; leaving it closes the connection again, so a single
    instance can be reused for any number of request/response exchanges.
    """

    def __init__(self, host, port):
        """Remember the endpoint; no connection is made until ``open()``."""
        self.host = host
        self.port = port
        # The currently open socket, or None while disconnected.
        self.socket = None

    def open(self):
        """Connect to the configured endpoint and store the socket.

        A 5 second timeout is set on the socket before connecting, so it
        also applies to later operations on the same socket.

        Raises:
            OSError: if the connection cannot be established.  The
                partially set up socket is closed before re-raising so
                no file descriptor is leaked.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(5)
            s.connect((self.host, self.port))
        except BaseException:
            # Without this the descriptor would stay open until the
            # garbage collector happened to reclaim the object.
            s.close()
            raise
        self.socket = s

    def __enter__(self):
        """Open the connection and return the connected socket."""
        self.open()
        return self.socket

    def __exit__(self, etype, value, tb):
        """Close the connection, if any; exceptions are not suppressed."""
        if self.socket is None:
            # Nothing was opened (or it was already closed).
            return
        try:
            self.socket.close()
        finally:
            self.socket = None
class ZooKeeperStats:
    """Poll a ZooKeeper server with ``mntr`` and forward metrics to statsd.

    Keys listed in ``GAUGES`` are sent as gauges.  Keys listed in
    ``COUNTERS`` are sent as increments equal to the change since the
    previous poll.  All metric names are prefixed with
    ``zk.<hostname>.``.
    """

    def __init__(self, host, port=2181):
        """Prepare to monitor the ZooKeeper server at ``host:port``.

        Args:
            host: Address of the ZooKeeper server to query.
            port: Client port of the server.
        """
        self.socket = Socket(host, port)
        # The hostname to use when reporting stats (e.g., zk01)
        if host in ('localhost', '127.0.0.1', '::1'):
            # A loopback name says nothing about which server this is,
            # so report under the local machine's own hostname instead.
            self.hostname = socket.gethostname()
        else:
            self.hostname = host
        self.log = logging.getLogger("ZooKeeperStats")
        # Last seen raw value of each counter, used to compute deltas.
        self.prevdata = {}

    def command(self, command):
        """Send a command to the server and return its full reply.

        A new connection is opened for the exchange and the reply is read
        until the server closes it.

        Args:
            command: The command word to send (a newline is appended).

        Returns:
            The complete reply decoded as UTF-8 text.
        """
        chunks = []
        with self.socket as conn:
            # sendall() keeps writing until the whole request is sent;
            # plain send() may write only part of it.
            conn.sendall((command + '\n').encode('utf8'))
            while True:
                r = conn.recv(4096)
                if not r:
                    break
                chunks.append(r)
        # Decode once at the end: a multi-byte character may be split
        # across two recv() chunks and would fail to decode piecewise.
        return b''.join(chunks).decode('utf8')

    def getStats(self):
        """Run ``mntr`` and parse the reply into a dict.

        Each reply line of the form ``key<TAB>value`` becomes one entry;
        blank lines and lines without a tab are ignored.  Only the first
        tab separates key from value, so a value that itself contains a
        tab is kept intact instead of aborting the whole poll.

        Returns:
            A dict mapping metric names to their (string) values.
        """
        stats = {}
        for line in self.command('mntr').split('\n'):
            key, sep, value = line.partition('\t')
            if not sep:
                # Covers both empty lines and lines with no tab at all.
                continue
            stats[key] = value
        return stats

    def reportStats(self, stats):
        """Send the interesting entries of ``stats`` to statsd.

        Args:
            stats: Mapping of metric name to value, as returned by
                ``getStats()``.  Missing keys are treated as 0.

        A failure to process one key is logged and does not prevent the
        remaining keys from being reported.
        """
        pipe = statsd.pipeline()
        base = 'zk.%s.' % (self.hostname,)
        for key in GAUGES:
            try:
                value = int(stats.get(key, 0))
                pipe.gauge(base + key, value)
            except Exception:
                self.log.exception("Unable to process %s", key)
        for key in COUNTERS:
            try:
                newvalue = int(stats.get(key, 0))
                oldvalue = self.prevdata.get(key)
                # On the first poll there is no baseline yet, so only
                # record the value and start reporting deltas next time.
                if oldvalue is not None:
                    value = newvalue - oldvalue
                    pipe.incr(base + key, value)
                self.prevdata[key] = newvalue
            except Exception:
                self.log.exception("Unable to process %s", key)
        pipe.send()

    def run(self):
        """Poll and report forever, logging any error and carrying on."""
        while True:
            try:
                self._run()
            except Exception:
                self.log.exception("Exception in main loop:")

    def _run(self):
        """Perform one cycle: wait ``INTERVAL`` seconds, poll, report."""
        # Sleeping first means a failing poll is also rate limited, since
        # run() calls straight back in after logging the exception.
        time.sleep(INTERVAL)
        stats = self.getStats()
        self.reportStats(stats)
# Only start the endless polling loop when executed as a script, so that
# importing this module (e.g. from a test) has no side effects.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    # Monitor the ZooKeeper server running on this same host.
    p = ZooKeeperStats('localhost')
    p.run()

View File

@ -11,4 +11,5 @@ iptables_extra_allowed_groups:
- {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'} - {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'}
- {'protocol': 'udp', 'port': '8125', 'group': 'logstash'} - {'protocol': 'udp', 'port': '8125', 'group': 'logstash'}
- {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'} - {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'}
- {'protocol': 'udp', 'port': '8125', 'group': 'zookeeper'}
- {'protocol': 'udp', 'port': '8125', 'group': 'zuul'} - {'protocol': 'udp', 'port': '8125', 'group': 'zuul'}

View File

@ -11,4 +11,5 @@ iptables_extra_allowed_groups:
- {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'} - {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'}
- {'protocol': 'udp', 'port': '8125', 'group': 'logstash'} - {'protocol': 'udp', 'port': '8125', 'group': 'logstash'}
- {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'} - {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'}
- {'protocol': 'udp', 'port': '8125', 'group': 'zookeeper'}
- {'protocol': 'udp', 'port': '8125', 'group': 'zuul'} - {'protocol': 'udp', 'port': '8125', 'group': 'zuul'}

View File

@ -13,3 +13,10 @@ services:
- "/var/zookeeper/datalog:/datalog" - "/var/zookeeper/datalog:/datalog"
- "/var/zookeeper/logs:/logs" - "/var/zookeeper/logs:/logs"
- "/var/zookeeper/tls:/tls" - "/var/zookeeper/tls:/tls"
zookeeper-statsd:
restart: always
image: docker.io/opendevorg/zookeeper-statsd:latest
network_mode: host
environment:
STATSD_HOST: graphite.opendev.org
STATSD_PORT: 8125

View File

@ -22,7 +22,7 @@ autopurge.purgeInterval=6
maxClientCnxns=60 maxClientCnxns=60
standaloneEnabled=true standaloneEnabled=true
admin.enableServer=true admin.enableServer=true
4lw.commands.whitelist=srvr, stat, dump 4lw.commands.whitelist=srvr, stat, dump, mntr
clientPort=2181 clientPort=2181
secureClientPort=2281 secureClientPort=2281
ssl.keyStore.location=/tls/keys/keystore.pem ssl.keyStore.location=/tls/keys/keystore.pem

View File

@ -12,6 +12,8 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import json
testinfra_hosts = ['zk01.opendev.org'] testinfra_hosts = ['zk01.opendev.org']
@ -41,3 +43,13 @@ def test_l4_commands(host):
cmd = host.run("echo dump | nc localhost 2181") cmd = host.run("echo dump | nc localhost 2181")
assert "SessionTracker dump" in cmd.stdout assert "SessionTracker dump" in cmd.stdout
assert "not executed because it is not in the whitelist" not in cmd.stdout assert "not executed because it is not in the whitelist" not in cmd.stdout
cmd = host.run("echo mntr | nc localhost 2181")
assert "zk_version" in cmd.stdout
assert "not executed because it is not in the whitelist" not in cmd.stdout
def test_zookeeper_statsd_running(host):
    """Check the zookeeper-statsd container is up and has never restarted."""
    inspect = host.run("docker inspect zookeeper-compose_zookeeper-statsd_1")
    # "docker inspect" prints a JSON list; the first entry is the container.
    container = json.loads(inspect.stdout)[0]
    assert container["State"]["Status"] == "running"
    assert container["RestartCount"] == 0

View File

@ -0,0 +1,28 @@
# zookeeper-statsd jobs
- job:
name: system-config-build-image-zookeeper-statsd
description: Build a zookeeper-statsd image.
parent: system-config-build-image
requires: python-base-3.7-container-image
vars: &zookeeper-statsd_vars
docker_images:
- context: docker/zookeeper-statsd
repository: opendevorg/zookeeper-statsd
files: &zookeeper-statsd_files
- docker/zookeeper-statsd/
- docker/python-base/
- job:
name: system-config-upload-image-zookeeper-statsd
description: Build and upload a zookeeper-statsd image.
parent: system-config-upload-image
requires: python-base-3.7-container-image
vars: *zookeeper-statsd_vars
files: *zookeeper-statsd_files
- job:
name: system-config-promote-image-zookeeper-statsd
description: Promote a previously published zookeeper-statsd image to latest.
parent: system-config-promote-image
vars: *zookeeper-statsd_vars
files: *zookeeper-statsd_files

View File

@ -27,7 +27,11 @@
soft: true soft: true
- system-config-run-kerberos - system-config-run-kerberos
- system-config-run-lists - system-config-run-lists
- system-config-run-nodepool - system-config-run-nodepool:
dependencies:
- name: opendev-buildset-registry
- name: system-config-build-image-zookeeper-statsd
soft: true
- system-config-run-meetpad - system-config-run-meetpad
- system-config-run-mirror-x86 - system-config-run-mirror-x86
- system-config-run-mirror-update - system-config-run-mirror-update
@ -67,8 +71,16 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-build-image-refstack - name: system-config-build-image-refstack
soft: true soft: true
- system-config-run-zookeeper - system-config-run-zookeeper:
- system-config-run-zuul dependencies:
- name: opendev-buildset-registry
- name: system-config-build-image-zookeeper-statsd
soft: true
- system-config-run-zuul:
dependencies:
- name: opendev-buildset-registry
- name: system-config-build-image-zookeeper-statsd
soft: true
- system-config-run-zuul-preview - system-config-run-zuul-preview
- system-config-run-letsencrypt - system-config-run-letsencrypt
- system-config-build-image-jinja-init: - system-config-build-image-jinja-init:
@ -94,6 +106,11 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-build-image-python-base-3.7 - name: system-config-build-image-python-base-3.7
soft: true soft: true
- system-config-build-image-zookeeper-statsd:
dependencies:
- name: opendev-buildset-registry
- name: system-config-build-image-python-base-3.7
soft: true
- system-config-build-image-accessbot: - system-config-build-image-accessbot:
dependencies: dependencies:
- name: opendev-buildset-registry - name: opendev-buildset-registry
@ -129,7 +146,11 @@
soft: true soft: true
- system-config-run-kerberos - system-config-run-kerberos
- system-config-run-lists - system-config-run-lists
- system-config-run-nodepool - system-config-run-nodepool:
dependencies:
- name: opendev-buildset-registry
- name: system-config-upload-image-zookeeper-statsd
soft: true
- system-config-run-meetpad - system-config-run-meetpad
- system-config-run-mirror-x86 - system-config-run-mirror-x86
- system-config-run-mirror-update - system-config-run-mirror-update
@ -168,8 +189,16 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-upload-image-refstack - name: system-config-upload-image-refstack
soft: true soft: true
- system-config-run-zookeeper - system-config-run-zookeeper:
- system-config-run-zuul dependencies:
- name: opendev-buildset-registry
- name: system-config-upload-image-zookeeper-statsd
soft: true
- system-config-run-zuul:
dependencies:
- name: opendev-buildset-registry
- name: system-config-upload-image-zookeeper-statsd
soft: true
- system-config-run-zuul-preview - system-config-run-zuul-preview
- system-config-run-letsencrypt - system-config-run-letsencrypt
- system-config-upload-image-jinja-init: - system-config-upload-image-jinja-init:
@ -192,6 +221,11 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-upload-image-python-base-3.7 - name: system-config-upload-image-python-base-3.7
soft: true soft: true
- system-config-upload-image-zookeeper-statsd:
dependencies:
- name: opendev-buildset-registry
- name: system-config-upload-image-python-base-3.7
soft: true
- system-config-upload-image-accessbot: - system-config-upload-image-accessbot:
dependencies: dependencies:
- name: opendev-buildset-registry - name: opendev-buildset-registry
@ -215,6 +249,7 @@
- system-config-promote-image-grafana - system-config-promote-image-grafana
- system-config-promote-image-etherpad - system-config-promote-image-etherpad
- system-config-promote-image-haproxy-statsd - system-config-promote-image-haproxy-statsd
- system-config-promote-image-zookeeper-statsd
- system-config-promote-image-accessbot - system-config-promote-image-accessbot
- system-config-promote-image-refstack - system-config-promote-image-refstack
- system-config-promote-image-python-base-3.7 - system-config-promote-image-python-base-3.7

View File

@ -702,6 +702,9 @@
- playbooks/roles/pip3/ - playbooks/roles/pip3/
- playbooks/roles/install-docker/ - playbooks/roles/install-docker/
- testinfra/test_zookeeper.py - testinfra/test_zookeeper.py
# From zookeeper-statsd_files -- If we rebuild the image, we want
# to run this job as well.
- docker/zookeeper-statsd/
- job: - job:
name: system-config-run-zuul-preview name: system-config-run-zuul-preview