From 96bac7b486147de2b826271efc4bf87b7ec042df Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Wed, 17 Mar 2021 12:57:21 -0700 Subject: [PATCH] Add zookeeper-statsd This adds a program, zookeeper-statsd, which monitors zookeeper metrics and reports them to statsd. It also adds a container to run that program. And it runs the container on each of the ZooKeeper quorum members. And it updates the graphite host to allow statsd traffic from quorum members. And it updates the 4-letter-word whitelist to allow the mntr command (which is used to gather metrics) to be issued. Change-Id: I298f0b13a05cc615d8496edd4622438507fc5423 --- docker/zookeeper-statsd/Dockerfile | 21 +++ docker/zookeeper-statsd/zookeeper-statsd.py | 142 ++++++++++++++++++ inventory/service/group_vars/graphite.yaml | 1 + .../service/group_vars/graphite_opendev.org | 1 + .../zookeeper-compose/docker-compose.yaml | 7 + .../roles/zookeeper/templates/zoo.cfg.j2 | 2 +- testinfra/test_zookeeper.py | 12 ++ zuul.d/docker-images/zookeeper-statsd.yaml | 28 ++++ zuul.d/project.yaml | 47 +++++- zuul.d/system-config-run.yaml | 3 + 10 files changed, 257 insertions(+), 7 deletions(-) create mode 100644 docker/zookeeper-statsd/Dockerfile create mode 100755 docker/zookeeper-statsd/zookeeper-statsd.py create mode 100644 zuul.d/docker-images/zookeeper-statsd.yaml diff --git a/docker/zookeeper-statsd/Dockerfile b/docker/zookeeper-statsd/Dockerfile new file mode 100644 index 0000000000..8cfa6ec8f8 --- /dev/null +++ b/docker/zookeeper-statsd/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2019 Red Hat, Inc. +# Copyright (c) 2021 Acme Gating, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM docker.io/opendevorg/python-base:3.7 + +COPY zookeeper-statsd.py /usr/local/bin +RUN pip install statsd +CMD ["/usr/local/bin/zookeeper-statsd.py"] diff --git a/docker/zookeeper-statsd/zookeeper-statsd.py b/docker/zookeeper-statsd/zookeeper-statsd.py new file mode 100755 index 0000000000..abdcd705ba --- /dev/null +++ b/docker/zookeeper-statsd/zookeeper-statsd.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2015 Hewlett-Packard Development Company, L.P. +# Copyright (C) 2021 Acme Gating, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# +# See the License for the specific language governing permissions and +# limitations under the License. 
import logging
import re
import socket
import time

# Seconds to wait between two polls of the ZooKeeper server.
INTERVAL = 10

# Metrics reported as statsd gauges (the latest value is sent as-is).
GAUGES = [
    'zk_avg_latency',
    'zk_min_latency',
    'zk_max_latency',
    'zk_outstanding_requests',
    'zk_znode_count',
    'zk_followers',
    'zk_synced_followers',
    'zk_pending_syncs',
    'zk_watch_count',
    'zk_ephemerals_count',
    'zk_approximate_data_size',
    'zk_open_file_descriptor_count',
    'zk_max_file_descriptor_count',
]

# Metrics reported as statsd counters (only the change since the
# previous poll is sent).
COUNTERS = [
    'zk_packets_received',
    'zk_packets_sent',
]


class Socket:
    """Context manager that opens a fresh TCP connection per ``with``.

    :param host: Host name or address to connect to.
    :param port: TCP port to connect to.
    :param timeout: Connect/read timeout in seconds.
    """

    def __init__(self, host, port, timeout=5):
        self.host = host
        self.port = port
        self.timeout = timeout
        self.socket = None

    def open(self):
        """Connect to ``(host, port)`` and store the socket on ``self``."""
        # create_connection() tries every address the name resolves to
        # (IPv4 and IPv6, so '::1' works too) and closes the socket
        # itself when a connection attempt fails.
        self.socket = socket.create_connection(
            (self.host, self.port), timeout=self.timeout)

    def __enter__(self):
        self.open()
        return self.socket

    def __exit__(self, etype, value, tb):
        if self.socket is not None:
            self.socket.close()
            self.socket = None


class ZooKeeperStats:
    """Poll ZooKeeper's ``mntr`` command and forward metrics to statsd.

    :param host: ZooKeeper host to query.
    :param port: ZooKeeper client port.
    :param statsd_client: Object providing ``pipeline()``; when omitted
        the environment-configured client from the ``statsd`` package
        is used.
    """

    def __init__(self, host, port=2181, statsd_client=None):
        self.socket = Socket(host, port)
        # The hostname to use when reporting stats (e.g., zk01)
        if host in ('localhost', '127.0.0.1', '::1'):
            self.hostname = socket.gethostname()
        else:
            self.hostname = host
        if statsd_client is None:
            # Imported here rather than at module level so that the
            # module can be imported (and its parsing logic exercised)
            # without the statsd package; a missing package still
            # fails at construction time, i.e. at startup.
            from statsd.defaults.env import statsd as statsd_client
        self.statsd = statsd_client
        self.log = logging.getLogger("ZooKeeperStats")
        # Last seen raw value of each counter, keyed by metric name.
        self.prevdata = {}

    def command(self, command):
        """Send a four-letter-word command and return the full reply.

        :param command: Command name without the trailing newline.
        :returns: The decoded response text.
        :raises OSError: On connection failure or timeout.
        """
        chunks = []
        with self.socket as conn:
            # sendall() keeps writing until the whole command is sent;
            # send() may transmit only part of the buffer.
            conn.sendall((command + '\n').encode('utf8'))
            while True:
                chunk = conn.recv(4096)
                if not chunk:
                    # Empty read: the server closed the connection.
                    break
                chunks.append(chunk)
        # Decode once so a multi-byte character split across two reads
        # cannot raise UnicodeDecodeError.
        return b''.join(chunks).decode('utf8')

    @staticmethod
    def parseStats(data):
        """Parse tab-separated ``key<TAB>value`` lines into a dict.

        Blank lines and lines without a tab are ignored.  Only the
        first tab separates key from value, so a value that itself
        contains a tab is kept intact instead of raising.
        """
        stats = {}
        for line in data.split('\n'):
            key, sep, value = line.partition('\t')
            if sep:
                stats[key] = value
        return stats

    @staticmethod
    def parseNumber(value):
        """Convert a metric value to ``int``, or ``float`` if fractional.

        :raises ValueError: If *value* is not numeric.
        """
        try:
            return int(value)
        except ValueError:
            return float(value)

    @staticmethod
    def counterDelta(previous, current):
        """Return the increment to report for a cumulative counter.

        :returns: ``None`` when there is no previous sample,
            ``current`` when the counter went backwards (it was reset,
            so everything counted since then is new), otherwise the
            difference.
        """
        if previous is None:
            return None
        if current < previous:
            return current
        return current - previous

    def getStats(self):
        """Run ``mntr`` and return its output as a ``{key: value}`` dict."""
        return self.parseStats(self.command('mntr'))

    def reportStats(self, stats):
        """Send one batch of gauges and counter increments to statsd.

        :param stats: Mapping of metric name to its raw string value.
        """
        pipe = self.statsd.pipeline()
        base = 'zk.%s.' % (self.hostname,)
        for key in GAUGES:
            try:
                # Absent gauges are reported as 0, as before.
                pipe.gauge(base + key, self.parseNumber(stats.get(key, 0)))
            except Exception:
                self.log.exception("Unable to process %s", key)
        for key in COUNTERS:
            if key not in stats:
                # A missing counter is not a zero reading; recording 0
                # would turn the next real sample into a huge spike.
                continue
            try:
                newvalue = int(stats[key])
                value = self.counterDelta(self.prevdata.get(key), newvalue)
                if value is not None:
                    pipe.incr(base + key, value)
                self.prevdata[key] = newvalue
            except Exception:
                self.log.exception("Unable to process %s", key)
        pipe.send()

    def run(self):
        """Poll forever, logging (not propagating) per-iteration errors."""
        while True:
            try:
                self._run()
            except Exception:
                self.log.exception("Exception in main loop:")

    def _run(self):
        """Sleep for ``INTERVAL`` seconds, then collect and report once."""
        time.sleep(INTERVAL)
        stats = self.getStats()
        self.reportStats(stats)


def main():
    """Configure logging and monitor the local ZooKeeper server."""
    logging.basicConfig(level=logging.DEBUG)
    p = ZooKeeperStats('localhost')
    p.run()


if __name__ == '__main__':
    main()
a/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml b/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml index 96e0ecbe00..b2890331ae 100644 --- a/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml +++ b/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml @@ -13,3 +13,10 @@ services: - "/var/zookeeper/datalog:/datalog" - "/var/zookeeper/logs:/logs" - "/var/zookeeper/tls:/tls" + zookeeper-statsd: + restart: always + image: docker.io/opendevorg/zookeeper-statsd:latest + network_mode: host + environment: + STATSD_HOST: graphite.opendev.org + STATSD_PORT: 8125 diff --git a/playbooks/roles/zookeeper/templates/zoo.cfg.j2 b/playbooks/roles/zookeeper/templates/zoo.cfg.j2 index 79243cd18c..a5590d8017 100644 --- a/playbooks/roles/zookeeper/templates/zoo.cfg.j2 +++ b/playbooks/roles/zookeeper/templates/zoo.cfg.j2 @@ -22,7 +22,7 @@ autopurge.purgeInterval=6 maxClientCnxns=60 standaloneEnabled=true admin.enableServer=true -4lw.commands.whitelist=srvr, stat, dump +4lw.commands.whitelist=srvr, stat, dump, mntr clientPort=2181 secureClientPort=2281 ssl.keyStore.location=/tls/keys/keystore.pem diff --git a/testinfra/test_zookeeper.py b/testinfra/test_zookeeper.py index d96756b440..27255dab17 100644 --- a/testinfra/test_zookeeper.py +++ b/testinfra/test_zookeeper.py @@ -12,6 +12,8 @@ # License for the specific language governing permissions and limitations # under the License. 
+import json + testinfra_hosts = ['zk01.opendev.org'] @@ -41,3 +43,13 @@ def test_l4_commands(host): cmd = host.run("echo dump | nc localhost 2181") assert "SessionTracker dump" in cmd.stdout assert "not executed because it is not in the whitelist" not in cmd.stdout + + cmd = host.run("echo mntr | nc localhost 2181") + assert "zk_version" in cmd.stdout + assert "not executed because it is not in the whitelist" not in cmd.stdout + +def test_zookeeper_statsd_running(host): + cmd = host.run("docker inspect zookeeper-compose_zookeeper-statsd_1") + out = json.loads(cmd.stdout) + assert out[0]["State"]["Status"] == "running" + assert out[0]["RestartCount"] == 0 diff --git a/zuul.d/docker-images/zookeeper-statsd.yaml b/zuul.d/docker-images/zookeeper-statsd.yaml new file mode 100644 index 0000000000..2efdd3e910 --- /dev/null +++ b/zuul.d/docker-images/zookeeper-statsd.yaml @@ -0,0 +1,28 @@ +# zookeeper-statsd jobs +- job: + name: system-config-build-image-zookeeper-statsd + description: Build a zookeeper-statsd image. + parent: system-config-build-image + requires: python-base-3.7-container-image + vars: &zookeeper-statsd_vars + docker_images: + - context: docker/zookeeper-statsd + repository: opendevorg/zookeeper-statsd + files: &zookeeper-statsd_files + - docker/zookeeper-statsd/ + - docker/python-base/ + +- job: + name: system-config-upload-image-zookeeper-statsd + description: Build and upload a zookeeper-statsd image. + parent: system-config-upload-image + requires: python-base-3.7-container-image + vars: *zookeeper-statsd_vars + files: *zookeeper-statsd_files + +- job: + name: system-config-promote-image-zookeeper-statsd + description: Promote a previously published zookeeper-statsd image to latest. 
+ parent: system-config-promote-image + vars: *zookeeper-statsd_vars + files: *zookeeper-statsd_files diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml index 04634a4127..d2485d2dff 100644 --- a/zuul.d/project.yaml +++ b/zuul.d/project.yaml @@ -27,7 +27,11 @@ soft: true - system-config-run-kerberos - system-config-run-lists - - system-config-run-nodepool + - system-config-run-nodepool: + dependencies: + - name: opendev-buildset-registry + - name: system-config-build-image-zookeeper-statsd + soft: true - system-config-run-meetpad - system-config-run-mirror-x86 - system-config-run-mirror-update @@ -67,8 +71,16 @@ - name: opendev-buildset-registry - name: system-config-build-image-refstack soft: true - - system-config-run-zookeeper - - system-config-run-zuul + - system-config-run-zookeeper: + dependencies: + - name: opendev-buildset-registry + - name: system-config-build-image-zookeeper-statsd + soft: true + - system-config-run-zuul: + dependencies: + - name: opendev-buildset-registry + - name: system-config-build-image-zookeeper-statsd + soft: true - system-config-run-zuul-preview - system-config-run-letsencrypt - system-config-build-image-jinja-init: @@ -94,6 +106,11 @@ - name: opendev-buildset-registry - name: system-config-build-image-python-base-3.7 soft: true + - system-config-build-image-zookeeper-statsd: + dependencies: + - name: opendev-buildset-registry + - name: system-config-build-image-python-base-3.7 + soft: true - system-config-build-image-accessbot: dependencies: - name: opendev-buildset-registry @@ -129,7 +146,11 @@ soft: true - system-config-run-kerberos - system-config-run-lists - - system-config-run-nodepool + - system-config-run-nodepool: + dependencies: + - name: opendev-buildset-registry + - name: system-config-upload-image-zookeeper-statsd + soft: true - system-config-run-meetpad - system-config-run-mirror-x86 - system-config-run-mirror-update @@ -168,8 +189,16 @@ - name: opendev-buildset-registry - name: system-config-upload-image-refstack 
soft: true - - system-config-run-zookeeper - - system-config-run-zuul + - system-config-run-zookeeper: + dependencies: + - name: opendev-buildset-registry + - name: system-config-upload-image-zookeeper-statsd + soft: true + - system-config-run-zuul: + dependencies: + - name: opendev-buildset-registry + - name: system-config-upload-image-zookeeper-statsd + soft: true - system-config-run-zuul-preview - system-config-run-letsencrypt - system-config-upload-image-jinja-init: @@ -192,6 +221,11 @@ - name: opendev-buildset-registry - name: system-config-upload-image-python-base-3.7 soft: true + - system-config-upload-image-zookeeper-statsd: + dependencies: + - name: opendev-buildset-registry + - name: system-config-upload-image-python-base-3.7 + soft: true - system-config-upload-image-accessbot: dependencies: - name: opendev-buildset-registry @@ -215,6 +249,7 @@ - system-config-promote-image-grafana - system-config-promote-image-etherpad - system-config-promote-image-haproxy-statsd + - system-config-promote-image-zookeeper-statsd - system-config-promote-image-accessbot - system-config-promote-image-refstack - system-config-promote-image-python-base-3.7 diff --git a/zuul.d/system-config-run.yaml b/zuul.d/system-config-run.yaml index 8004e1ac83..0dde030dfd 100644 --- a/zuul.d/system-config-run.yaml +++ b/zuul.d/system-config-run.yaml @@ -702,6 +702,9 @@ - playbooks/roles/pip3/ - playbooks/roles/install-docker/ - testinfra/test_zookeeper.py + # From zookeeper-statsd_files -- If we rebuild the image, we want + # to run this job as well. + - docker/zookeeper-statsd/ - job: name: system-config-run-zuul-preview