prometheus: add options to start the server and process collector
This change adds a new prometheus_port option to start a metric server to be scraped by a prometheus service. By default, the server exposes process information. Change-Id: Ie329df6adc69768dfdb158d00283161f8b70f07a
This commit is contained in:
parent
3ca33f0686
commit
0dbd8c0784
@ -392,6 +392,17 @@ The following sections of ``zuul.conf`` are used by the scheduler:
|
||||
If a value higher than ``max_hold_expiration`` is supplied during
|
||||
hold request creation, it will be lowered to this value.
|
||||
|
||||
.. attr:: prometheus_port
|
||||
|
||||
Set a TCP port to start the prometheus metrics client.
|
||||
|
||||
.. attr:: prometheus_addr
|
||||
:default: 0.0.0.0
|
||||
|
||||
The IPv4 address to listen on for prometheus metrics polling.
|
||||
To use IPv6, python>3.8 is required `issue24209 <https://bugs.python.org/issue24209>`_.
|
||||
|
||||
|
||||
Operation
|
||||
~~~~~~~~~
|
||||
|
||||
|
@ -503,3 +503,46 @@ following statsd events:
|
||||
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` +1
|
||||
* ``zuul.tenant.mytenant.pipeline.gate.project.example_com.myproject.master.job.myjob.SUCCESS`` 40 seconds
|
||||
* ``zuul.tenant.mytenant.pipeline.gate.all_jobs`` +1
|
||||
|
||||
|
||||
Prometheus monitoring
|
||||
---------------------
|
||||
|
||||
Zuul comes with support to start a prometheus_ metric server to be added as
|
||||
prometheus's target.
|
||||
|
||||
.. _prometheus: https://prometheus.io/docs/introduction/overview/
|
||||
|
||||
|
||||
Configuration
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Prometheus support uses the ``prometheus_client`` python module.
|
||||
Note that support is optional and Zuul will start without
|
||||
the prometheus python module present.
|
||||
|
||||
To enable the service, set the ``prometheus_port`` in a service section of
|
||||
``zuul.conf``. For example setting :attr:`scheduler.prometheus_port` to 9091
|
||||
starts an HTTP server to expose metrics to a prometheus service at:
|
||||
http://scheduler:9091/metrics
|
||||
|
||||
|
||||
Metrics
|
||||
~~~~~~~
|
||||
|
||||
These metrics are exposed by default:
|
||||
|
||||
.. stat:: process_virtual_memory_bytes
|
||||
:type: gauge
|
||||
|
||||
.. stat:: process_resident_memory_bytes
|
||||
:type: gauge
|
||||
|
||||
.. stat:: process_open_fds
|
||||
:type: gauge
|
||||
|
||||
.. stat:: process_start_time_seconds
|
||||
:type: gauge
|
||||
|
||||
.. stat:: process_cpu_seconds_total
|
||||
:type: counter
|
||||
|
@ -26,22 +26,27 @@ tenant_config=/etc/zuul/main.yaml
|
||||
log_config=/etc/zuul/logging.conf
|
||||
pidfile=/var/run/zuul/zuul.pid
|
||||
state_dir=/var/lib/zuul
|
||||
prometheus_port=9091
|
||||
;prometheus_addr=0.0.0.0
|
||||
|
||||
[merger]
|
||||
git_dir=/var/lib/zuul/git
|
||||
;git_user_email=zuul@example.com
|
||||
;git_user_name=zuul
|
||||
prometheus_port=9092
|
||||
|
||||
[executor]
|
||||
default_username=zuul
|
||||
trusted_ro_paths=/opt/zuul-scripts:/var/cache
|
||||
trusted_rw_paths=/opt/zuul-logs
|
||||
prometheus_port=9093
|
||||
|
||||
[web]
|
||||
listen_address=127.0.0.1
|
||||
port=9000
|
||||
static_cache_expiry=0
|
||||
status_url=https://zuul.example.com/status
|
||||
prometheus_port=9094
|
||||
|
||||
[webclient]
|
||||
url=https://zuul.example.com
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
A new prometheus_port option for the services can be used to start the
|
||||
prometheus python client and expose metrics.
|
@ -10,6 +10,7 @@ GitPython>=2.1.8
|
||||
python-daemon>=2.0.4
|
||||
extras
|
||||
statsd>=3.0
|
||||
prometheus-client
|
||||
voluptuous>=0.10.2
|
||||
gear>=0.13.0,<1.0.0,!=0.15.0
|
||||
apscheduler>=3.0
|
||||
|
@ -65,6 +65,7 @@ from git.exc import NoSuchPathError
|
||||
from git.util import IterableList
|
||||
import yaml
|
||||
import paramiko
|
||||
import prometheus_client.exposition
|
||||
|
||||
from zuul.driver.sql.sqlconnection import DatabaseSession
|
||||
from zuul.model import Change
|
||||
@ -868,6 +869,25 @@ class FakeGerritChange(object):
|
||||
self.reported += 1
|
||||
|
||||
|
||||
class PrometheusServer(object):
    """Serve ``prometheus_client.REGISTRY`` over HTTP for the test suite.

    The server listens on ``0.0.0.0`` with port ``0`` and is driven from a
    daemon thread.  The port that was actually bound is available as
    ``self.port`` once :meth:`start` has returned.
    """

    def start(self):
        """Create the WSGI metrics server and serve it in the background."""
        app = prometheus_client.make_wsgi_app(prometheus_client.REGISTRY)
        self.httpd = prometheus_client.exposition.make_server(
            "0.0.0.0",
            0,
            app,
            prometheus_client.exposition.ThreadingWSGIServer,
            handler_class=prometheus_client.exposition._SilentHandler)
        # Port 0 was requested, so read the real port back from the socket.
        self.port = self.httpd.socket.getsockname()[1]
        self.thread = threading.Thread(target=self.httpd.serve_forever)
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """Stop serving, wait for the thread and release the socket."""
        self.httpd.shutdown()
        self.thread.join()
        # shutdown() only ends serve_forever(); the listening socket stays
        # open until server_close() is called, which would otherwise leak
        # one file descriptor per test case.
        self.httpd.server_close()
|
||||
|
||||
|
||||
class GerritWebServer(object):
|
||||
|
||||
def __init__(self, fake_gerrit):
|
||||
@ -4156,6 +4176,10 @@ class ZuulTestCase(BaseTestCase):
|
||||
server that all of the Zuul components in this test use to
|
||||
communicate with each other.
|
||||
|
||||
:ivar PrometheusServer prometheus_server: An instance of
|
||||
:py:class:`~tests.base.PrometheusServer` which is the Prometheus
|
||||
metrics endpoint.
|
||||
|
||||
:ivar RecordingExecutorServer executor_server: An instance of
|
||||
:py:class:`~tests.base.RecordingExecutorServer` which is the
|
||||
Ansible execute server used to run jobs for this test.
|
||||
@ -4279,6 +4303,8 @@ class ZuulTestCase(BaseTestCase):
|
||||
self.statsd.start()
|
||||
|
||||
self.gearman_server = FakeGearmanServer(self.use_ssl)
|
||||
self.prometheus_server = PrometheusServer()
|
||||
self.prometheus_server.start()
|
||||
|
||||
self.config.set('gearman', 'port', str(self.gearman_server.port))
|
||||
self.log.info("Gearman server on port %s" %
|
||||
@ -4673,6 +4699,7 @@ class ZuulTestCase(BaseTestCase):
|
||||
self.statsd.join()
|
||||
self.rpcclient.shutdown()
|
||||
self.gearman_server.shutdown()
|
||||
self.prometheus_server.stop()
|
||||
self.fake_nodepool.stop()
|
||||
self.zk_client.disconnect()
|
||||
self.printHistory()
|
||||
|
42
tests/unit/test_prometheus.py
Normal file
42
tests/unit/test_prometheus.py
Normal file
@ -0,0 +1,42 @@
|
||||
# Copyright 2019 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import requests
|
||||
|
||||
from tests.base import ZuulTestCase
|
||||
|
||||
|
||||
class BaseTestPrometheus(ZuulTestCase):
    """Base class for tests that scrape the test prometheus endpoint."""

    tenant_config_file = 'config/single-tenant/main.yaml'

    def get_metrics(self):
        """Fetch the metrics page and return it as a ``{name: value}`` dict.

        Blank lines, comment lines (starting with ``#``) and lines that do
        not split into exactly two whitespace-separated fields are skipped.
        Values are returned as the raw strings found in the response.

        :raises requests.RequestException: if the endpoint does not answer
            in time or replies with an HTTP error status.
        """
        # Bound the request so a wedged server fails the test instead of
        # hanging it, and surface HTTP errors rather than parsing an error
        # page as if it were metrics.
        r = requests.get(
            "http://localhost:%d" % self.prometheus_server.port,
            timeout=10)
        r.raise_for_status()
        metrics = {}
        for line in r.text.split('\n'):
            if not line or line.startswith("#"):
                continue
            try:
                key, value = line.split()
            except ValueError:
                # Not a plain "name value" sample; ignore it.
                continue
            metrics[key] = value
        return metrics
|
||||
|
||||
|
||||
class TestPrometheus(BaseTestPrometheus):
    """Check the metrics served by the test prometheus endpoint."""

    def test_prometheus_process_metrics(self):
        """The process memory and open-fd metrics appear in the scrape."""
        scraped = self.get_metrics()
        for name in ("process_resident_memory_bytes", "process_open_fds"):
            self.assertIn(name, scraped)
|
@ -28,6 +28,7 @@ import sys
|
||||
import traceback
|
||||
import threading
|
||||
|
||||
prometheus_client = extras.try_import('prometheus_client')
|
||||
yappi = extras.try_import('yappi')
|
||||
objgraph = extras.try_import('objgraph')
|
||||
|
||||
@ -199,6 +200,15 @@ class ZuulDaemonApp(ZuulApp, metaclass=abc.ABCMeta):
|
||||
"Configured logging: {version}".format(
|
||||
version=zuul_version_info.release_string()))
|
||||
|
||||
def setup_prometheus(self, section):
    """Start the prometheus HTTP endpoint if ``section`` configures one.

    Nothing happens unless ``prometheus_port`` is set in ``section``.  The
    listen address is looked up as ``prometheus_addr`` through
    ``get_default`` with ``'0.0.0.0'`` passed as the default.

    :param section: name of the config section to read the options from.
    :raises RuntimeError: if a port is configured but the
        ``prometheus_client`` module is not available.
    """
    if not self.config.has_option(section, 'prometheus_port'):
        return
    # Fail loudly: the operator asked for metrics that cannot be served.
    if not prometheus_client:
        raise RuntimeError("prometheus_client library is missing.")
    listen_port = int(self.config.get(section, 'prometheus_port'))
    listen_addr = get_default(
        self.config, section, 'prometheus_addr', '0.0.0.0')
    prometheus_client.start_http_server(listen_port, listen_addr)
|
||||
|
||||
def main(self):
|
||||
self.parseArguments()
|
||||
self.readConfig()
|
||||
|
@ -87,6 +87,7 @@ class Executor(zuul.cmd.ZuulDaemonApp):
|
||||
os.mkdir(self.job_dir)
|
||||
|
||||
self.setup_logging('executor', 'log_config')
|
||||
self.setup_prometheus('executor')
|
||||
self.log = logging.getLogger("zuul.Executor")
|
||||
|
||||
self.finger_port = int(
|
||||
|
@ -50,6 +50,7 @@ class Merger(zuul.cmd.ZuulDaemonApp):
|
||||
self.configure_connections(source_only=True)
|
||||
|
||||
self.setup_logging('merger', 'log_config')
|
||||
self.setup_prometheus('merger')
|
||||
|
||||
self.merger = MergeServer(self.config, self.connections)
|
||||
self.merger.start()
|
||||
|
@ -132,6 +132,7 @@ class Scheduler(zuul.cmd.ZuulDaemonApp):
|
||||
self.start_gear_server()
|
||||
|
||||
self.setup_logging('scheduler', 'log_config')
|
||||
self.setup_prometheus('scheduler')
|
||||
self.log = logging.getLogger("zuul.Scheduler")
|
||||
|
||||
self.configure_connections(require_sql=True)
|
||||
|
@ -84,6 +84,7 @@ class WebServer(zuul.cmd.ZuulDaemonApp):
|
||||
sys.exit(0)
|
||||
|
||||
self.setup_logging('web', 'log_config')
|
||||
self.setup_prometheus('web')
|
||||
self.log = logging.getLogger("zuul.WebServer")
|
||||
|
||||
try:
|
||||
|
Loading…
Reference in New Issue
Block a user