Merge "Rabbitmq metrics and splitbrain detection"

This commit is contained in:
Zuul 2022-01-17 23:24:18 +00:00 committed by Gerrit Code Review
commit d79095f6b5
13 changed files with 5961 additions and 3 deletions

View File

@ -49,6 +49,24 @@ an application that supports the rabbitmq interface. For instance:
juju add-relation rabbitmq-server:amqp nova-cloud-controller:amqp
## Monitoring
To collect RabbitMQ metrics, add a relation between rabbitmq-server and
an application that supports the `scrape` interface. For instance:
juju add-relation rabbitmq-server:scrape prometheus:scrape
> **Note:** The scrape relation is only supported when the RabbitMQ version is >= 3.8.
The charm can be related to a dashboard charm such as grafana to visualise the collected metrics:
juju add-relation rabbitmq-server:dashboards grafana:dashboards
To receive alerts about RabbitMQ split-brain events, add a relation between rabbitmq-server and
an application that supports the `prometheus-rules` interface. For instance:
juju add-relation rabbitmq-server:prometheus-rules prometheus:prometheus-rules
## High availability
When more than one unit is deployed the charm will bring up a native RabbitMQ

5841
files/grafana-dashboard.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
- alert: RabbitMQ_split_brain
# Detect whether the rabbitmq_queues metric differs between RabbitMQ nodes.
expr: count(count(rabbitmq_queues) by (job)) > 1
for: 5m
labels:
severity: page
application: rabbitmq-server
annotations:
description: RabbitMQ split brain detected
summary: RabbitMQ split brain detected

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -767,9 +767,12 @@ def get_plugin_manager():
:returns: Path to rabbitmq-plugins executable
:rtype: str
"""
manager = glob.glob(
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
return manager
# At version 3.8.2, only /sbin/rabbitmq-plugins can enable plugin correctly
if os.path.exists("/sbin/rabbitmq-plugins"):
return '/sbin/rabbitmq-plugins'
else:
return glob.glob(
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
def _manage_plugin(plugin, action):

View File

@ -79,6 +79,7 @@ from charmhelpers.core.hookenv import (
DEBUG,
ERROR,
INFO,
WARNING,
leader_set,
leader_get,
relation_get,
@ -259,6 +260,58 @@ def update_clients(check_deferred_restarts=True):
check_deferred_restarts=check_deferred_restarts)
@hooks.hook('dashboards-relation-joined')
def dashboards_relation_joined(relation_id=None, remote_unit=None):
    """Publish the bundled Grafana dashboard on the dashboards relation.

    The dashboard JSON is read from ``files/grafana-dashboard.json``
    (resolved against the current working directory) and set on the
    relation together with the dashboard name.

    :param relation_id: Id of the relation the data is set on
    :param remote_unit: Not used by this hook
    """
    dashboard_path = os.path.join("files", "grafana-dashboard.json")
    with open(dashboard_path) as dashboard_file:
        dashboard_json = dashboard_file.read()

    settings = {
        "dashboard": dashboard_json,
        "name": "RabbitMQ-Overview",
    }
    relation_set(relation_id, relation_settings=settings)
@hooks.hook('prometheus-rules-relation-joined',
            'prometheus-rules-relation-created')
def prometheus_rules_joined(relation_id=None, remote_unit=None):
    """Publish the split-brain alert rule on the prometheus-rules relation.

    The rule text is read verbatim from
    ``files/prom_rule_rmq_splitbrain.yaml`` (resolved against the current
    working directory) and set under the ``groups`` key of the relation.

    :param relation_id: Id of the relation the data is set on
    :param remote_unit: Not used by this hook
    """
    rule_path = os.path.join("files", "prom_rule_rmq_splitbrain.yaml")
    with open(rule_path) as rule_file:
        rule_text = rule_file.read()

    settings = {"groups": rule_text}
    relation_set(relation_id, relation_settings=settings)
@hooks.hook('scrape-relation-joined', 'scrape-relation-created')
def prometheus_scrape_joined(relation_id=None, remote_unit=None):
    """Enable the Prometheus plugin and advertise its port for scraping.

    When the installed rabbitmq-server package is older than 3.8.0 the
    unit status is set to blocked, a warning is logged and the hook is
    aborted by raising. Otherwise the plugin is enabled, the monitoring
    port is opened and the port number is set on the relation.

    :param relation_id: Id of the relation the port is set on
    :param remote_unit: Not used by this hook
    :raises Exception: If rabbitmq-server is older than 3.8.0
    """
    unsupported_msg = (
        "rabbitmq-server needs to be >= 3.8 to support Prometheus plugin")
    version_supported = cmp_pkgrevno('rabbitmq-server', '3.8.0') >= 0

    if not version_supported:
        log(unsupported_msg, level=WARNING)
        status_set("blocked", unsupported_msg)
        raise Exception(unsupported_msg)

    rabbit.enable_plugin(PROM_PLUGIN)
    open_port(RMQ_MON_PORT)
    settings = {"port": RMQ_MON_PORT}
    relation_set(relation_id, relation_settings=settings)
@hooks.hook('scrape-relation-broken')
def prometheus_scape_broken():
    """Undo the scrape setup once the scrape relation is broken.

    Disables the Prometheus plugin, closes the monitoring port and logs
    the fact at INFO level.
    """
    rabbit.disable_plugin(PROM_PLUGIN)
    close_port(RMQ_MON_PORT)

    broken_msg = "scrape relation broken, disabled plugin and close port"
    log(broken_msg, level=INFO)
@validate_amqp_config_tracker
@hooks.hook('amqp-relation-changed')
def amqp_changed(relation_id=None, remote_unit=None,
@ -681,6 +734,8 @@ def upgrade_charm():
# Name of the RabbitMQ management plugin.
MAN_PLUGIN = 'rabbitmq_management'
# Plugin enabled when the scrape relation is joined/created and disabled
# again when that relation is broken.
PROM_PLUGIN = 'rabbitmq_prometheus'
# Port opened and advertised on the scrape relation, and closed when the
# relation is broken. Presumably the rabbitmq_prometheus listener port;
# confirm against the RabbitMQ configuration in use.
RMQ_MON_PORT = 15692
@hooks.hook('config-changed')

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -0,0 +1 @@
rabbitmq_server_relations.py

View File

@ -21,6 +21,12 @@ provides:
nrpe-external-master:
interface: nrpe-external-master
scope: container
dashboards:
interface: grafana-dashboard
scrape:
interface: http
prometheus-rules:
interface: prometheus-rules
requires:
ha:
interface: hacluster

View File

@ -897,6 +897,25 @@ class UtilsTests(CharmTestCase):
mock_new_rabbitmq.return_value = True
self.assertEqual(rabbit_utils.get_managment_port(), 15672)
@mock.patch('glob.glob')
@mock.patch('rabbit_utils.subprocess.check_call')
@mock.patch('os.path.exists')
def test_enable_management_plugin(self, mock_os_path,
                                  mock_subprocess,
                                  mock_glob):
    """Check which rabbitmq-plugins binary _manage_plugin invokes.

    First os.path.exists is mocked to report True and the call is
    expected to use /sbin/rabbitmq-plugins; then it reports False and
    the call is expected to use the path returned by the mocked glob.
    """
    plugin = "rabbitmq_prometheus"

    # Case 1: os.path.exists reports True.
    mock_os_path.return_value = True
    sbin_path = '/sbin/rabbitmq-plugins'
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with([sbin_path, "enable", plugin])

    # Case 2: os.path.exists reports False, glob supplies the path.
    mock_os_path.return_value = False
    versioned_path = ('/usr/lib/rabbitmq/lib/'
                      'rabbitmq_server-3.8.2/sbin/rabbitmq-plugins')
    mock_glob.return_value = [versioned_path]
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with([versioned_path, "enable", plugin])
@mock.patch('rabbit_utils.caching_cmp_pkgrevno')
@mock.patch('rabbit_utils.relations_for_id')
@mock.patch('rabbit_utils.subprocess')