Merge "Rabbitmq metrics and splitbrain detection"
This commit is contained in:
commit
d79095f6b5
18
README.md
18
README.md
|
@ -49,6 +49,24 @@ an application that supports the rabbitmq interface. For instance:
|
|||
|
||||
juju add-relation rabbitmq-server:amqp nova-cloud-controller:amqp
|
||||
|
||||
## Monitoring
|
||||
|
||||
To collect RabbitMQ metrics, add a relation between rabbitmq-server and
|
||||
an application that supports the `scrape` interface. For instance:
|
||||
|
||||
juju add-relation rabbitmq-server:scrape prometheus:scrape
|
||||
|
||||
> **Note:** The scrape relation is only supported when the RabbitMQ version is >= 3.8.
|
||||
|
||||
The charm can also be related to a dashboard application such as Grafana to visualize the collected metrics:
|
||||
|
||||
juju add-relation rabbitmq-server:dashboards grafana:dashboards
|
||||
|
||||
To receive alerts for RabbitMQ split-brain events, add a relation between rabbitmq-server and
|
||||
an application that supports the `prometheus-rules` interface. For instance:
|
||||
|
||||
juju add-relation rabbitmq-server:prometheus-rules prometheus:prometheus-rules
|
||||
|
||||
## High availability
|
||||
|
||||
When more than one unit is deployed the charm will bring up a native RabbitMQ
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,10 @@
|
|||
- alert: RabbitMQ_split_brain
|
||||
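# Intended to detect rabbitmq_queues differing between RabbitMQ nodes.
# NOTE(review): as written, the expression counts how many distinct `job`
# label values expose rabbitmq_queues; it does not compare the metric's
# values across nodes. Confirm this matches how scrape targets are labelled.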
|
||||
expr: count(count(rabbitmq_queues) by (job)) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
application: rabbitmq-server
|
||||
annotations:
|
||||
description: RabbitMQ split brain detected
|
||||
summary: RabbitMQ split brain detected
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -767,9 +767,12 @@ def get_plugin_manager():
|
|||
:returns: Path to rabbitmq-plugins executable
|
||||
:rtype: str
|
||||
"""
|
||||
manager = glob.glob(
|
||||
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
|
||||
return manager
|
||||
# At version 3.8.2, only /sbin/rabbitmq-plugins can enable plugin correctly
|
||||
if os.path.exists("/sbin/rabbitmq-plugins"):
|
||||
return '/sbin/rabbitmq-plugins'
|
||||
else:
|
||||
return glob.glob(
|
||||
'/usr/lib/rabbitmq/lib/rabbitmq_server-*/sbin/rabbitmq-plugins')[0]
|
||||
|
||||
|
||||
def _manage_plugin(plugin, action):
|
||||
|
|
|
@ -79,6 +79,7 @@ from charmhelpers.core.hookenv import (
|
|||
DEBUG,
|
||||
ERROR,
|
||||
INFO,
|
||||
WARNING,
|
||||
leader_set,
|
||||
leader_get,
|
||||
relation_get,
|
||||
|
@ -259,6 +260,58 @@ def update_clients(check_deferred_restarts=True):
|
|||
check_deferred_restarts=check_deferred_restarts)
|
||||
|
||||
|
||||
@hooks.hook('dashboards-relation-joined')
def dashboards_relation_joined(relation_id=None, remote_unit=None):
    """Publish the bundled Grafana dashboard on the dashboards relation.

    Reads ``files/grafana-dashboard.json`` (a relative path, so it is
    resolved against the current working directory) and sets its content
    on the relation together with the dashboard name.

    :param relation_id: Relation id handed through to ``relation_set``.
    :param remote_unit: Not used by this handler.
    """
    dashboard_path = os.path.join("files", "grafana-dashboard.json")
    with open(dashboard_path) as dashboard_file:
        settings = {
            "dashboard": dashboard_file.read(),
            "name": "RabbitMQ-Overview",
        }
    relation_set(relation_id, relation_settings=settings)
|
||||
|
||||
|
||||
@hooks.hook('prometheus-rules-relation-joined',
            'prometheus-rules-relation-created')
def prometheus_rules_joined(relation_id=None, remote_unit=None):
    """Publish the split-brain alerting rule on the prometheus-rules relation.

    Reads ``files/prom_rule_rmq_splitbrain.yaml`` (a relative path, so it is
    resolved against the current working directory) and sets the raw file
    content under the ``groups`` key of the relation.

    :param relation_id: Relation id handed through to ``relation_set``.
    :param remote_unit: Not used by this handler.
    """
    rule_path = os.path.join("files", "prom_rule_rmq_splitbrain.yaml")
    with open(rule_path) as rule_file:
        settings = {"groups": rule_file.read()}
    relation_set(relation_id, relation_settings=settings)
|
||||
|
||||
|
||||
@hooks.hook('scrape-relation-joined', 'scrape-relation-created')
def prometheus_scrape_joined(relation_id=None, remote_unit=None):
    """Enable the Prometheus plugin and advertise its port to the scraper.

    When the version check passes, the plugin named by ``PROM_PLUGIN`` is
    enabled, ``RMQ_MON_PORT`` is opened and the port number is set on the
    relation under the ``port`` key.

    :param relation_id: Relation id handed through to ``relation_set``.
    :param remote_unit: Not used by this handler.
    :raises Exception: If ``cmp_pkgrevno`` reports rabbitmq-server as older
        than 3.8.0. The message is logged at WARNING level and the unit
        status is set to blocked before raising.
    """
    err_msg = "rabbitmq-server needs to be >= 3.8 to support Prometheus plugin"
    plugin_supported = cmp_pkgrevno('rabbitmq-server', '3.8.0') >= 0
    if not plugin_supported:
        log(err_msg, level=WARNING)
        status_set("blocked", err_msg)
        raise Exception(err_msg)

    rabbit.enable_plugin(PROM_PLUGIN)
    open_port(RMQ_MON_PORT)
    port_settings = {"port": RMQ_MON_PORT}
    relation_set(relation_id, relation_settings=port_settings)
|
||||
|
||||
|
||||
@hooks.hook('scrape-relation-broken')
def prometheus_scape_broken():
    """Undo the scrape relation setup once the relation is broken.

    Disables the plugin named by ``PROM_PLUGIN``, closes ``RMQ_MON_PORT``
    and records the teardown in the log at INFO level.
    """
    # Keep this order: the plugin is disabled before its port is closed.
    rabbit.disable_plugin(PROM_PLUGIN)
    close_port(RMQ_MON_PORT)
    log(
        "scrape relation broken, disabled plugin and close port",
        level=INFO,
    )
|
||||
|
||||
|
||||
@validate_amqp_config_tracker
|
||||
@hooks.hook('amqp-relation-changed')
|
||||
def amqp_changed(relation_id=None, remote_unit=None,
|
||||
|
@ -681,6 +734,8 @@ def upgrade_charm():
|
|||
|
||||
|
||||
MAN_PLUGIN = 'rabbitmq_management'
|
||||
# Plugin name passed to rabbit.enable_plugin() / rabbit.disable_plugin() by
# the scrape relation handlers.
PROM_PLUGIN = 'rabbitmq_prometheus'
|
||||
# Port opened and published as "port" on the scrape relation, and closed
# again when that relation is broken.
# NOTE(review): presumably the rabbitmq_prometheus plugin's default listener
# port - confirm against the RabbitMQ documentation.
RMQ_MON_PORT = 15692
|
||||
|
||||
|
||||
@hooks.hook('config-changed')
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -0,0 +1 @@
|
|||
rabbitmq_server_relations.py
|
|
@ -21,6 +21,12 @@ provides:
|
|||
nrpe-external-master:
|
||||
interface: nrpe-external-master
|
||||
scope: container
|
||||
dashboards:
|
||||
interface: grafana-dashboard
|
||||
scrape:
|
||||
interface: http
|
||||
prometheus-rules:
|
||||
interface: prometheus-rules
|
||||
requires:
|
||||
ha:
|
||||
interface: hacluster
|
||||
|
|
|
@ -897,6 +897,25 @@ class UtilsTests(CharmTestCase):
|
|||
mock_new_rabbitmq.return_value = True
|
||||
self.assertEqual(rabbit_utils.get_managment_port(), 15672)
|
||||
|
||||
@mock.patch('glob.glob')
@mock.patch('rabbit_utils.subprocess.check_call')
@mock.patch('os.path.exists')
def test_enable_management_plugin(self, mock_os_path,
                                  mock_subprocess,
                                  mock_glob):
    """Check which rabbitmq-plugins binary _manage_plugin invokes.

    Two scenarios are exercised, both enabling ``rabbitmq_prometheus``:
    first with ``os.path.exists`` reporting True, where
    ``/sbin/rabbitmq-plugins`` is expected, then with it reporting False,
    where the path returned by the mocked ``glob.glob`` is expected.
    """
    plugin = "rabbitmq_prometheus"
    system_binary = '/sbin/rabbitmq-plugins'
    versioned_binary = ('/usr/lib/rabbitmq/lib/'
                        'rabbitmq_server-3.8.2/sbin/rabbitmq-plugins')

    # Scenario 1: the path existence check succeeds.
    mock_os_path.return_value = True
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with([system_binary, "enable", plugin])

    # Scenario 2: the existence check fails, so the globbed path is used.
    mock_os_path.return_value = False
    mock_glob.return_value = [versioned_binary]
    rabbit_utils._manage_plugin(plugin, "enable")
    mock_subprocess.assert_called_with([versioned_binary, "enable", plugin])
|
||||
|
||||
@mock.patch('rabbit_utils.caching_cmp_pkgrevno')
|
||||
@mock.patch('rabbit_utils.relations_for_id')
|
||||
@mock.patch('rabbit_utils.subprocess')
|
||||
|
|
Loading…
Reference in New Issue