rabbitmq: monitoring and reliability improvements

Sem-Ver: bugfix
Change-Id: I655a6e5237ee0dc98547b5e8b4fa146a020f5606
This commit is contained in:
Mohammed Naser 2022-08-22 23:18:13 -04:00
parent 051244513d
commit e658866811
3 changed files with 30 additions and 2 deletions

View File

@ -0,0 +1,7 @@
---
features:
- Added monitoring for RabbitMQ to detect and alert on the resource alarms
it raises (memory watermark, free disk space and file descriptor limit).
fixes:
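- Replaced the RabbitmqConnections alert, which used a fixed connection count,
with RabbitmqTcpSocketsUsage, which is based on the ratio of open to maximum
TCP sockets and so avoids spurious alerts on larger-scale clouds.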

View File

@ -31,6 +31,9 @@
operator: In operator: In
values: values:
- enabled - enabled
rabbitmq:
additionalConfig: |
vm_memory_high_watermark.relative = 0.9
resources: resources:
requests: requests:
cpu: 500m cpu: 500m

View File

@ -129,6 +129,20 @@
"(.*)" "(.*)"
) )
) )
- name: alarms
rules:
- alert: RabbitmqAlarmFreeDiskSpace
expr: rabbitmq_alarms_free_disk_space_watermark == 1
labels:
severity: critical
- alert: RabbitmqAlarmMemoryUsedWatermark
expr: rabbitmq_alarms_memory_used_watermark == 1
labels:
severity: critical
- alert: RabbitmqAlarmFileDescriptorLimit
expr: rabbitmq_alarms_file_descriptor_limit == 1
labels:
severity: critical
- name: limits - name: limits
rules: rules:
- alert: RabbitmqMemoryHigh - alert: RabbitmqMemoryHigh
@ -147,10 +161,14 @@
expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95 expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95
labels: labels:
severity: critical severity: critical
- alert: RabbitmqConnections - alert: RabbitmqTcpSocketsUsage
expr: rabbitmq_connections > 1000 expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80
labels: labels:
severity: warning severity: warning
- alert: RabbitmqTcpSocketsUsage
expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95
labels:
severity: critical
- name: msgs - name: msgs
rules: rules:
- alert: RabbitmqUnackedMessages - alert: RabbitmqUnackedMessages