openstack-helm-infra/prometheus/values_overrides/alertmanager.yaml

34 lines
1.6 KiB
YAML

---
conf:
prometheus:
rules:
alertmanager:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
for: 5m
labels:
severity: critical
annotations:
description: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
summary: Alertmanager configurations are inconsistent
- alert: AlertmanagerDownOrMissing
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
for: 5m
labels:
severity: warning
annotations:
description: An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery.
summary: Alertmanager down or not discovered
- alert: FailedReload
expr: alertmanager_config_last_reload_successful == 0
for: 10m
labels:
severity: warning
annotations:
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.
summary: Alertmanager configuration reload has failed
...