Prometheus: Update Alertmanager discovery, fix rules entry
Updates the service discovery mechanism used by Prometheus to identify the Alertmanager instances it pushes alerts to. Alertmanager pods are now identified by the 'application' label instead of the 'name' label, as the previous definition returned empty results for Alertmanager targets. This also fixes the name of the Prometheus label used to track alerts for kube-controller-manager, as it was previously defined incorrectly. Change-Id: I1fb194550baf803435722e3a01892e49b44259d1
This commit is contained in:
parent
e36dfcd21d
commit
e0c4469fdf
@ -120,8 +120,9 @@ endpoints:
|
||||
name: alertmanager
|
||||
namespace: null
|
||||
hosts:
|
||||
default: alerts-api
|
||||
default: alerts-engine
|
||||
public: alertmanager
|
||||
discovery: alertmanager-discovery
|
||||
host_fqdn_override:
|
||||
default: null
|
||||
path:
|
||||
@ -132,6 +133,8 @@ endpoints:
|
||||
api:
|
||||
default: 9093
|
||||
public: 80
|
||||
mesh:
|
||||
default: 6783
|
||||
|
||||
dependencies:
|
||||
dynamic:
|
||||
@ -452,20 +455,23 @@ conf:
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- kubernetes_sd_configs:
|
||||
- role: pod
|
||||
- role: pod
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_label_name
|
||||
- source_labels: [__meta_kubernetes_pod_label_application]
|
||||
regex: alertmanager
|
||||
action: keep
|
||||
- source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
||||
regex: alerts-api
|
||||
action: keep
|
||||
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
||||
regex: peer-mesh
|
||||
action: drop
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
regex: openstack
|
||||
action: keep
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_container_port_number
|
||||
regex:
|
||||
action: drop
|
||||
rules:
|
||||
alertmanager:
|
||||
groups:
|
||||
@ -626,7 +632,7 @@ conf:
|
||||
- name: kube-controller-manager.rules
|
||||
rules:
|
||||
- alert: K8SControllerManagerDown
|
||||
expr: absent(up{job="kube-controller-manager"} == 1)
|
||||
expr: absent(up{job="kube-controller-manager-discovery"} == 1)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
Loading…
x
Reference in New Issue
Block a user