From 1c01274207393aa15a4591d8a9d709748d5e9754 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Mon, 14 May 2018 17:27:48 -0500 Subject: [PATCH] Update prometheus rule for terminated containers in pods This updates the prometheus rule for checking for terminated containers in pods. The previous rule checked for any terminations, which raised alarms due to completed containers in jobs being included, which isn't desired behavior. This changes the expression to check for any containers that have terminated with a status other than completed. Change-Id: I88e533a56f81f81bd1a81420ecfb7d43ac9e2d0b --- prometheus/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/values.yaml b/prometheus/values.yaml index a940b9cbc..41a24d587 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -841,7 +841,7 @@ conf: description: 'Replicaset {{$labels.replicaset}} is missing desired number of replicas for more than 10 minutes' summary: 'Replicaset {{$labels.replicaset}} is missing replicas' - alert: kube_pod_container_terminated - expr: kube_pod_container_status_terminated > 0 + expr: kube_pod_container_status_terminated_reason{reason=~"OOMKilled|Error|ContainerCannotRun"} > 0 for: 10m labels: severity: page