From b9a9ee323b81968297ee6dde414a8c6f0760ca33 Mon Sep 17 00:00:00 2001 From: Hemant Date: Wed, 5 Jun 2019 14:15:07 +0200 Subject: [PATCH] Change the expression of a defined alert in Prometheus to avoid unnecessary errors There were some false alerts about volume_claim_capacity_high_utilization due to the wrong formula being used to determine the percentage of used capacity. Change-Id: I24afed7946f915e5e13f0ba759eca252c2598af9 --- prometheus/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/values.yaml b/prometheus/values.yaml index bd9a85a67..62036d4ea 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -1416,7 +1416,7 @@ conf: description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has a container terminated for more than 10 minutes' summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status' - alert: volume_claim_capacity_high_utilization - expr: (kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.80 + expr: (kubelet_volume_stats_capacity_bytes / kubelet_volume_stats_used_bytes) < 1.25 for: 5m labels: severity: page