# Listing metadata from the file viewer: 58 lines, 1.8 KiB, YAML
---
# PrometheusRule (monitoring.coreos.com/v1) defining the "tempest" alert group.
# Rendered by Helm: the namespace comes from the release, and the name and
# labels come from the tempest-pushgateway named templates.
#
# Inside the annotations, the Helm raw-string actions emit the Prometheus
# $labels placeholders verbatim, so they are expanded by Prometheus at alert
# time rather than by Helm at render time.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "tempest-pushgateway.fullname" . }}
  labels:
# "indent 4" prefixes every line of the helper output with four spaces so it
# nests under metadata.labels; the action itself therefore stays at column 0.
{{ include "tempest-pushgateway.labels" . | indent 4 }}
spec:
  groups:
    - name: tempest
      rules:
        # Fires when more than 900 seconds (15 minutes) have elapsed since the
        # timestamp stored in tempest_last_run_unixtime.
        # NOTE(review): if the metric is missing entirely the expression
        # returns no series and this alert cannot fire - confirm whether an
        # absent() based rule is wanted as well.
        - alert: TempestTestNotRunning
          expr: |
            time() - tempest_last_run_unixtime > 900
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Tempest not reporting"
            description: >
              Tempest has not reported in for over 15 minutes which means that the
              tests are not running and the state of the cloud is unknown.

        # The three TempestTestFailure rules share one expression: the series
        # labelled tempest_last_run_result="success" has a value other than 1.
        # They differ only in how long the condition must hold ("for") and in
        # the severity label: P5 with no hold time, P4 after 8m, P3 after 13m.
        # NOTE(review): presumably a lower P number means higher urgency -
        # confirm against the alert routing configuration.
        - alert: TempestTestFailure
          expr: |
            tempest_last_run_result{tempest_last_run_result="success"} != 1
          labels:
            severity: P5
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Tempest test failure"
            description: >
              The test `{{`{{$labels.instance}}`}}` has failed in its most recent
              run.

        # Same condition, held for 8 minutes.
        - alert: TempestTestFailure
          for: 8m
          expr: |
            tempest_last_run_result{tempest_last_run_result="success"} != 1
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Tempest test failure"
            description: >
              The test `{{`{{$labels.instance}}`}}` has failed in its most recent
              run for 8 minutes.

        # Same condition, held for 13 minutes.
        - alert: TempestTestFailure
          for: 13m
          expr: |
            tempest_last_run_result{tempest_last_run_result="success"} != 1
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Tempest test failure"
            description: >
              The test `{{`{{$labels.instance}}`}}` has failed in its most recent
              run for 13 minutes.