166 lines
5.3 KiB
YAML
Executable File
166 lines
5.3 KiB
YAML
Executable File
---
# PrometheusRule (Prometheus Operator CRD) holding the alerting rules for the
# Rally exporter. This file is a Helm template: the outer double-brace actions
# are rendered by Helm at install time, while the backtick-quoted inner
# expressions are emitted verbatim so that Prometheus expands them when an
# alert fires.
#
# Several alerts are declared more than once under the same name, each time
# with a longer `for:` duration or a higher threshold and a different
# severity label.
# NOTE(review): this looks like an escalation ladder in which P5 is the lowest
# and P2 the highest priority -- confirm against the Alertmanager routing.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "rally-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "rally-exporter.labels" . | nindent 4 }}
spec:
  groups:
    - name: rally
      rules:
        # --- RallyTestFailed -------------------------------------------------
        # Matches every rally_task_passed series whose value is not 1. The
        # first rule has no `for:` and fires immediately; the following rules
        # require the condition to hold for 7, 12 and 17 minutes.
        - alert: RallyTestFailed
          expr: |
            rally_task_passed != 1
          labels:
            severity: P5
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.

        - alert: RallyTestFailed
          for: 7m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P4
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.

        - alert: RallyTestFailed
          for: 12m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.

        - alert: RallyTestFailed
          for: 17m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P2
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.

        # --- RallyNotReporting -----------------------------------------------
        # absent() yields a result only when no rally_task_passed series
        # exists at all, so these rules cover the case the rules above cannot
        # see: the metric itself is missing.
        - alert: RallyNotReporting
          for: 5m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 5 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.

        - alert: RallyNotReporting
          for: 7m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 7 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.

        # --- RallyNotRunning -------------------------------------------------
        # Fires when the current time minus rally_task_time exceeds 900; the
        # difference is what the description prints as "seconds ago".
        - alert: RallyNotRunning
          expr: |
            time() - rally_task_time > 900
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} not running"
            description: >
              The most recent Rally run for the {{`{{$labels.title}}`}} task
              was {{`{{$value}}`}} seconds ago which implies that it is not
              running properly.

        # --- RallyInstanceStatus ---------------------------------------------
        # Counts the openstack_nova_server_status series whose `name` label
        # matches the Rally prefix. count() without a `by` clause returns a
        # single series carrying no labels, so no per-task label is available
        # to the annotations; only the counted value is interpolated.
        # NOTE(review): the description mentions an ERROR state, but the
        # selector matches on `name` only -- confirm whether a status matcher
        # is intended.
        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              in an ERROR state.

        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              in an ERROR state.

        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              in an ERROR state.

        # --- RallyVolumeStatus -----------------------------------------------
        # Same shape as the instance rules, applied to the
        # openstack_cinder_volume_status series. The aggregated result again
        # carries no labels, so only the counted value is interpolated.
        # NOTE(review): as above, the selector does not filter on a status
        # label although the description mentions an ERROR state -- confirm.
        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              in an ERROR state.

        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              in an ERROR state.

        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              in an ERROR state.