7 changed files with 380 additions and 0 deletions
@ -0,0 +1,11 @@
|
||||
---
# Chart metadata for the Rally exporter Helm chart.
apiVersion: v1
name: rally-exporter
description: Rally exporter for Prometheus
home: https://github.com/vexxhost/rally-exporter

# Both versions are quoted so they are always read as strings.
version: "0.1.1"
appVersion: "0.0.1"

maintainers:
  - name: Mohammed Naser
    email: mnaser@vexxhost.com
    url: https://github.com/mnaser
@ -0,0 +1,45 @@
|
||||
{{/* vim: set filetype=mustache: */}} |
||||
{{/*
Short name of the chart: `nameOverride` when it is set, otherwise the chart's
own name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "rally-exporter.name" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}
||||
|
||||
{{/*
Fully qualified application name, resolved in this order:
  1. `fullnameOverride`, when set;
  2. the release name alone, when it already contains the chart name
     (or `nameOverride`);
  3. "<release name>-<chart name or nameOverride>".
Every variant is truncated to 63 characters, because some Kubernetes name
fields are limited to that length by the DNS naming spec, and a trailing "-"
left over from the truncation is removed.
*/}}
{{- define "rally-exporter.fullname" -}}
{{- $name := .Values.nameOverride | default .Chart.Name -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
||||
|
||||
{{/*
Basic label set shared by the chart's resources: the fully qualified name,
the release name, fixed `component` / `part-of` values and, when provided,
everything under `.Values.commonLabels`.

The rendered text starts with a newline and is not indented; callers pipe it
through `indent N` to place it under their `labels:` / `matchLabels:` key.
*/}}
{{- define "rally-exporter.labels" }}
app.kubernetes.io/name: {{ include "rally-exporter.fullname" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: rally
{{- with .Values.commonLabels }}
{{ toYaml . }}
{{- end }}
{{- end }}
||||
|
||||
{{/*
"<chart name>-<chart version>" in a form usable as a label value: "+" is
replaced by "_", the result is cut to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "rally-exporter.chart" -}}
{{- $id := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $id | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
@ -0,0 +1,50 @@
|
||||
---
{{- /*
Deployment running the rally-exporter container.

The chart's Secret is mounted at /etc/rally; the container is started with
the plan file from that mount and OS_CLIENT_CONFIG_FILE points at the
clouds.yaml from the same mount. The pod template is annotated with a
checksum of the rendered secret.yaml, so the pod spec changes whenever the
rendered configuration changes.

hostAliases, tolerations and nodeSelector are only emitted when the matching
value is set.
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "rally-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
{{- include "rally-exporter.labels" . | indent 4 }}
spec:
  selector:
    matchLabels:
{{- include "rally-exporter.labels" . | indent 6 }}
  template:
    metadata:
      labels:
{{- include "rally-exporter.labels" . | indent 8 }}
      annotations:
        checksum/config: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
    spec:
      containers:
        - name: rally-exporter
          image: vexxhost/rally-exporter:latest
          args:
            - default
            - /etc/rally/plan.yaml
          env:
            - name: OS_CLIENT_CONFIG_FILE
              value: /etc/rally/clouds.yaml
          ports:
            - name: metrics
              containerPort: 9355
          volumeMounts:
            - name: exporter-config
              mountPath: /etc/rally
      volumes:
        - name: exporter-config
          secret:
            secretName: {{ include "rally-exporter.fullname" . }}
{{- with .Values.hostAliases }}
      hostAliases:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
      tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
      nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
@ -0,0 +1,25 @@
|
||||
---
{{- /*
PodMonitor that scrapes the "metrics" port of the pods carrying this chart's
labels, in any namespace. The scrape interval and timeout come from
`.Values.podMonitor`.

Relabeling sets the `instance` label to the fixed value "default" and drops
the container / endpoint / namespace / pod labels.
*/}}
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: {{ include "rally-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
{{- include "rally-exporter.labels" . | nindent 4 }}
spec:
  namespaceSelector:
    any: true
  selector:
    matchLabels:
{{- include "rally-exporter.labels" . | nindent 6 }}
  podMetricsEndpoints:
    - port: metrics
      interval: {{ .Values.podMonitor.interval }}
      scrapeTimeout: {{ .Values.podMonitor.scrapeTimeout }}
      relabelings:
        - action: replace
          regex: (.*)
          replacement: default
          targetLabel: instance
        - action: labeldrop
          regex: '^(container|endpoint|namespace|pod)$'
@ -0,0 +1,165 @@
|
||||
---
{{- /*
PrometheusRule holding the alerts for the Rally exporter.

Alerts that share a name form an escalation ladder: the same (or a stricter)
expression is repeated with a different `for` / threshold and a different
severity label.

Prometheus' own template expressions are wrapped in Helm raw strings
(backticks) so that Helm passes them through untouched.
*/}}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "rally-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
{{ include "rally-exporter.labels" . | indent 4 }}
spec:
  groups:
    - name: rally
      rules:
        {{- /* A Rally task reports anything other than "passed". */}}
        - alert: RallyTestFailed
          expr: |
            rally_task_passed != 1
          labels:
            severity: P5
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in it's most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 7m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P4
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in it's most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 12m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in it's most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 17m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P2
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in it's most
              recent run which means the cloud is currently seeing issues.

        {{- /* The rally_task_passed series is missing entirely. */}}
        - alert: RallyNotReporting
          for: 5m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 5 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.

        - alert: RallyNotReporting
          for: 7m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 7 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.

        {{- /* The last reported task time is more than 900 seconds old. */}}
        - alert: RallyNotRunning
          expr: |
            time() - rally_task_time > 900
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} not running"
            description: >
              The most recent Rally run for the {{`{{$labels.title}}`}} task
              was {{`{{$value}}`}} seconds ago which implies that it is not
              running properly.

        {{- /*
        Left-over servers whose name starts with "s_rally".

        count() without a `by` clause yields a single series with no labels,
        so the summary must not reference $labels.title: it would always
        render as an empty string.

        NOTE(review): the description mentions an ERROR state, but the
        expression counts every matching series regardless of status.
        Confirm whether a status matcher is intended.
        */}}
        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              to an ERROR state.

        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              to an ERROR state.

        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
              to an ERROR state.

        {{- /*
        Left-over volumes whose name starts with "s_rally". As with the
        instance alerts, count() leaves no labels, so the summary carries
        only the counted value.
        */}}
        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              to an ERROR state.

        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              to an ERROR state.

        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
              to an ERROR state.
@ -0,0 +1,66 @@
|
||||
---
{{- /*
Secret holding the three files mounted into the exporter at /etc/rally:

  rally.conf   static Rally settings (database location, boot timeout);
  clouds.yaml  credentials of the cloud named "default", taken from
               `.Values.cloud`;
  plan.yaml    the Rally task: a Keystone authentication check and a Nova
               boot-from-volume-and-delete check, parameterised by
               `.Values.test`.

Every user-supplied value is piped through `quote`. Unquoted, a password or
name containing " #", ": " or a leading YAML indicator would truncate or
break the embedded document, and an all-digit name would be read as a number.

The Deployment checksums this rendered file, so any change to its output
changes the pod template annotation.
*/}}
apiVersion: v1
kind: Secret
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "rally-exporter.fullname" . }}
  labels:
{{- include "rally-exporter.labels" . | indent 4 }}
stringData:
  rally.conf: |
    [database]
    connection=sqlite:////home/rally/data/rally.db
    [openstack]
    nova_server_boot_timeout=90
  clouds.yaml: |
    clouds:
      default:
        auth:
          auth_url: {{ .Values.cloud.auth_url | quote }}
          project_name: {{ .Values.cloud.project_name | quote }}
          tenant_name: {{ .Values.cloud.project_name | quote }}
          username: {{ .Values.cloud.username | quote }}
          password: {{ .Values.cloud.password | quote }}
          user_domain_name: {{ .Values.cloud.user_domain_name | quote }}
          project_domain_name: {{ .Values.cloud.project_domain_name | quote }}
        region_name: {{ .Values.cloud.region_name | quote }}
        interface: {{ .Values.cloud.interface | quote }}
  plan.yaml: |
    ---
    version: 2
    title: OpenStack Cloud Validation
    description: |
      This task runs a few synethic tests to ensure that the cloud is in a
      functional state.
    subtasks:
      - title: keystone
        scenario:
          Authenticate.keystone: {}
        runner:
          constant:
            times: 1
            concurrency: 1
        sla:
          max_seconds_per_iteration: 5
          failure_rate:
            max: 0
      - title: nova
        scenario:
          NovaServers.boot_server_from_volume_and_delete:
            {{- if .Values.test.network }}
            nics:
              - net-name: {{ .Values.test.network | quote }}
            {{- end }}
            volume_size: 20
            flavor:
              name: {{ .Values.test.flavor | quote }}
            image:
              name: {{ .Values.test.image | quote }}
        runner:
          constant:
            times: 1
            concurrency: 1
        sla:
          max_seconds_per_iteration: 90
          failure_rate:
            max: 0
Loading…
Reference in new issue