Browse Source

Add rally-exporter chart

Change-Id: I5a7c70e5d9602dedbf550217b7f02ace294d84f2
changes/75/750975/1
okozachenko 2 years ago
parent
commit
63915ca596
  1. 11
      charts/rally-exporter/Chart.yaml
  2. 45
      charts/rally-exporter/templates/_helpers.tpl
  3. 50
      charts/rally-exporter/templates/deployment.yaml
  4. 25
      charts/rally-exporter/templates/podmonitor.yaml
  5. 165
      charts/rally-exporter/templates/prometheusrule.yaml
  6. 66
      charts/rally-exporter/templates/secret.yaml
  7. 18
      charts/rally-exporter/values.yaml

11
charts/rally-exporter/Chart.yaml

@ -0,0 +1,11 @@
---
# Helm chart metadata for the rally-exporter chart.
apiVersion: v1
name: rally-exporter
# Version of the chart package itself.
version: 0.1.1
description: Rally exporter for Prometheus
home: https://github.com/vexxhost/rally-exporter
maintainers:
  # email and url belong to the maintainer entry, so they are nested under it.
  - name: Mohammed Naser
    email: mnaser@vexxhost.com
    url: https://github.com/mnaser
# Quoted so the value is always read as a string, whatever it looks like.
appVersion: "0.0.1"

45
charts/rally-exporter/templates/_helpers.tpl

@ -0,0 +1,45 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the chart: .Values.nameOverride when it is set, otherwise
.Chart.Name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "rally-exporter.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Fully qualified name used for the resources of this chart.

Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.

Every variant is cut to 63 characters because some Kubernetes name fields are
limited to this (by the DNS naming spec), and a trailing "-" is removed.
*/}}
{{- define "rally-exporter.fullname" -}}
{{- with .Values.fullnameOverride -}}
{{- . | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- $releaseName := .Release.Name -}}
{{- if contains $chartName $releaseName -}}
{{- $releaseName | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" $releaseName $chartName | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Label set shared by the resources of this chart: four fixed
app.kubernetes.io/* labels followed by any entries of .Values.commonLabels.

The rendered text starts with a newline (the define tag is not right-trimmed),
so callers are expected to pipe the result through indent.
*/}}
{{- define "rally-exporter.labels" }}
app.kubernetes.io/name: {{ include "rally-exporter.fullname" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: rally
{{- with .Values.commonLabels }}
{{ toYaml . }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" string for use as a chart label value.
"+" is replaced by "_", the result is cut to 63 characters and a trailing "-"
is removed.
*/}}
{{- define "rally-exporter.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}

50
charts/rally-exporter/templates/deployment.yaml

@ -0,0 +1,50 @@
---
# Deployment running the rally-exporter container.
#
# The chart's Secret is mounted at /etc/rally, which provides the plan.yaml
# passed as an argument and the clouds.yaml referenced by
# OS_CLIENT_CONFIG_FILE.
#
# Optional values read here:
#   image         container image reference; defaults to
#                 vexxhost/rally-exporter:latest when unset
#   hostAliases   list copied verbatim into the pod spec
#   tolerations   list copied verbatim into the pod spec
#   nodeSelector  map copied verbatim into the pod spec
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "rally-exporter.fullname" . }}
  labels:
    {{- include "rally-exporter.labels" . | indent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "rally-exporter.labels" . | indent 6 }}
  template:
    metadata:
      annotations:
        # Hash of the rendered Secret: a configuration change alters the pod
        # template and therefore triggers a rollout.
        checksum/config: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
      labels:
        {{- include "rally-exporter.labels" . | indent 8 }}
    spec:
      containers:
        - name: rally-exporter
          # Overridable so a pinned tag or a mirrored registry can be used;
          # the default is the image that was previously hard-coded.
          image: {{ .Values.image | default "vexxhost/rally-exporter:latest" | quote }}
          args:
            # NOTE(review): presumably the name of the clouds.yaml entry
            # ("default") followed by the task file; confirm against the
            # exporter's command line.
            - default
            - /etc/rally/plan.yaml
          volumeMounts:
            - name: exporter-config
              mountPath: /etc/rally
          ports:
            - name: metrics
              containerPort: 9355
          env:
            - name: OS_CLIENT_CONFIG_FILE
              value: /etc/rally/clouds.yaml
      volumes:
        - name: exporter-config
          secret:
            secretName: {{ include "rally-exporter.fullname" . }}
      {{- with .Values.hostAliases }}
      hostAliases:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}

25
charts/rally-exporter/templates/podmonitor.yaml

@ -0,0 +1,25 @@
---
{{- $endpoint := .Values.podMonitor }}
# PodMonitor that scrapes the "metrics" port of the pods carrying this chart's
# labels. Pods are matched in any namespace. Every scraped series gets
# instance="default", and the container, endpoint, namespace and pod labels
# are dropped.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "rally-exporter.fullname" . }}
  labels:
    {{- include "rally-exporter.labels" . | nindent 4 }}
spec:
  namespaceSelector:
    any: true
  selector:
    matchLabels:
      {{- include "rally-exporter.labels" . | nindent 6 }}
  podMetricsEndpoints:
    - port: metrics
      interval: {{ $endpoint.interval }}
      scrapeTimeout: {{ $endpoint.scrapeTimeout }}
      relabelings:
        - targetLabel: instance
          action: replace
          regex: (.*)
          replacement: default
        - action: labeldrop
          regex: '^(container|endpoint|namespace|pod)$'

165
charts/rally-exporter/templates/prometheusrule.yaml

@ -0,0 +1,165 @@
---
# Alerting rules for the Rally exporter.
#
# Several rules share an alert name and differ only in their "for" duration
# or threshold and in the severity label, so a condition that persists or
# worsens is reported at increasing severity (P5 is the lowest level used
# here, P2 the highest).
#
# Prometheus template expressions are wrapped in backtick literals so that
# Helm emits them verbatim instead of evaluating them.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "rally-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "rally-exporter.labels" . | nindent 4 }}
spec:
  groups:
    - name: rally
      rules:
        # A task reports anything other than "passed"; fires immediately.
        - alert: RallyTestFailed
          expr: |
            rally_task_passed != 1
          labels:
            severity: P5
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 7m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P4
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 12m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.
        - alert: RallyTestFailed
          for: 17m
          expr: |
            rally_task_passed != 1
          labels:
            severity: P2
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} failed"
            description: >
              The Rally task {{`{{$labels.title}}`}} has failed in its most
              recent run which means the cloud is currently seeing issues.
        # The rally_task_passed series is missing altogether.
        - alert: RallyNotReporting
          for: 5m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 5 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.
        - alert: RallyNotReporting
          for: 7m
          expr: |
            absent(rally_task_passed)
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Not reporting"
            description: >
              Rally is not reporting any details at all for the past 7 minutes
              which means that the service is down and it could likely be
              masking issues with the cloud.
        # The last recorded run is more than 900 seconds (15 minutes) old.
        - alert: RallyNotRunning
          expr: |
            time() - rally_task_time > 900
          labels:
            severity: P3
          annotations:
            summary: "[Rally] {{`{{$labels.title}}`}} not running"
            description: >
              The most recent Rally run for the {{`{{$labels.title}}`}} task
              was {{`{{$value}}`}} seconds ago which implies that it is not
              running properly.
        # Leftover servers whose name starts with "s_rally". count() drops
        # every label, so these annotations can only use the value; the title
        # label is not available here.
        # NOTE(review): the description mentions an ERROR state but the
        # expression does not filter on server status - confirm whether a
        # status matcher is intended.
        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} instances"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} instances which
              resulted in an ERROR state.
        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} instances"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} instances which
              resulted in an ERROR state.
        - alert: RallyInstanceStatus
          for: 15m
          expr: |
            count(openstack_nova_server_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} instances"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} instances which
              resulted in an ERROR state.
        # Leftover volumes whose name starts with "s_rally"; as above, only
        # the value is available to the annotations after count().
        # NOTE(review): no status matcher here either - confirm.
        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 2
          labels:
            severity: P5
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} volumes which
              resulted in an ERROR state.
        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 5
          labels:
            severity: P4
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} volumes which
              resulted in an ERROR state.
        - alert: RallyVolumeStatus
          for: 15m
          expr: |
            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) >= 10
          labels:
            severity: P3
          annotations:
            summary: "[Rally] Failed to clean up {{`{{$value}}`}} volumes"
            description: >
              Rally has failed to clean up {{`{{$value}}`}} volumes which
              resulted in an ERROR state.

66
charts/rally-exporter/templates/secret.yaml

@ -0,0 +1,66 @@
---
# Secret holding the three files mounted into the exporter pod:
#   rally.conf   Rally settings (SQLite database location, Nova boot timeout)
#   clouds.yaml  credentials for the cloud entry named "default", taken from
#                .Values.cloud
#   plan.yaml    Rally task with a keystone authentication subtask and a nova
#                boot-from-volume-and-delete subtask, parameterised by
#                .Values.test
#
# Values interpolated into the embedded YAML are passed through quote so that
# characters such as "#" or ": ", and number- or boolean-looking strings,
# cannot corrupt or retype the generated files.
apiVersion: v1
kind: Secret
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "rally-exporter.fullname" . }}
  labels:
    {{- include "rally-exporter.labels" . | indent 4 }}
stringData:
  rally.conf: |
    [database]
    connection=sqlite:////home/rally/data/rally.db
    [openstack]
    nova_server_boot_timeout=90
  clouds.yaml: |
    clouds:
      default:
        auth:
          auth_url: {{ .Values.cloud.auth_url | quote }}
          project_name: {{ .Values.cloud.project_name | quote }}
          tenant_name: {{ .Values.cloud.project_name | quote }}
          username: {{ .Values.cloud.username | quote }}
          password: {{ .Values.cloud.password | quote }}
          user_domain_name: {{ .Values.cloud.user_domain_name | quote }}
          project_domain_name: {{ .Values.cloud.project_domain_name | quote }}
        region_name: {{ .Values.cloud.region_name | quote }}
        interface: {{ .Values.cloud.interface | quote }}
  plan.yaml: |
    ---
    version: 2
    title: OpenStack Cloud Validation
    description: |
      This task runs a few synthetic tests to ensure that the cloud is in a
      functional state.
    subtasks:
      - title: keystone
        scenario:
          Authenticate.keystone: {}
        runner:
          constant:
            times: 1
            concurrency: 1
        sla:
          max_seconds_per_iteration: 5
          failure_rate:
            max: 0
      - title: nova
        scenario:
          NovaServers.boot_server_from_volume_and_delete:
            {{- if .Values.test.network }}
            nics:
              - net-name: {{ .Values.test.network | quote }}
            {{- end }}
            volume_size: 20
            flavor:
              name: {{ .Values.test.flavor | quote }}
            image:
              name: {{ .Values.test.image | quote }}
        runner:
          constant:
            times: 1
            concurrency: 1
        sla:
          max_seconds_per_iteration: 90
          failure_rate:
            max: 0

18
charts/rally-exporter/values.yaml

@ -0,0 +1,18 @@
---
# Default values for the rally-exporter chart.

# Credentials and endpoint selection rendered into the clouds.yaml entry that
# the exporter uses.
cloud:
  # Identity (Keystone) endpoint URL; must be supplied.
  auth_url: ""
  # Region to operate in; must be supplied.
  region_name: ""
  interface: internal
  user_domain_name: Default
  project_domain_name: Default
  # Also rendered as tenant_name.
  project_name: rally
  username: rally
  # Must be supplied.
  password: ""

# Parameters of the nova boot-from-volume test in the Rally plan.
test:
  # Name of the flavor to boot; must be supplied.
  flavor: ""
  # Name of the image to boot from; must be supplied.
  image: ""
  # Optional network name; when empty no nics entry is rendered.
  network: ""

# Scrape settings of the PodMonitor.
podMonitor:
  interval: 1m
  scrapeTimeout: 1m

# Optional overrides of the generated resource names.
nameOverride: ""
fullnameOverride: ""

# Extra labels appended to the labels of every resource.
commonLabels: {}

# Optional pod scheduling and networking settings; rendered only when
# non-empty.
hostAliases: []
tolerations: []
nodeSelector: {}
Loading…
Cancel
Save