Add dell-exporter

Change-Id: I55877064e5ba72abae8037578b1b50f51a67e518
This commit is contained in:
Mohammed Naser 2020-08-15 09:32:56 -04:00
parent 4ff3e37b8f
commit 8b7783f156
7 changed files with 253 additions and 0 deletions

View File

@ -0,0 +1,11 @@
---
# Helm chart metadata for the Dell hardware exporter.
apiVersion: v1
name: dell-exporter
description: Dell exporter for Prometheus
home: https://github.com/vexxhost/helm-charts

# `version` is the chart's own release number; `appVersion` is the
# informational version string attached to the packaged application.
version: 0.1.1
appVersion: v1.4.0

maintainers:
  - name: Mohammed Naser
    email: mnaser@vexxhost.com
    url: https://github.com/mnaser

View File

@ -0,0 +1,45 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Short chart name: .Values.nameOverride when it is set, otherwise the chart's
own name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "dell-exporter.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Fully qualified app name, capped at 63 characters because some Kubernetes
name fields are limited to that length (DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
A trailing "-" left over after truncation is removed in every case.
*/}}
{{- define "dell-exporter.fullname" -}}
{{- $shortName := default .Chart.Name .Values.nameOverride -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $shortName .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $shortName | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{/*
Label set shared by the chart's resources and selectors.
The output is emitted at column 0 and starts with a newline, so callers are
expected to pipe it through indent/nindent. Any entries in
.Values.commonLabels are appended after the fixed labels.
*/}}
{{- define "dell-exporter.labels" }}
app.kubernetes.io/name: {{ include "dell-exporter.fullname" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: hardware
{{- with .Values.commonLabels }}
{{ toYaml . }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" string for use as a chart label value.
"+" is replaced with "_", and the result is cut to 63 characters with any
trailing "-" removed.
*/}}
{{- define "dell-exporter.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}

View File

@ -0,0 +1,43 @@
---
# DaemonSet that runs one dell-exporter pod per selected node and exposes
# its metrics on container port 9137.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "dell-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "dell-exporter.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "dell-exporter.labels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "dell-exporter.labels" . | nindent 8 }}
    spec:
      containers:
        - name: dell-exporter
          image: {{ .Values.image }}
          ports:
            - name: metrics
              containerPort: 9137
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              cpu: 6000m
              memory: 512Mi
          # The container runs privileged.
          # NOTE(review): presumably required for hardware access - confirm.
          securityContext:
            privileged: true
      # Optional scheduling constraints, rendered only when set in values.
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
  # Roll out changes with at most 10% of the pods unavailable at once.
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 10%

View File

@ -0,0 +1,111 @@
---
# Alerting rules for the Dell hardware exporter metrics.
#
# The annotation templates are wrapped in Helm raw-string actions so that
# Helm emits the Prometheus template expressions verbatim instead of trying
# to evaluate them itself.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "dell-exporter.fullname" . }}
  labels:
{{ include "dell-exporter.labels" . | indent 4 }}
spec:
  groups:
    - name: dell
      rules:
        # Overall system status: value 2 is handled as non-critical (P4),
        # value 1 as critical (P3).
        - alert: DellSystemStatus
          expr: |
            dell_hw_system_status == 2
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] System warning"
            description: >
              The host `{{`{{$labels.instance}}`}}` is reporting a non-critical status,
              please investigate. If this is the only firing alert, please add
              another one to handle the specific failure.
        - alert: DellSystemStatus
          expr: |
            dell_hw_system_status == 1
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] System failure"
            description: >
              The host `{{`{{$labels.instance}}`}}` is reporting a critical status,
              please investigate. If this is the only firing alert, please add
              another one to handle the specific failure.
        # Component status alerts: any non-zero status is reported.
        - alert: DellFanFailure
          expr: |
            dell_hw_chassis_fan_status != 0
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Fan failure"
            description: >
              The `{{`{{$labels.fan}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please replace it.
        - alert: DellMemoryFailure
          expr: |
            dell_hw_chassis_memory_status != 0
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Memory failure"
            description: >
              The `{{`{{$labels.memory}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please replace it.
        - alert: DellPowerSupplyFailure
          expr: |
            dell_hw_ps_status != 0
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Power failure"
            description: >
              The PSU `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please investigate if power is lost or power supply
              requires replacement.
        - alert: DellHardwareStorageFailure
          expr: |
            dell_hw_storage_controller_status != 0
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Storage failure"
            description: >
              The storage controller `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}`
              is reporting a failure, please investigate inside the host and add the
              appropriate alerting rules if no alerts except this one have fired.
        # Temperature alerts. The critical (P3) rule compares the reading
        # against the failure threshold and the warning (P4) rule against the
        # warning threshold, so the two rules no longer fire on the same
        # condition.
        - alert: DellChassisTemperature
          expr: |
            dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_failure
          labels:
            severity: P3
          annotations:
            summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
            description: >
              The component {{`{{$labels.component}}`}} is reporting temperatures
              of {{`{{$value}}`}} which is above the critical reading for
              {{`{{$labels.instance}}`}}.
        - alert: DellChassisTemperature
          expr: |
            dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_warning
          labels:
            severity: P4
          annotations:
            summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
            description: >
              The component {{`{{$labels.component}}`}} is reporting temperatures
              of {{`{{$value}}`}} which is above the warning reading for
              {{`{{$labels.instance}}`}}.

View File

@ -0,0 +1,16 @@
---
# Headless Service (clusterIP: None) in front of the exporter pods; it
# publishes the named "metrics" port.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "dell-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "dell-exporter.labels" . | nindent 4 }}
spec:
  clusterIP: None
  selector:
    {{- include "dell-exporter.labels" . | nindent 4 }}
  ports:
    - name: metrics
      port: 9137
      targetPort: metrics

View File

@ -0,0 +1,25 @@
---
# ServiceMonitor that scrapes the "metrics" port of the Service carrying
# the chart's labels.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "dell-exporter.fullname" . }}
  labels:
{{ include "dell-exporter.labels" . | indent 4 }}
spec:
  endpoints:
    - port: metrics
      scrapeTimeout: 30s
      relabelings:
        # Use the node name of the scraped pod as the `instance` label.
        # The key is `replacement`; the earlier spelling `replacment` is not
        # a field of the relabeling schema.
        - action: replace
          sourceLabels:
            - __meta_kubernetes_pod_node_name
          regex: '(.*)'
          replacement: '$1'
          targetLabel: instance
        # Drop the listed Kubernetes discovery labels from the target.
        - action: labeldrop
          regex: '^(endpoint|namespace|pod|service)$'
  jobLabel: jobLabel
  selector:
    matchLabels:
{{ include "dell-exporter.labels" . | indent 6 }}

View File

@ -0,0 +1,2 @@
---
# Container image reference, pinned by sha256 digest. It is read by the
# chart templates as `.Values.image`.
image: 'galexrt/dellhw_exporter@sha256:09ce2b8459dcd30a2bfdb6f3b2764dd461d09844d18a893ca5eb273c39eadce4'