Browse Source

Add dell-exporter

Change-Id: I55877064e5ba72abae8037578b1b50f51a67e518
changes/70/746370/1
Mohammed Naser 1 year ago
parent
commit
8b7783f156
  1. 11
      charts/dell-exporter/Chart.yaml
  2. 45
      charts/dell-exporter/templates/_helpers.tpl
  3. 43
      charts/dell-exporter/templates/daemonset.yaml
  4. 111
      charts/dell-exporter/templates/prometheusrule.yaml
  5. 16
      charts/dell-exporter/templates/service.yaml
  6. 25
      charts/dell-exporter/templates/servicemonitor.yaml
  7. 2
      charts/dell-exporter/values.yaml

11
charts/dell-exporter/Chart.yaml

@ -0,0 +1,11 @@
---
# Chart metadata for the dell-exporter Helm chart.
apiVersion: v1
name: dell-exporter
description: Dell exporter for Prometheus
home: https://github.com/vexxhost/helm-charts

# Version of the chart itself, followed by the version label of the
# application it packages. Both are quoted so they always parse as strings.
version: "0.1.1"
appVersion: "v1.4.0"

maintainers:
  - name: Mohammed Naser
    email: mnaser@vexxhost.com
    url: https://github.com/mnaser

45
charts/dell-exporter/templates/_helpers.tpl

@ -0,0 +1,45 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Short name of the chart: .Values.nameOverride when it is set, otherwise
.Chart.Name. The result is cut to 63 characters and a trailing "-" is dropped.
*/}}
{{- define "dell-exporter.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Fully qualified name used for the resources of this chart.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.
Every variant is cut to 63 characters, because some Kubernetes name fields are
limited to that length by the DNS naming spec, and a trailing "-" is dropped.
*/}}
{{- define "dell-exporter.fullname" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- if .Values.fullnameOverride -}}
{{- trimSuffix "-" (trunc 63 .Values.fullnameOverride) -}}
{{- else if contains $chartName .Release.Name -}}
{{- trimSuffix "-" (trunc 63 .Release.Name) -}}
{{- else -}}
{{- trimSuffix "-" (trunc 63 (printf "%s-%s" .Release.Name $chartName)) -}}
{{- end -}}
{{- end -}}
{{/*
Generate the basic label set shared by every resource of this chart.
Emits the name, instance, component and part-of labels, followed by the
entries of .Values.commonLabels when that value is set.
The templated values are quoted so that a release name or fullnameOverride
that looks like a number or a boolean (for example "123" or "true") is still
rendered as a YAML string, which is what Kubernetes requires for label values.
The rendered block starts with a newline and carries no indentation; callers
pipe it through indent/nindent.
*/}}
{{- define "dell-exporter.labels" }}
app.kubernetes.io/name: {{ include "dell-exporter.fullname" . | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: hardware
{{- if .Values.commonLabels }}
{{ toYaml .Values.commonLabels }}
{{- end }}
{{- end }}
{{/*
Chart name and version joined as "<name>-<version>", for use as a chart label.
Any "+" is replaced by "_", the result is cut to 63 characters and a trailing
"-" is dropped.
*/}}
{{- define "dell-exporter.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}

43
charts/dell-exporter/templates/daemonset.yaml

@ -0,0 +1,43 @@
---
# DaemonSet running the dell-exporter container and exposing its metrics
# on container port 9137 (named "metrics").
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "dell-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "dell-exporter.labels" . | nindent 4 }}
spec:
  # Replace at most 10% of the pods at a time during a rollout.
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 10%
  # The selector and the pod template both use the full chart label set,
  # which includes .Values.commonLabels.
  selector:
    matchLabels:
      {{- include "dell-exporter.labels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "dell-exporter.labels" . | nindent 8 }}
    spec:
      containers:
        - name: dell-exporter
          image: {{ .Values.image }}
          ports:
            - name: metrics
              containerPort: 9137
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              cpu: 6000m
              memory: 512Mi
          # NOTE(review): presumably privileged so the exporter can query the
          # host hardware; confirm this is still required.
          securityContext:
            privileged: true
      # Optional scheduling constraints, rendered only when set in values.
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}

111
charts/dell-exporter/templates/prometheusrule.yaml

@ -0,0 +1,111 @@
---
# Alerting rules for the metrics published by the Dell hardware exporter.
# Prometheus template expressions are wrapped in Helm raw-string actions so
# that Helm emits them verbatim instead of evaluating them.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "dell-exporter.fullname" . }}
  labels:
{{ include "dell-exporter.labels" . | indent 4 }}
spec:
  groups:
    - name: dell
      rules:
        # Overall system status: 2 is treated as non-critical (P4) and
        # 1 as critical (P3).
        - alert: DellSystemStatus
          expr: |
            dell_hw_system_status == 2
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] System warning"
            description: >
              The host `{{`{{$labels.instance}}`}}` is reporting a non-critical status,
              please investigate. If this is the only firing alert, please add
              another one to handle the specific failure.
        - alert: DellSystemStatus
          expr: |
            dell_hw_system_status == 1
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] System failure"
            description: >
              The host `{{`{{$labels.instance}}`}}` is reporting a critical status,
              please investigate. If this is the only firing alert, please add
              another one to handle the specific failure.
        # Component status alerts: any non-zero status fires.
        - alert: DellFanFailure
          expr: |
            dell_hw_chassis_fan_status != 0
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Fan failure"
            description: >
              The `{{`{{$labels.fan}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please replace it.
        - alert: DellMemoryFailure
          expr: |
            dell_hw_chassis_memory_status != 0
          for: 1m
          labels:
            severity: P4
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Memory failure"
            description: >
              The `{{`{{$labels.memory}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please replace it.
        - alert: DellPowerSupplyFailure
          expr: |
            dell_hw_ps_status != 0
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Power failure"
            description: >
              The PSU `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
              a failure, please investigate if power is lost or power supply
              requires replacement.
        - alert: DellHardwareStorageFailure
          expr: |
            dell_hw_storage_controller_status != 0
          for: 1m
          labels:
            severity: P3
          annotations:
            summary: "[`{{`{{$labels.instance}}`}}`] Storage failure"
            description: >
              The storage controller `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}`
              is reporting a failure, please investigate inside the host and add the
              appropriate alerting rules if no alerts except this one have fired.
        # Temperature alerts. The P3 rule compares against the failure
        # threshold so that it matches its "critical reading" description;
        # it previously used the warning threshold, which made it identical
        # to the P4 rule below.
        - alert: DellChassisTemperature
          expr: |
            dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_failure
          labels:
            severity: P3
          annotations:
            summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
            description: >
              The component {{`{{$labels.component}}`}} is reporting temperatures
              of {{`{{$value}}`}} which is above the critical reading for
              {{`{{$labels.instance}}`}}.
        - alert: DellChassisTemperature
          expr: |
            dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_warning
          labels:
            severity: P4
          annotations:
            summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
            description: >
              The component {{`{{$labels.component}}`}} is reporting temperatures
              of {{`{{$value}}`}} which is above the warning reading for
              {{`{{$labels.instance}}`}}.

16
charts/dell-exporter/templates/service.yaml

@ -0,0 +1,16 @@
---
# Headless Service (clusterIP: None) in front of the exporter pods. Its
# "metrics" port forwards to the container port of the same name.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "dell-exporter.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "dell-exporter.labels" . | nindent 4 }}
spec:
  clusterIP: None
  # Pods are selected by the same label set the chart puts on every resource.
  selector:
    {{- include "dell-exporter.labels" . | nindent 4 }}
  ports:
    - name: metrics
      port: 9137
      targetPort: metrics

25
charts/dell-exporter/templates/servicemonitor.yaml

@ -0,0 +1,25 @@
---
# ServiceMonitor that scrapes the "metrics" port of the Service selected by
# the chart's label set.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ include "dell-exporter.fullname" $ }}
  labels:
{{ include "dell-exporter.labels" $ | indent 4 }}
spec:
  endpoints:
    - port: metrics
      scrapeTimeout: 30s
      relabelings:
        # Use the Kubernetes node name of the pod as the "instance" label.
        # The key is "replacement"; it was previously misspelled, which left
        # an unknown field in the relabeling entry.
        - action: replace
          regex: '(.*)'
          replacement: '$1'
          sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: instance
        # Drop the Kubernetes bookkeeping labels from the scraped targets.
        - action: labeldrop
          regex: '^(endpoint|namespace|pod|service)$'
  # NOTE(review): this names a Service label called "jobLabel", which this
  # chart's Service does not set; confirm this is intended.
  jobLabel: jobLabel
  selector:
    matchLabels:
{{ include "dell-exporter.labels" $ | indent 6 }}

2
charts/dell-exporter/values.yaml

@ -0,0 +1,2 @@
---
# Container image for the exporter, pinned by digest.
image: galexrt/dellhw_exporter@sha256:09ce2b8459dcd30a2bfdb6f3b2764dd461d09844d18a893ca5eb273c39eadce4

# Optional overrides for the generated names (read by templates/_helpers.tpl).
# Empty strings keep the default naming.
nameOverride: ""
fullnameOverride: ""

# Extra labels appended to the chart's common label set. That label set is
# also used for the DaemonSet, Service and ServiceMonitor selectors.
commonLabels: {}

# Optional pod scheduling constraints for the DaemonSet. Empty values render
# nothing.
tolerations: []
nodeSelector: {}
Loading…
Cancel
Save