diff --git a/charts/dell-exporter/Chart.yaml b/charts/dell-exporter/Chart.yaml
new file mode 100644
index 0000000..c71cdf5
--- /dev/null
+++ b/charts/dell-exporter/Chart.yaml
@@ -0,0 +1,12 @@
+---
+# Chart metadata for the dell-exporter Helm chart.
+apiVersion: v1
+name: dell-exporter
+version: 0.1.1
+description: Dell exporter for Prometheus
+home: https://github.com/vexxhost/helm-charts
+maintainers:
+- name: Mohammed Naser
+  email: mnaser@vexxhost.com
+  url: https://github.com/mnaser
+appVersion: v1.4.0
diff --git a/charts/dell-exporter/templates/_helpers.tpl b/charts/dell-exporter/templates/_helpers.tpl
new file mode 100644
index 0000000..c4f7f3f
--- /dev/null
+++ b/charts/dell-exporter/templates/_helpers.tpl
@@ -0,0 +1,48 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "dell-exporter.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "dell-exporter.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Generate the basic labels applied to every resource in this chart.
+The same block is reused as the pod/service selectors, so it must render
+identically everywhere it is included.
+Scalar values are quoted so all-numeric or boolean-looking names stay strings.
+*/}}
+{{- define "dell-exporter.labels" }}
+app.kubernetes.io/name: {{ include "dell-exporter.fullname" . | quote }}
+app.kubernetes.io/instance: {{ .Release.Name | quote }}
+app.kubernetes.io/component: metrics
+app.kubernetes.io/part-of: hardware
+{{- if .Values.commonLabels }}
+{{ toYaml .Values.commonLabels }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "dell-exporter.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/charts/dell-exporter/templates/daemonset.yaml b/charts/dell-exporter/templates/daemonset.yaml
new file mode 100644
index 0000000..efdafa7
--- /dev/null
+++ b/charts/dell-exporter/templates/daemonset.yaml
@@ -0,0 +1,47 @@
+---
+# Runs one exporter pod per node selected by the optional
+# tolerations/nodeSelector values.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  namespace: {{ .Release.Namespace | quote }}
+  name: {{ include "dell-exporter.fullname" . | quote }}
+  labels:
+{{ include "dell-exporter.labels" . | indent 4 }}
+spec:
+  selector:
+    matchLabels:
+{{ include "dell-exporter.labels" . | indent 6 }}
+  template:
+    metadata:
+      labels:
+{{ include "dell-exporter.labels" . | indent 8 }}
+    spec:
+      containers:
+      - name: dell-exporter
+        image: {{ .Values.image | quote }}
+        ports:
+        # Named port; the chart's Service targets it by name.
+        - name: metrics
+          containerPort: 9137
+        resources:
+          limits:
+            cpu: 6000m
+            memory: 512Mi
+          requests:
+            cpu: 500m
+            memory: 512Mi
+        securityContext:
+          # NOTE(review): presumably required for hardware access - confirm.
+          privileged: true
+      {{- with $.Values.tolerations }}
+      tolerations:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+      {{- with $.Values.nodeSelector }}
+      nodeSelector:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+  updateStrategy:
+    rollingUpdate:
+      maxUnavailable: 10%
diff --git a/charts/dell-exporter/templates/prometheusrule.yaml b/charts/dell-exporter/templates/prometheusrule.yaml
new file mode 100644
index 0000000..c545a51
--- /dev/null
+++ b/charts/dell-exporter/templates/prometheusrule.yaml
@@ -0,0 +1,113 @@
+---
+# Alerting rules over the exporter's dell_hw_* metrics. Alert templates are
+# wrapped in backtick-quoted actions so Helm passes them through untouched.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  namespace: {{ .Release.Namespace | quote }}
+  name: {{ include "dell-exporter.fullname" . | quote }}
+  labels:
+{{ include "dell-exporter.labels" . | indent 4 }}
+spec:
+  groups:
+  - name: dell
+    rules:
+    - alert: DellSystemStatus
+      expr: |
+        dell_hw_system_status == 2
+      for: 1m
+      labels:
+        severity: P4
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] System warning"
+        description: >
+          The host `{{`{{$labels.instance}}`}}` is reporting a non-critical status,
+          please investigate. If this is the only firing alert, please add
+          another one to handle the specific failure.
+
+    - alert: DellSystemStatus
+      expr: |
+        dell_hw_system_status == 1
+      for: 1m
+      labels:
+        severity: P3
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] System failure"
+        description: >
+          The host `{{`{{$labels.instance}}`}}` is reporting a critical status,
+          please investigate. If this is the only firing alert, please add
+          another one to handle the specific failure.
+
+    - alert: DellFanFailure
+      expr: |
+        dell_hw_chassis_fan_status != 0
+      for: 1m
+      labels:
+        severity: P4
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] Fan failure"
+        description: >
+          The `{{`{{$labels.fan}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
+          a failure, please replace it.
+
+    - alert: DellMemoryFailure
+      expr: |
+        dell_hw_chassis_memory_status != 0
+      for: 1m
+      labels:
+        severity: P4
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] Memory failure"
+        description: >
+          The `{{`{{$labels.memory}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
+          a failure, please replace it.
+
+    - alert: DellPowerSupplyFailure
+      expr: |
+        dell_hw_ps_status != 0
+      for: 1m
+      labels:
+        severity: P3
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] Power failure"
+        description: >
+          The PSU `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}` is reporting
+          a failure, please investigate if power is lost or power supply
+          requires replacement.
+
+    - alert: DellHardwareStorageFailure
+      expr: |
+        dell_hw_storage_controller_status != 0
+      for: 1m
+      labels:
+        severity: P3
+      annotations:
+        summary: "[`{{`{{$labels.instance}}`}}`] Storage failure"
+        description: >
+          The storage controller `{{`{{$labels.id}}`}}` on host `{{`{{$labels.instance}}`}}`
+          is reporting a failure, please investigate inside the host and add the
+          appropriate alerting rules if no alerts except this one have fired.
+
+    - alert: DellChassisTemperature  # critical: reading above the failure threshold
+      expr: |
+        dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_failure
+      labels:
+        severity: P3
+      annotations:
+        summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
+        description: >
+          The component {{`{{$labels.component}}`}} is reporting temperatures
+          of {{`{{$value}}`}} which is above the critical reading for
+          {{`{{$labels.instance}}`}}.
+
+    - alert: DellChassisTemperature  # warning: reading above the warning threshold
+      expr: |
+        dell_hw_chassis_temps_reading > dell_hw_chassis_temps_max_warning
+      labels:
+        severity: P4
+      annotations:
+        summary: "[{{`{{$labels.instance}}`}}] {{`{{$labels.component}}`}}"
+        description: >
+          The component {{`{{$labels.component}}`}} is reporting temperatures
+          of {{`{{$value}}`}} which is above the warning reading for
+          {{`{{$labels.instance}}`}}.
diff --git a/charts/dell-exporter/templates/service.yaml b/charts/dell-exporter/templates/service.yaml
new file mode 100644
index 0000000..3f3213f
--- /dev/null
+++ b/charts/dell-exporter/templates/service.yaml
@@ -0,0 +1,18 @@
+---
+# Headless Service (clusterIP: None) exposing the pods' named metrics port.
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: {{ .Release.Namespace | quote }}
+  name: {{ include "dell-exporter.fullname" . | quote }}
+  labels:
+{{ include "dell-exporter.labels" . | indent 4 }}
+spec:
+  clusterIP: None
+  ports:
+  - name: metrics
+    port: 9137
+    # Resolved by name against the container port declared in the DaemonSet.
+    targetPort: metrics
+  selector:
+{{ include "dell-exporter.labels" . | indent 4 }}
diff --git a/charts/dell-exporter/templates/servicemonitor.yaml b/charts/dell-exporter/templates/servicemonitor.yaml
new file mode 100644
index 0000000..8a161d5
--- /dev/null
+++ b/charts/dell-exporter/templates/servicemonitor.yaml
@@ -0,0 +1,31 @@
+---
+# Scrapes the Service's metrics port and relabels each target so that
+# the instance label carries the Kubernetes node name.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  namespace: {{ .Release.Namespace | quote }}
+  name: {{ include "dell-exporter.fullname" . | quote }}
+  labels:
+{{ include "dell-exporter.labels" $ | indent 4 }}
+spec:
+  endpoints:
+  - port: metrics
+    scrapeTimeout: 30s
+    relabelings:
+    # Copy the node name of the scraped pod into the instance label.
+    - action: replace
+      regex: (.*)
+      replacement: $1
+      sourceLabels:
+      - __meta_kubernetes_pod_node_name
+      targetLabel: instance
+    # Drop the per-object labels named in the regex.
+    - action: labeldrop
+      regex: '^(endpoint|namespace|pod|service)$'
+  # NOTE(review): no template in this chart sets a jobLabel label itself;
+  # it would have to come from commonLabels - confirm this is intended.
+  jobLabel: jobLabel
+  selector:
+    matchLabels:
+{{ include "dell-exporter.labels" $ | indent 6 }}
diff --git a/charts/dell-exporter/values.yaml b/charts/dell-exporter/values.yaml
new file mode 100644
index 0000000..93b47b8
--- /dev/null
+++ b/charts/dell-exporter/values.yaml
@@ -0,0 +1,11 @@
+---
+# Exporter container image, pinned by digest.
+image: galexrt/dellhw_exporter@sha256:09ce2b8459dcd30a2bfdb6f3b2764dd461d09844d18a893ca5eb273c39eadce4
+
+# Optional settings read by the templates; the empty defaults below
+# render exactly as if the keys were absent.
+nameOverride: ""
+fullnameOverride: ""
+commonLabels: {}
+tolerations: []
+nodeSelector: {}