fabc6822a0
This upgrade is needed in support of A100 GPU, kernel upgrade and bug 1948050. It eliminates the requirement to create nvidia specific runtimeclass prior to installing the charts by pre-installing the toolkit through toolkit- installer subchart. This commit has been tested with the following: driver: 470.57.02 toolkit: 1.7.1-ubi8 defaultRuntime: containerd Test Plan: PASS: Verify gpu-operator starts and adds nvidia.com/gpu to the node. PASS: Verify nvidia-toolkit is removed with helm override of global.toolkit_force_clean=true. PASS: Verify pods can access gpu device and nvidia tools to monitor the GPU. PASS: Verify pod can build and execute cuda sample code. PASS: Verify driver pod prints out warning when building on Low Latency kernel with helm override of: --set driver.env[0].name=IGNORE_PREEMPT_RT_PRESENCE Closes-Bug: 1948050 Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com> Change-Id: I18dd2a0ab1adc6f9364314a22373aadc93cad27f
137 lines
5.3 KiB
Diff
137 lines
5.3 KiB
Diff
From 1094b6f1593ec454b3a6313ecf9fae53f8c66899 Mon Sep 17 00:00:00 2001
|
|
From: Babak Sarashki <babak.sarashki@windriver.com>
|
|
Date: Sat, 6 Mar 2021 00:22:40 +0000
|
|
Subject: [PATCH 1/2] deployments: setup configmap with assets for volumemounts
|
|
|
|
This feature allows inclusion of assets/ in the helm chart and their
|
|
export to the gpu-operator pod through configmap volumeMounts.
|
|
|
|
Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com>
|
|
---
|
|
.../gpu-operator/templates/operator.yaml | 44 +++++++++++++++++++
|
|
.../templates/operator_configmap.yaml | 36 +++++++++++++++
|
|
deployments/gpu-operator/values.yaml | 2 +
|
|
3 files changed, 82 insertions(+)
|
|
create mode 100644 deployments/gpu-operator/templates/operator_configmap.yaml
|
|
|
|
diff --git a/deployments/gpu-operator/templates/operator.yaml b/deployments/gpu-operator/templates/operator.yaml
|
|
index 1d81f74..c97b4b1 100644
|
|
--- a/deployments/gpu-operator/templates/operator.yaml
|
|
+++ b/deployments/gpu-operator/templates/operator.yaml
|
|
@@ -49,6 +49,44 @@ spec:
|
|
- name: host-os-release
|
|
mountPath: "/host-etc/os-release"
|
|
readOnly: true
|
|
+
|
|
+ {{- if eq .Values.operator.include_assets "include_assets" }}
|
|
+ {{- range $path, $_ := .Files.Glob "assets/gpu-feature-discovery/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/gpu-feature-discovery/%s" (base $path) }}
|
|
+ subPath: {{ printf "gfd_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+
|
|
+ {{- range $path, $_ := .Files.Glob "assets/state-container-toolkit/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/state-container-toolkit/%s" (base $path) }}
|
|
+ subPath: {{ printf "state_container_toolkit_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+
|
|
+ {{- range $path, $_ := .Files.Glob "assets/state-device-plugin/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/state-device-plugin/%s" (base $path) }}
|
|
+ subPath: {{ printf "state_device_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+
|
|
+ {{- range $path, $_ := .Files.Glob "assets/state-device-plugin-validation/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/state-device-plugin-validation/%s" (base $path) }}
|
|
+ subPath: {{ printf "state_device_validation_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+
|
|
+ {{- range $path, $_ := .Files.Glob "assets/state-driver/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/state-driver/%s" (base $path) }}
|
|
+ subPath: {{ printf "state_driver_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+
|
|
+ {{- range $path, $_ := .Files.Glob "assets/state-monitoring/*" }}
|
|
+ - name: assets
|
|
+ mountPath: {{ printf "/opt/gpu-operator/state-monitoring/%s" (base $path) }}
|
|
+ subPath: {{ printf "state_monitor_%s" (base $path) }}
|
|
+ {{- end }}
|
|
+ {{- end }}
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /healthz
|
|
@@ -72,6 +110,12 @@ spec:
|
|
- name: host-os-release
|
|
hostPath:
|
|
path: "/etc/os-release"
|
|
+ {{- if eq .Values.operator.include_assets "include_assets" }}
|
|
+ - name: assets
|
|
+ configMap:
|
|
+ name: operator-configmap
|
|
+ {{- end }}
|
|
+
|
|
{{- with .Values.operator.nodeSelector }}
|
|
nodeSelector:
|
|
{{- toYaml . | nindent 8 }}
|
|
diff --git a/deployments/gpu-operator/templates/operator_configmap.yaml b/deployments/gpu-operator/templates/operator_configmap.yaml
|
|
new file mode 100644
|
|
index 0000000..61f366e
|
|
--- /dev/null
|
|
+++ b/deployments/gpu-operator/templates/operator_configmap.yaml
|
|
@@ -0,0 +1,36 @@
|
|
+{{- if eq .Values.operator.include_assets "include_assets" }}
|
|
+apiVersion: v1
|
|
+kind: ConfigMap
|
|
+metadata:
|
|
+ name: operator-configmap
|
|
+data:
|
|
+{{- range $path, $_ := .Files.Glob "assets/gpu-feature-discovery/*" }}
|
|
+{{ printf "gfd_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+
|
|
+{{- range $path, $_ := .Files.Glob "assets/state-container-toolkit/*" }}
|
|
+{{ printf "state_container_toolkit_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+
|
|
+{{- range $path, $_ := .Files.Glob "assets/state-device-plugin/*" }}
|
|
+{{ printf "state_device_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+
|
|
+{{- range $path, $_ := .Files.Glob "assets/state-device-plugin-validation/*" }}
|
|
+{{ printf "state_device_validation_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+
|
|
+{{- range $path, $_ := .Files.Glob "assets/state-driver/*" }}
|
|
+{{ printf "state_driver_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+
|
|
+{{- range $path, $_ := .Files.Glob "assets/state-monitoring/*" }}
|
|
+{{ printf "state_monitor_%s" (base $path) | indent 2 }}: |-
|
|
+{{ $.Files.Get $path | indent 4 }}
|
|
+{{- end }}
|
|
+{{- end }}
|
|
diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml
|
|
index 78a4757..6689636 100644
|
|
--- a/deployments/gpu-operator/values.yaml
|
|
+++ b/deployments/gpu-operator/values.yaml
|
|
@@ -70,6 +70,8 @@ operator:
|
|
values: [""]
|
|
logging:
|
|
timeEncoding: epoch
|
|
+ # Set "include_assets" true to include assets/gpu-operator with the helm chart
|
|
+ include_assets: ""
|
|
resources:
|
|
limits:
|
|
cpu: 500m
|
|
--
|
|
2.17.1
|
|
|