From d813ce95f268650952a74cca373782c716d5b5ee Mon Sep 17 00:00:00 2001 From: Thiago Miranda Date: Tue, 12 Nov 2024 16:31:22 -0300 Subject: [PATCH] Optimize FEC operator probe values This commit adds the ability to customize the liveness and readiness probe values of both the FEC Operator manager Deployment and the FEC daemon DaemonSet, allowing them to be increased in high CPU demand scenarios. It also adds a configurable SRIOV_FEC_METRIC_GATHER_INTERVAL environment variable (default "15s"). Test Plan: PASS: Install the application and override the values of the probe environment variables, then verify that both the daemonset and the controller are respecting the changes. PASS: Validate basic application lifecycle operations: upload/apply/remove/delete. Closes-Bug: #2087998 Change-Id: I2323aa947637f8e49bf3a18acdd28a6a157edf40 Signed-off-by: Thiago Miranda --- .../templates/deployment.yaml | 56 +++++++++++++++++-- .../sriov-fec-operator/values.yaml | 21 +++++++ .../sriov-fec-daemon.stable_docker_image | 3 +- .../sriov-fec-labeler.stable_docker_image | 3 +- .../sriov-fec-operator.stable_docker_image | 3 +- ...on-for-readiness-and-liveness-values.patch | 51 +++++++++++++++++ 6 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 sriov-fec-operator-images/files/0003-Customization-for-readiness-and-liveness-values.patch diff --git a/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/templates/deployment.yaml b/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/templates/deployment.yaml index f744ca2..56c8a86 100644 --- a/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/templates/deployment.yaml +++ b/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/templates/deployment.yaml @@ -43,6 +43,48 @@ spec: value: {{.Values.env.SRIOV_FEC_LABELER_IMAGE}} - name: SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE value: {{.Values.env.SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE}} + - name: SRIOV_FEC_METRIC_GATHER_INTERVAL + value: "{{.Values.env.SRIOV_FEC_METRIC_GATHER_INTERVAL}}" + 
- name: SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS}}" + - name: SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS}}" + - name: SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD}}" + - name: SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD}}" + - name: SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS}}" + - name: SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS}}" + - name: SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS}}" + - name: SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD}}" + - name: SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD}}" + - name: SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS + value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS}}" + - name: SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS + value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS}}" + - name: SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS + value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS}}" + - name: SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD}}" + - name: SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD}}" + - name: SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS + value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS}}" + - name: SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS + value: 
"{{.Values.env.SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS}}" + - name: SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS + value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS}}" + - name: SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD}}" + - name: SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD + value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD}}" + - name: SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS + value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS}}" - name: SRIOV_FEC_NAMESPACE valueFrom: fieldRef: @@ -61,8 +103,11 @@ spec: httpGet: path: /healthz port: 8081 - initialDelaySeconds: 15 - periodSeconds: 20 + initialDelaySeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS }} + periodSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS }} + failureThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD }} + successThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD }} + timeoutSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS }} image: "{{ .Values.manager.image.repository}}:{{ .Values.manager.image.tag}}" name: manager ports: @@ -73,8 +118,11 @@ spec: httpGet: path: /readyz port: 8081 - initialDelaySeconds: 5 - periodSeconds: 10 + initialDelaySeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS }} + periodSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS }} + failureThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD }} + successThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD }} + timeoutSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS }} resources: limits: cpu: 200m diff --git a/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/values.yaml 
b/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/values.yaml index 570eb70..6759ba1 100644 --- a/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/values.yaml +++ b/helm-charts/custom/sriov-fec-operator-helm/sriov-fec-operator-helm/sriov-fec-operator/values.yaml @@ -21,6 +21,27 @@ env: SRIOV_FEC_VFIO_TOKEN: "" SRIOV_FEC_IMAGE_PULL_SECRET: "default-registry-key" SRIOV_FEC_DAEMON_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-daemon:stx.10.0-v2.9.0" + SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS: "15" + SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS: "20" + SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD: "3" + SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD: "1" + SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS: "5" + SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS: "5" + SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS: "10" + SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD: "3" + SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD: "1" + SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS: "5" + SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS: "15" + SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS: "20" + SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD: "3" + SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD: "1" + SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS: "5" + SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS: "5" + SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS: "10" + SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD: "3" + SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD: "1" + SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS: "5" + SRIOV_FEC_METRIC_GATHER_INTERVAL: "15s" SRIOV_FEC_LABELER_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-labeler:stx.10.0-v2.9.0" SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE: "registry.local:9001/ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:v3.6.2" KUBE_RBAC_PROXY_IMAGE: "registry.local:9001/gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0" diff --git 
a/sriov-fec-operator-images/debian/sriov-fec-daemon.stable_docker_image b/sriov-fec-operator-images/debian/sriov-fec-daemon.stable_docker_image index 12f3af1..7c9bcae 100644 --- a/sriov-fec-operator-images/debian/sriov-fec-daemon.stable_docker_image +++ b/sriov-fec-operator-images/debian/sriov-fec-daemon.stable_docker_image @@ -3,7 +3,8 @@ LABEL=sriov-fec-daemon SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ - ../files/0002-kmod-version-fix.patch" + ../files/0002-kmod-version-fix.patch \ + ../files/0003-Customization-for-readiness-and-liveness-values.patch" COMMAND=bash SCRIPT=build-sriov-fec-operator-image.sh ARGS=daemon diff --git a/sriov-fec-operator-images/debian/sriov-fec-labeler.stable_docker_image b/sriov-fec-operator-images/debian/sriov-fec-labeler.stable_docker_image index ac9e987..2c02612 100644 --- a/sriov-fec-operator-images/debian/sriov-fec-labeler.stable_docker_image +++ b/sriov-fec-operator-images/debian/sriov-fec-labeler.stable_docker_image @@ -3,7 +3,8 @@ LABEL=sriov-fec-labeler SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ - ../files/0002-kmod-version-fix.patch" + ../files/0002-kmod-version-fix.patch \ + ../files/0003-Customization-for-readiness-and-liveness-values.patch" COMMAND=bash SCRIPT=build-sriov-fec-operator-image.sh ARGS=labeler diff --git a/sriov-fec-operator-images/debian/sriov-fec-operator.stable_docker_image b/sriov-fec-operator-images/debian/sriov-fec-operator.stable_docker_image index 0466391..ae6895c 100644 --- a/sriov-fec-operator-images/debian/sriov-fec-operator.stable_docker_image +++ b/sriov-fec-operator-images/debian/sriov-fec-operator.stable_docker_image @@ -3,7 +3,8 @@ LABEL=sriov-fec-operator 
SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ - ../files/0002-kmod-version-fix.patch" + ../files/0002-kmod-version-fix.patch \ + ../files/0003-Customization-for-readiness-and-liveness-values.patch" COMMAND=bash SCRIPT=build-sriov-fec-operator-image.sh ARGS=operator diff --git a/sriov-fec-operator-images/files/0003-Customization-for-readiness-and-liveness-values.patch b/sriov-fec-operator-images/files/0003-Customization-for-readiness-and-liveness-values.patch new file mode 100644 index 0000000..e0f20df --- /dev/null +++ b/sriov-fec-operator-images/files/0003-Customization-for-readiness-and-liveness-values.patch @@ -0,0 +1,51 @@ +From 6a143b24b9cf4626f1dde53bab9876d9b1823515 Mon Sep 17 00:00:00 2001 +From: Thiago Miranda +Date: Tue, 12 Nov 2024 15:02:30 -0300 +Subject: [PATCH] Customization for readiness and liveness values + +Signed-off-by: Thiago Antonio Miranda +--- + assets/300-daemon.yaml | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/assets/300-daemon.yaml b/assets/300-daemon.yaml +index 03dc090..74f6967 100644 +--- a/assets/300-daemon.yaml ++++ b/assets/300-daemon.yaml +@@ -196,14 +196,20 @@ data: + httpGet: + path: /healthz + port: 8081 +- initialDelaySeconds: 15 +- periodSeconds: 20 ++ initialDelaySeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS }} ++ periodSeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS }} ++ failureThreshold: {{ .SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD }} ++ successThreshold: {{ .SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD }} ++ timeoutSeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS }} + readinessProbe: + httpGet: + path: /readyz + port: 8081 +- initialDelaySeconds: 5 +- periodSeconds: 10 ++ initialDelaySeconds: {{ .SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS }} ++ periodSeconds: {{ .SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS 
}} ++ failureThreshold: {{ .SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD }} ++ successThreshold: {{ .SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD }} ++ timeoutSeconds: {{ .SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS }} + ports: + - containerPort: 8080 + name: bbdevconfig +@@ -245,6 +251,8 @@ data: + value: "90" + - name: LEASE_DURATION_SECONDS + value: "600" ++ - name: SRIOV_FEC_METRIC_GATHER_INTERVAL ++ value: "{{ .SRIOV_FEC_METRIC_GATHER_INTERVAL }}" + securityContext: + readOnlyRootFilesystem: true + privileged: true +-- +2.34.1 +