Optimize FEC operator probe values

This commit adds the ability to customize the liveness and readiness
probe values of the FEC Operator controller and DaemonSet, allowing
them to be increased in high CPU demand scenarios.

Test Plan:
PASS: Install the application and override the values of the probe
      environment variables, then verify that both the DaemonSet and
      the controller respect the changes.
PASS: Validate basic application lifecycle operations:
      upload/apply/remove/delete.

Closes-Bug: #2087998
Change-Id: I2323aa947637f8e49bf3a18acdd28a6a157edf40
Signed-off-by: Thiago Miranda <tmarques@windriver.com>
This commit is contained in:
Thiago Miranda 2024-11-12 16:31:22 -03:00
parent b1a4499ec2
commit d813ce95f2
6 changed files with 130 additions and 7 deletions

View File

@ -43,6 +43,48 @@ spec:
value: {{.Values.env.SRIOV_FEC_LABELER_IMAGE}} value: {{.Values.env.SRIOV_FEC_LABELER_IMAGE}}
- name: SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE - name: SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE
value: {{.Values.env.SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE}} value: {{.Values.env.SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE}}
- name: SRIOV_FEC_METRIC_GATHER_INTERVAL
value: "{{.Values.env.SRIOV_FEC_METRIC_GATHER_INTERVAL}}"
- name: SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS}}"
- name: SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS}}"
- name: SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD}}"
- name: SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD}}"
- name: SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS}}"
- name: SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS}}"
- name: SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS}}"
- name: SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD}}"
- name: SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD}}"
- name: SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS
value: "{{.Values.env.SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS}}"
- name: SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS}}"
- name: SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS}}"
- name: SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD}}"
- name: SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD}}"
- name: SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS}}"
- name: SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS}}"
- name: SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS}}"
- name: SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD}}"
- name: SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD
value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD}}"
- name: SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS
value: "{{.Values.env.SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS}}"
- name: SRIOV_FEC_NAMESPACE - name: SRIOV_FEC_NAMESPACE
valueFrom: valueFrom:
fieldRef: fieldRef:
@ -61,8 +103,11 @@ spec:
httpGet: httpGet:
path: /healthz path: /healthz
port: 8081 port: 8081
initialDelaySeconds: 15 initialDelaySeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS }}
periodSeconds: 20 periodSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS }}
failureThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD }}
successThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD }}
timeoutSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS }}
image: "{{ .Values.manager.image.repository}}:{{ .Values.manager.image.tag}}" image: "{{ .Values.manager.image.repository}}:{{ .Values.manager.image.tag}}"
name: manager name: manager
ports: ports:
@ -73,8 +118,11 @@ spec:
httpGet: httpGet:
path: /readyz path: /readyz
port: 8081 port: 8081
initialDelaySeconds: 5 initialDelaySeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS }}
periodSeconds: 10 periodSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS }}
failureThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD }}
successThreshold: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD }}
timeoutSeconds: {{ .Values.env.SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS }}
resources: resources:
limits: limits:
cpu: 200m cpu: 200m

View File

@ -21,6 +21,27 @@ env:
SRIOV_FEC_VFIO_TOKEN: "" SRIOV_FEC_VFIO_TOKEN: ""
SRIOV_FEC_IMAGE_PULL_SECRET: "default-registry-key" SRIOV_FEC_IMAGE_PULL_SECRET: "default-registry-key"
SRIOV_FEC_DAEMON_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-daemon:stx.10.0-v2.9.0" SRIOV_FEC_DAEMON_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-daemon:stx.10.0-v2.9.0"
SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS: "15"
SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS: "20"
SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD: "3"
SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD: "1"
SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS: "5"
SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS: "5"
SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS: "10"
SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD: "3"
SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD: "1"
SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS: "5"
SRIOV_FEC_MANAGER_LIVENESS_INITIAL_DELAY_SECONDS: "15"
SRIOV_FEC_MANAGER_LIVENESS_PERIOD_SECONDS: "20"
SRIOV_FEC_MANAGER_LIVENESS_FAILURE_THRESHOLD: "3"
SRIOV_FEC_MANAGER_LIVENESS_SUCCESS_THRESHOLD: "1"
SRIOV_FEC_MANAGER_LIVENESS_TIMEOUT_SECONDS: "5"
SRIOV_FEC_MANAGER_READINESS_INITIAL_DELAY_SECONDS: "5"
SRIOV_FEC_MANAGER_READINESS_PERIOD_SECONDS: "10"
SRIOV_FEC_MANAGER_READINESS_FAILURE_THRESHOLD: "3"
SRIOV_FEC_MANAGER_READINESS_SUCCESS_THRESHOLD: "1"
SRIOV_FEC_MANAGER_READINESS_TIMEOUT_SECONDS: "5"
SRIOV_FEC_METRIC_GATHER_INTERVAL: "15s"
SRIOV_FEC_LABELER_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-labeler:stx.10.0-v2.9.0" SRIOV_FEC_LABELER_IMAGE: "registry.local:9001/docker.io/starlingx/sriov-fec-labeler:stx.10.0-v2.9.0"
SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE: "registry.local:9001/ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:v3.6.2" SRIOV_FEC_NETWORK_DEVICE_PLUGIN_IMAGE: "registry.local:9001/ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:v3.6.2"
KUBE_RBAC_PROXY_IMAGE: "registry.local:9001/gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0" KUBE_RBAC_PROXY_IMAGE: "registry.local:9001/gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0"

View File

@ -3,7 +3,8 @@ LABEL=sriov-fec-daemon
SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git
SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de
SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \
../files/0002-kmod-version-fix.patch" ../files/0002-kmod-version-fix.patch \
../files/0003-Customization-for-readiness-and-liveness-values.patch"
COMMAND=bash COMMAND=bash
SCRIPT=build-sriov-fec-operator-image.sh SCRIPT=build-sriov-fec-operator-image.sh
ARGS=daemon ARGS=daemon

View File

@ -3,7 +3,8 @@ LABEL=sriov-fec-labeler
SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git
SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de
SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \
../files/0002-kmod-version-fix.patch" ../files/0002-kmod-version-fix.patch \
../files/0003-Customization-for-readiness-and-liveness-values.patch"
COMMAND=bash COMMAND=bash
SCRIPT=build-sriov-fec-operator-image.sh SCRIPT=build-sriov-fec-operator-image.sh
ARGS=labeler ARGS=labeler

View File

@ -3,7 +3,8 @@ LABEL=sriov-fec-operator
SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git SOURCE_REPO=https://github.com/intel/sriov-fec-operator.git
SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de SOURCE_REF=471f355304105ecf7f558dd5dab7a7123064e4de
SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \ SOURCE_PATCHES="../files/0001-Add-image-pull-secrets-for-stx-builds.patch \
../files/0002-kmod-version-fix.patch" ../files/0002-kmod-version-fix.patch \
../files/0003-Customization-for-readiness-and-liveness-values.patch"
COMMAND=bash COMMAND=bash
SCRIPT=build-sriov-fec-operator-image.sh SCRIPT=build-sriov-fec-operator-image.sh
ARGS=operator ARGS=operator

View File

@ -0,0 +1,51 @@
From 6a143b24b9cf4626f1dde53bab9876d9b1823515 Mon Sep 17 00:00:00 2001
From: Thiago Miranda <tmarques@windriver.com>
Date: Tue, 12 Nov 2024 15:02:30 -0300
Subject: [PATCH] Customization for readiness and liveness values
Signed-off-by: Thiago Antonio Miranda <tmarques@windriver.com>
---
assets/300-daemon.yaml | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/assets/300-daemon.yaml b/assets/300-daemon.yaml
index 03dc090..74f6967 100644
--- a/assets/300-daemon.yaml
+++ b/assets/300-daemon.yaml
@@ -196,14 +196,20 @@ data:
httpGet:
path: /healthz
port: 8081
- initialDelaySeconds: 15
- periodSeconds: 20
+ initialDelaySeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_INITIAL_DELAY_SECONDS }}
+ periodSeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_PERIOD_SECONDS }}
+ failureThreshold: {{ .SRIOV_FEC_DAEMON_LIVENESS_FAILURE_THRESHOLD }}
+ successThreshold: {{ .SRIOV_FEC_DAEMON_LIVENESS_SUCCESS_THRESHOLD }}
+ timeoutSeconds: {{ .SRIOV_FEC_DAEMON_LIVENESS_TIMEOUT_SECONDS }}
readinessProbe:
httpGet:
path: /readyz
port: 8081
- initialDelaySeconds: 5
- periodSeconds: 10
+ initialDelaySeconds: {{ .SRIOV_FEC_DAEMON_READINESS_INITIAL_DELAY_SECONDS }}
+ periodSeconds: {{ .SRIOV_FEC_DAEMON_READINESS_PERIOD_SECONDS }}
+ failureThreshold: {{ .SRIOV_FEC_DAEMON_READINESS_FAILURE_THRESHOLD }}
+ successThreshold: {{ .SRIOV_FEC_DAEMON_READINESS_SUCCESS_THRESHOLD }}
+ timeoutSeconds: {{ .SRIOV_FEC_DAEMON_READINESS_TIMEOUT_SECONDS }}
ports:
- containerPort: 8080
name: bbdevconfig
@@ -245,6 +251,8 @@ data:
value: "90"
- name: LEASE_DURATION_SECONDS
value: "600"
+ - name: SRIOV_FEC_METRIC_GATHER_INTERVAL
+ value: {{ .SRIOV_FEC_METRIC_GATHER_INTERVAL }}
securityContext:
readOnlyRootFilesystem: true
privileged: true
--
2.34.1