[nova] Unhardcode readiness/liveness probe timings

This patch adds ability to unhardcode readiness/
liveness probes timings. Moreover it introduces
RPC_PROBE_TIMEOUT and RPC_PROBE_RETRIES variables
which are passed to health probe script and
allow to unhardcode RPCtest  timeout and number of
retries

Change-Id: I2498a14e97557feafbd45c8df3c683f8500026e6
This commit is contained in:
Oleksii Grudev 2020-02-10 15:57:14 +02:00
parent 4a5374aebd
commit af4e2aaadd
11 changed files with 300 additions and 150 deletions

View File

@ -45,6 +45,8 @@ from oslo_context import context
from oslo_log import log
import oslo_messaging
rpc_timeout = int(os.getenv('RPC_PROBE_TIMEOUT', '60'))
rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2'))
tcp_established = "ESTABLISHED"
@ -66,8 +68,8 @@ def check_service_status(transport):
namespace='baseapi',
version="1.1")
client = oslo_messaging.RPCClient(transport, target,
timeout=60,
retry=2)
timeout=rpc_timeout,
retry=rpc_retries)
client.call(context.RequestContext(),
'ping',
arg=None)

View File

@ -14,6 +14,35 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaComputeLivenessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- compute
- --liveness-probe
{{- if .Values.pod.use_fqdn.compute }}
- --use-fqdn
{{- end }}
{{- end }}
{{- define "novaComputeReadinessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- compute
{{- if .Values.pod.use_fqdn.compute }}
- --use-fqdn
{{- end }}
{{- end }}
{{- define "nova.compute.daemonset" }}
{{- $daemonset := index . 0 }}
{{- $configMapName := index . 1 }}
@ -185,8 +214,8 @@ spec:
{{ dict "envAll" $envAll "application" "nova" "container" "nova_compute" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
securityContext:
privileged: true
{{- if .Values.conf.ceph.enabled }}
env:
{{- if .Values.conf.ceph.enabled }}
- name: CEPH_CINDER_USER
value: "{{ .Values.conf.ceph.cinder.user }}"
{{- if .Values.conf.ceph.cinder.keyring }}
@ -196,37 +225,12 @@ spec:
- name: LIBVIRT_CEPH_SECRET_UUID
value: "{{ .Values.conf.ceph.secret_uuid }}"
{{ end }}
readinessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- compute
{{- if .Values.pod.use_fqdn.compute }}
- --use-fqdn
{{- end }}
initialDelaySeconds: 80
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- compute
- --liveness-probe
{{- if .Values.pod.use_fqdn.compute }}
- --use-fqdn
{{- end }}
initialDelaySeconds: 120
periodSeconds: 600
timeoutSeconds: 580
- name: RPC_PROBE_TIMEOUT
value: "{{ .Values.pod.probes.rpc_timeout }}"
- name: RPC_PROBE_RETRIES
value: "{{ .Values.pod.probes.rpc_retries }}"
{{ dict "envAll" $envAll "component" "compute" "container" "default" "type" "liveness" "probeTemplate" (include "novaComputeLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "compute" "container" "default" "type" "readiness" "probeTemplate" (include "novaComputeReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
command:
- /tmp/nova-compute.sh
terminationMessagePath: /var/log/termination-log

View File

@ -14,6 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaApiMetadataLivenessProbeTemplate" }}
tcpSocket:
port: {{ .Values.network.metadata.port }}
{{- end }}
{{- define "novaApiMetadataReadinessProbeTemplate" }}
tcpSocket:
port: {{ .Values.network.metadata.port }}
{{- end }}
{{- if .Values.manifests.deployment_api_metadata }}
{{- $envAll := . }}
@ -91,13 +101,8 @@ spec:
- stop
ports:
- containerPort: {{ .Values.network.metadata.port }}
readinessProbe:
tcpSocket:
port: {{ .Values.network.metadata.port }}
livenessProbe:
tcpSocket:
port: {{ .Values.network.metadata.port }}
initialDelaySeconds: 30
{{ dict "envAll" $envAll "component" "api-metadata" "container" "default" "type" "liveness" "probeTemplate" (include "novaApiMetadataLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "api-metadata" "container" "default" "type" "readiness" "probeTemplate" (include "novaApiMetadataReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
volumeMounts:
- name: pod-tmp
mountPath: /tmp

View File

@ -14,6 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaApiOsapiLivenessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- define "novaApiOsapiReadinessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- if .Values.manifests.deployment_api_osapi }}
{{- $envAll := . }}
@ -73,13 +83,8 @@ spec:
ports:
- name: n-api
containerPort: {{ tuple "compute" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
readinessProbe:
tcpSocket:
port: {{ tuple "compute" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
livenessProbe:
tcpSocket:
port: {{ tuple "compute" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 30
{{ dict "envAll" $envAll "component" "api-osapi" "container" "default" "type" "liveness" "probeTemplate" (include "novaApiOsapiLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "api-osapi" "container" "default" "type" "readiness" "probeTemplate" (include "novaApiOsapiReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
volumeMounts:
- name: pod-tmp
mountPath: /tmp

View File

@ -14,6 +14,29 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaConductorLivenessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- conductor
- --liveness-probe
{{- end }}
{{- define "novaConductorReadinessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- conductor
{{- end }}
{{- if .Values.manifests.deployment_conductor }}
{{- $envAll := . }}
@ -60,31 +83,13 @@ spec:
{{ tuple $envAll "nova_conductor" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.conductor | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "nova" "container" "nova_conductor" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
readinessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- conductor
initialDelaySeconds: 80
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- conductor
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 600
timeoutSeconds: 580
{{ dict "envAll" $envAll "component" "conductor" "container" "default" "type" "liveness" "probeTemplate" (include "novaConductorLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "conductor" "container" "default" "type" "readiness" "probeTemplate" (include "novaConductorReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
env:
- name: RPC_PROBE_TIMEOUT
value: "{{ .Values.pod.probes.rpc_timeout }}"
- name: RPC_PROBE_RETRIES
value: "{{ .Values.pod.probes.rpc_retries }}"
command:
- /tmp/nova-conductor.sh
volumeMounts:

View File

@ -14,6 +14,29 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaConsoleauthLivenessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- consoleauth
- --liveness-probe
{{- end }}
{{- define "novaConsoleauthReadinessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- consoleauth
{{- end }}
{{- if .Values.manifests.deployment_consoleauth }}
{{- $envAll := . }}
@ -60,31 +83,13 @@ spec:
{{ tuple $envAll "nova_consoleauth" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.consoleauth | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "nova" "container" "nova_consoleauth" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
readinessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- consoleauth
initialDelaySeconds: 80
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- consoleauth
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 600
timeoutSeconds: 580
{{ dict "envAll" $envAll "component" "consoleauth" "container" "default" "type" "liveness" "probeTemplate" (include "novaConsoleauthLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "consoleauth" "container" "default" "type" "readiness" "probeTemplate" (include "novaConsoleauthReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
env:
- name: RPC_PROBE_TIMEOUT
value: "{{ .Values.pod.probes.rpc_timeout }}"
- name: RPC_PROBE_RETRIES
value: "{{ .Values.pod.probes.rpc_retries }}"
command:
- /tmp/nova-consoleauth.sh
volumeMounts:

View File

@ -14,6 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaNovncproxyLivenessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute_novnc_proxy" "internal" "novnc_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- define "novaNovncproxyReadinessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute_novnc_proxy" "internal" "novnc_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- if and .Values.manifests.deployment_novncproxy ( eq .Values.console.console_kind "novnc" )}}
{{- $envAll := . }}
@ -104,14 +114,8 @@ spec:
{{ tuple $envAll "nova_novncproxy" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.novncproxy | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "nova" "container" "nova_novncproxy" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
readinessProbe:
tcpSocket:
port: {{ tuple "compute_novnc_proxy" "internal" "novnc_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 30
livenessProbe:
tcpSocket:
port: {{ tuple "compute_novnc_proxy" "internal" "novnc_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 30
{{ dict "envAll" $envAll "component" "novncproxy" "container" "default" "type" "liveness" "probeTemplate" (include "novaNovncproxyLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "novncproxy" "container" "default" "type" "readiness" "probeTemplate" (include "novaNovncproxyReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
command:
- /tmp/nova-console-proxy.sh
ports:

View File

@ -14,6 +14,17 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaPlacementLivenessProbeTemplate" }}
tcpSocket:
port: {{ tuple "placement" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- define "novaPlacementReadinessProbeTemplate" }}
#NOTE(portdirect): use tcpSocket check as HTTP will return 401
tcpSocket:
port: {{ tuple "placement" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- if .Values.manifests.deployment_placement }}
{{- $envAll := . }}
@ -72,17 +83,8 @@ spec:
ports:
- name: p-api
containerPort: {{ tuple "placement" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
readinessProbe:
#NOTE(portdirect): use tcpSocket check as HTTP will return 401
tcpSocket:
port: {{ tuple "placement" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 15
periodSeconds: 10
livenessProbe:
tcpSocket:
port: {{ tuple "placement" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 50
periodSeconds: 10
{{ dict "envAll" $envAll "component" "placement" "container" "default" "type" "liveness" "probeTemplate" (include "novaPlacementLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "placement" "container" "default" "type" "readiness" "probeTemplate" (include "novaPlacementReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
volumeMounts:
- name: pod-tmp
mountPath: /tmp

View File

@ -14,6 +14,29 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaSchedulerLivenessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- scheduler
- --liveness-probe
{{- end }}
{{- define "novaSchedulerReadinessProbeTemplate" }}
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- scheduler
{{- end }}
{{- if .Values.manifests.deployment_scheduler }}
{{- $envAll := . }}
@ -60,31 +83,13 @@ spec:
{{ tuple $envAll "nova_scheduler" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.scheduler | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "nova" "container" "nova_scheduler" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
readinessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- scheduler
initialDelaySeconds: 80
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
- python
- /tmp/health-probe.py
- --config-file
- /etc/nova/nova.conf
- --service-queue-name
- scheduler
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 600
timeoutSeconds: 580
{{ dict "envAll" $envAll "component" "scheduler" "container" "default" "type" "liveness" "probeTemplate" (include "novaSchedulerLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "scheduler" "container" "default" "type" "readiness" "probeTemplate" (include "novaSchedulerReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
env:
- name: RPC_PROBE_TIMEOUT
value: "{{ .Values.pod.probes.rpc_timeout }}"
- name: RPC_PROBE_RETRIES
value: "{{ .Values.pod.probes.rpc_retries }}"
command:
- /tmp/nova-scheduler.sh
volumeMounts:

View File

@ -14,6 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- define "novaSpiceproxyLivenessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute_spice_proxy" "internal" "spice_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- define "novaSpiceproxyReadynessProbeTemplate" }}
tcpSocket:
port: {{ tuple "compute_spice_proxy" "internal" "spice_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
{{- end }}
{{- if and .Values.manifests.deployment_spiceproxy ( eq .Values.console.console_kind "spice" )}}
{{- $envAll := . }}
@ -101,14 +111,8 @@ spec:
{{ tuple $envAll "nova_spiceproxy" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.spiceproxy | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "nova" "container" "nova_spiceproxy" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
readinessProbe:
tcpSocket:
port: {{ tuple "compute_spice_proxy" "internal" "spice_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 30
livenessProbe:
tcpSocket:
port: {{ tuple "compute_spice_proxy" "internal" "spice_proxy" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 30
{{ dict "envAll" $envAll "component" "compute-spice-proxy" "container" "default" "type" "liveness" "probeTemplate" (include "novaSpiceproxyLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
{{ dict "envAll" $envAll "component" "compute-spice-proxy" "container" "default" "type" "readiness" "probeTemplate" (include "novaSpiceproxyReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }}
command:
- /tmp/nova-console-proxy.sh
ports:

View File

@ -2217,6 +2217,115 @@ endpoints:
default: 80
pod:
probes:
rpc_timeout: 60
rpc_retries: 2
compute:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
readiness:
enabled: True
params:
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
api-metadata:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 30
readiness:
enabled: True
params:
api-osapi:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 30
readiness:
enabled: True
params:
conductor:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
readiness:
enabled: True
params:
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
consoleauth:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
readiness:
enabled: True
params:
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
novncproxy:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 30
readiness:
enabled: True
params:
initialDelaySeconds: 30
placement:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 50
periodSeconds: 10
readiness:
enabled: True
params:
initialDelaySeconds: 15
periodSeconds: 10
scheduler:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
readiness:
enabled: True
params:
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
compute-spice-proxy:
default:
liveness:
enabled: True
params:
initialDelaySeconds: 30
readiness:
enabled: True
params:
initialDelaySeconds: 30
user:
nova:
uid: 42424