kube-proxy: use HTTP probes instead of exec
The existing liveness and readiness probes for kube-proxy need adjustment. The current implementation is exec-based, which can be a resource concern, and it is tied heavily to iptables, so it is incompatible with IPVS mode. This change removes the exec-based liveness and readiness probes from the kube-proxy daemonset and replaces them with HTTP probes of the healthz endpoint, following the direction that Kubernetes seems to be taking.[0][1] The values.yaml interface for enabling and disabling the probes and for setting their parameters is also changed to use the helm-toolkit standard snippet.[2] Notably, the settings previously configurable under livenessProbe.config are now under pod.probes.proxy.proxy.liveness.params.

[0] https://github.com/kubernetes/kubernetes/issues/81630
[1] https://github.com/kubernetes/kubernetes/pull/75323
[2] https://opendev.org/openstack/openstack-helm-infra/src/branch/master/helm-toolkit/templates/snippets/_kubernetes_probes.tpl

Change-Id: I99ccbc2270a1f8a204417aa410868d04788dc60f
This commit is contained in:
parent
a75a54569c
commit
354deab382
|
@ -1,49 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -eu
|
||||
|
||||
IPTS_DIR=/tmp/liveness
|
||||
|
||||
FAILURE=0
|
||||
{{- if .Values.livenessProbe.whitelist }}
|
||||
WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
|
||||
{{- end }}
|
||||
|
||||
REQUEST='GET /healthz HTTP/1.0\r\nHost: localhost:10256\r\n'
|
||||
|
||||
if [[ $(echo -e "${REQUEST}" | socat - TCP4:localhost:10256 | grep -sc '200 OK') -lt 1 ]]; then
|
||||
echo Failed proxy built-in HTTP health check.
|
||||
echo -e "${REQUEST}" | socat - TCP4:localhost:10256
|
||||
FAILURE=1
|
||||
fi
|
||||
|
||||
mkdir -p "${IPTS_DIR}"
|
||||
iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep -s 'has no endpoints' | sort > "${IPTS_DIR}/current"
|
||||
|
||||
if [[ $(wc -l < "${IPTS_DIR}/current") -gt 0 ]]; then
|
||||
if [[ "${IPTS_DIR}/previous" ]]; then
|
||||
if cmp "${IPTS_DIR}/current" "${IPTS_DIR}/previous"; then
|
||||
echo Some non-whitelisted services have no endpoints:
|
||||
cat "${IPTS_DIR}/current"
|
||||
FAILURE=1
|
||||
else
|
||||
echo Detected issues have changed. Passing check:
|
||||
diff "${IPTS_DIR}/previous" "${IPTS_DIR}/current"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
mv "${IPTS_DIR}/current" "${IPTS_DIR}/previous"
|
||||
|
||||
IPTABLES_IPS=$(iptables-save | grep -E 'KUBE-SEP.*to-destination' | sed 's/.*to-destination \(.*\):.*/\1/' | sort -u)
|
||||
KUBECTL_IPS=$(kubectl get --all-namespaces -o json endpoints | jq -r '.items | arrays | .[] | objects | .subsets | arrays | .[] | objects | .addresses | arrays | .[] | objects | .ip' | sort -u)
|
||||
|
||||
if [[ $(comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")) ]]; then
|
||||
FAILURE=1
|
||||
echo "Found non-current Pod IPs in iptables rules:"
|
||||
comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")
|
||||
fi
|
||||
|
||||
if [[ "${FAILURE}" == "1" ]]; then
|
||||
exit 1
|
||||
fi
|
|
@ -1,5 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
iptables-save | grep 'default/kubernetes:https'
|
|
@ -1,26 +0,0 @@
|
|||
{{/*
|
||||
# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License. */}}
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: kubernetes-proxy-bin
|
||||
data:
|
||||
liveness-probe.sh: |
|
||||
{{ tuple "bin/_liveness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
readiness-probe.sh: |
|
||||
{{ tuple "bin/_readiness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
...
|
|
@ -14,6 +14,13 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
{{- define "probeTemplate" }}
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10256
|
||||
scheme: HTTP
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.manifests.daemonset_proxy }}
|
||||
{{- $envAll := . }}
|
||||
{{- $labels := tuple $envAll "kubernetes" "proxy" | include "helm-toolkit.snippets.kubernetes_metadata_labels" -}}
|
||||
|
@ -75,20 +82,9 @@ spec:
|
|||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
livenessProbe:
|
||||
{{ toYaml .Values.livenessProbe.config | indent 10 }}
|
||||
exec:
|
||||
command:
|
||||
- /tmp/bin/liveness-probe.sh
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /tmp/bin/readiness-probe.sh
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 15
|
||||
{{ dict "envAll" . "component" "proxy" "container" "proxy" "type" "liveness" "probeTemplate" (include "probeTemplate" . | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | trim | indent 8 }}
|
||||
{{ dict "envAll" . "component" "proxy" "container" "proxy" "type" "readiness" "probeTemplate" (include "probeTemplate" . | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | trim | indent 8 }}
|
||||
volumeMounts:
|
||||
- name: bin
|
||||
mountPath: /tmp/bin/
|
||||
- mountPath: /run/xtables.lock
|
||||
name: xtables-lock
|
||||
readOnly: false
|
||||
|
@ -97,10 +93,6 @@ spec:
|
|||
readOnly: true
|
||||
serviceAccountName: kube-proxy
|
||||
volumes:
|
||||
- name: bin
|
||||
configMap:
|
||||
name: kubernetes-proxy-bin
|
||||
defaultMode: 0555
|
||||
- name: xtables-lock
|
||||
hostPath:
|
||||
path: /run/xtables.lock
|
||||
|
|
|
@ -47,6 +47,23 @@ pod:
|
|||
limits:
|
||||
memory: "1024Mi"
|
||||
cpu: "2000m"
|
||||
probes:
|
||||
proxy:
|
||||
proxy:
|
||||
liveness:
|
||||
enabled: true
|
||||
params:
|
||||
initialDelaySeconds: 15
|
||||
timeoutSeconds: 15
|
||||
successThreshold: 1
|
||||
failureThreshold: 2
|
||||
readiness:
|
||||
enabled: true
|
||||
params:
|
||||
initialDelaySeconds: 15
|
||||
timeoutSeconds: 15
|
||||
successThreshold: 1
|
||||
failureThreshold: 2
|
||||
|
||||
images:
|
||||
tags:
|
||||
|
@ -72,17 +89,3 @@ network:
|
|||
kube_service:
|
||||
host: 127.0.0.1
|
||||
port: 6553
|
||||
|
||||
livenessProbe:
|
||||
config:
|
||||
# NOTE(mark-burnett): To avoid cascading failure modes, it is
|
||||
# important that these values are configured to avoid the possibility
|
||||
# of CrashLoopBackoff for this pod. Otherwise, a small non-impacting
|
||||
# issue could disable kube-proxy for the entire site.
|
||||
failureThreshold: 10
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 35
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 10
|
||||
whitelist:
|
||||
# - postgres
|
||||
|
|
Loading…
Reference in New Issue