kube-proxy: use HTTP probes instead of exec
The existing liveness and readiness probes for kube-proxy need adjustment. The current implementation is exec-based, which can be a resource concern, and it is tied heavily to iptables, which makes it incompatible with ipvs.

This change removes the exec-based liveness and readiness probes from the kube-proxy daemonset and replaces them with HTTP probes of the healthz endpoint, following the direction that Kubernetes seems to be taking.[0][1]

The values.yaml interface used to enable and disable the probes and to set their parameters is also modified to use the helm-toolkit standard snippet.[2] Notably, the settings previously configurable under livenessProbe.config are now under pod.probes.proxy.proxy.liveness.params.

0: https://github.com/kubernetes/kubernetes/issues/81630
1: https://github.com/kubernetes/kubernetes/pull/75323
2: https://opendev.org/openstack/openstack-helm-infra/src/branch/master/helm-toolkit/templates/snippets/_kubernetes_probes.tpl

Change-Id: I99ccbc2270a1f8a204417aa410868d04788dc60f
This commit is contained in:
parent
a75a54569c
commit
354deab382
|
@ -1,49 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -eu
|
|
||||||
|
|
||||||
IPTS_DIR=/tmp/liveness
|
|
||||||
|
|
||||||
FAILURE=0
|
|
||||||
{{- if .Values.livenessProbe.whitelist }}
|
|
||||||
WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
REQUEST='GET /healthz HTTP/1.0\r\nHost: localhost:10256\r\n'
|
|
||||||
|
|
||||||
if [[ $(echo -e "${REQUEST}" | socat - TCP4:localhost:10256 | grep -sc '200 OK') -lt 1 ]]; then
|
|
||||||
echo Failed proxy built-in HTTP health check.
|
|
||||||
echo -e "${REQUEST}" | socat - TCP4:localhost:10256
|
|
||||||
FAILURE=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
mkdir -p "${IPTS_DIR}"
|
|
||||||
iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep -s 'has no endpoints' | sort > "${IPTS_DIR}/current"
|
|
||||||
|
|
||||||
if [[ $(wc -l < "${IPTS_DIR}/current") -gt 0 ]]; then
|
|
||||||
if [[ "${IPTS_DIR}/previous" ]]; then
|
|
||||||
if cmp "${IPTS_DIR}/current" "${IPTS_DIR}/previous"; then
|
|
||||||
echo Some non-whitelisted services have no endpoints:
|
|
||||||
cat "${IPTS_DIR}/current"
|
|
||||||
FAILURE=1
|
|
||||||
else
|
|
||||||
echo Detected issues have changed. Passing check:
|
|
||||||
diff "${IPTS_DIR}/previous" "${IPTS_DIR}/current"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
mv "${IPTS_DIR}/current" "${IPTS_DIR}/previous"
|
|
||||||
|
|
||||||
IPTABLES_IPS=$(iptables-save | grep -E 'KUBE-SEP.*to-destination' | sed 's/.*to-destination \(.*\):.*/\1/' | sort -u)
|
|
||||||
KUBECTL_IPS=$(kubectl get --all-namespaces -o json endpoints | jq -r '.items | arrays | .[] | objects | .subsets | arrays | .[] | objects | .addresses | arrays | .[] | objects | .ip' | sort -u)
|
|
||||||
|
|
||||||
if [[ $(comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")) ]]; then
|
|
||||||
FAILURE=1
|
|
||||||
echo "Found non-current Pod IPs in iptables rules:"
|
|
||||||
comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "${FAILURE}" == "1" ]]; then
|
|
||||||
exit 1
|
|
||||||
fi
|
|
|
@ -1,5 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
iptables-save | grep 'default/kubernetes:https'
|
|
|
@ -1,26 +0,0 @@
|
||||||
{{/*
|
|
||||||
# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License. */}}
|
|
||||||
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: kubernetes-proxy-bin
|
|
||||||
data:
|
|
||||||
liveness-probe.sh: |
|
|
||||||
{{ tuple "bin/_liveness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
|
||||||
readiness-probe.sh: |
|
|
||||||
{{ tuple "bin/_readiness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
|
||||||
...
|
|
|
@ -14,6 +14,13 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/}}
|
*/}}
|
||||||
|
|
||||||
|
{{- define "probeTemplate" }}
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 10256
|
||||||
|
scheme: HTTP
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{- if .Values.manifests.daemonset_proxy }}
|
{{- if .Values.manifests.daemonset_proxy }}
|
||||||
{{- $envAll := . }}
|
{{- $envAll := . }}
|
||||||
{{- $labels := tuple $envAll "kubernetes" "proxy" | include "helm-toolkit.snippets.kubernetes_metadata_labels" -}}
|
{{- $labels := tuple $envAll "kubernetes" "proxy" | include "helm-toolkit.snippets.kubernetes_metadata_labels" -}}
|
||||||
|
@ -75,20 +82,9 @@ spec:
|
||||||
valueFrom:
|
valueFrom:
|
||||||
fieldRef:
|
fieldRef:
|
||||||
fieldPath: spec.nodeName
|
fieldPath: spec.nodeName
|
||||||
livenessProbe:
|
{{ dict "envAll" . "component" "proxy" "container" "proxy" "type" "liveness" "probeTemplate" (include "probeTemplate" . | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | trim | indent 8 }}
|
||||||
{{ toYaml .Values.livenessProbe.config | indent 10 }}
|
{{ dict "envAll" . "component" "proxy" "container" "proxy" "type" "readiness" "probeTemplate" (include "probeTemplate" . | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | trim | indent 8 }}
|
||||||
exec:
|
|
||||||
command:
|
|
||||||
- /tmp/bin/liveness-probe.sh
|
|
||||||
readinessProbe:
|
|
||||||
exec:
|
|
||||||
command:
|
|
||||||
- /tmp/bin/readiness-probe.sh
|
|
||||||
initialDelaySeconds: 15
|
|
||||||
periodSeconds: 15
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: bin
|
|
||||||
mountPath: /tmp/bin/
|
|
||||||
- mountPath: /run/xtables.lock
|
- mountPath: /run/xtables.lock
|
||||||
name: xtables-lock
|
name: xtables-lock
|
||||||
readOnly: false
|
readOnly: false
|
||||||
|
@ -97,10 +93,6 @@ spec:
|
||||||
readOnly: true
|
readOnly: true
|
||||||
serviceAccountName: kube-proxy
|
serviceAccountName: kube-proxy
|
||||||
volumes:
|
volumes:
|
||||||
- name: bin
|
|
||||||
configMap:
|
|
||||||
name: kubernetes-proxy-bin
|
|
||||||
defaultMode: 0555
|
|
||||||
- name: xtables-lock
|
- name: xtables-lock
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /run/xtables.lock
|
path: /run/xtables.lock
|
||||||
|
|
|
@ -47,6 +47,23 @@ pod:
|
||||||
limits:
|
limits:
|
||||||
memory: "1024Mi"
|
memory: "1024Mi"
|
||||||
cpu: "2000m"
|
cpu: "2000m"
|
||||||
|
probes:
|
||||||
|
proxy:
|
||||||
|
proxy:
|
||||||
|
liveness:
|
||||||
|
enabled: true
|
||||||
|
params:
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
timeoutSeconds: 15
|
||||||
|
successThreshold: 1
|
||||||
|
failureThreshold: 2
|
||||||
|
readiness:
|
||||||
|
enabled: true
|
||||||
|
params:
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
timeoutSeconds: 15
|
||||||
|
successThreshold: 1
|
||||||
|
failureThreshold: 2
|
||||||
|
|
||||||
images:
|
images:
|
||||||
tags:
|
tags:
|
||||||
|
@ -72,17 +89,3 @@ network:
|
||||||
kube_service:
|
kube_service:
|
||||||
host: 127.0.0.1
|
host: 127.0.0.1
|
||||||
port: 6553
|
port: 6553
|
||||||
|
|
||||||
livenessProbe:
|
|
||||||
config:
|
|
||||||
# NOTE(mark-burnett): To avoid cascading failure modes, it is
|
|
||||||
# important that these values are configured to avoid the possibility
|
|
||||||
# of CrashLoopBackoff for this pod. Otherwise, a small non-impacting
|
|
||||||
# issue could disable kube-proxy for the entire site.
|
|
||||||
failureThreshold: 10
|
|
||||||
initialDelaySeconds: 15
|
|
||||||
periodSeconds: 35
|
|
||||||
successThreshold: 1
|
|
||||||
timeoutSeconds: 10
|
|
||||||
whitelist:
|
|
||||||
# - postgres
|
|
||||||
|
|
Loading…
Reference in New Issue