Make static manifest cleanup configurable

By design, the anchor pods clean up after their static pods
(and associated secrets/configs) via a hook when the anchor
pods are stopped, to make sure that cruft is not left lying around
(or running) when an anchor pod is no longer scheduled to a host.

However, it's been observed that on a host under high load, e.g.
if one or two other control plane hosts are down, then the anchor
pods may be stopped in an unplanned manner.  This results in
service unavailability for the anchored static manifest pods.

This change makes that cleanup behavior configurable (following the
pattern already implemented in the haproxy chart) but leaves it on by
default.

Change-Id: Iab14510ef8ea5b9e400e0f744231811117029887
This commit is contained in:
Matt McEuen 2019-06-11 13:40:23 -05:00
parent dec57a5dbe
commit 46b6437e72
8 changed files with 13 additions and 0 deletions

View File

@ -65,7 +65,9 @@ snapshot_files "${SNAPSHOT_DIR}"
while true; do
if [ -e /tmp/stop ]; then
echo Stopping
{{- if .Values.anchor.enable_cleanup }}
cleanup
{{- end }}
break
fi

View File

@ -77,6 +77,7 @@ labels:
anchor:
dns_policy: Default
enable_cleanup: true
kubelet:
manifest_path: /etc/kubernetes/manifests
period: 15

View File

@ -37,7 +37,9 @@ while true; do
if [ -e /tmp/stop ]; then
echo Stopping
{{- if .Values.anchor.enable_cleanup }}
cleanup
{{- end }}
break
fi

View File

@ -27,6 +27,7 @@ labels:
anchor:
dns_policy: Default
enable_cleanup: true
kubelet:
manifest_path: /etc/kubernetes/manifests
period: 15

View File

@ -26,6 +26,7 @@ function cleanup_host {
# Let the anchor process know it should not try to start the server.
touch /tmp/stopping
{{- if .Values.anchor.enable_cleanup }}
while true; do
if etcdctl member list > /tmp/stop_members; then
if grep $PEER_ENDPOINT /tmp/stop_members; then
@ -43,3 +44,5 @@ while true; do
sleep {{ .Values.anchor.period }}
done
{{- end }}
touch /tmp/stopped

View File

@ -25,6 +25,7 @@ labels:
anchor:
dns_policy: ClusterFirstWithHostNet
enable_cleanup: true
etcdctl_endpoint: example-etcd
host_data_path: /var/lib/etcd/example

View File

@ -36,7 +36,9 @@ cleanup() {
while true; do
if [ -e /tmp/stop ]; then
echo Stopping
{{- if .Values.anchor.enable_cleanup }}
cleanup
{{- end }}
break
fi

View File

@ -2,6 +2,7 @@ release_group: null
anchor:
dns_policy: Default
enable_cleanup: true
kubelet:
manifest_path: /etc/kubernetes/manifests
period: 15