CoreDNS probe refactoring and version uplift

This uplifts CoreDNS to version 1.6.2.

The upstream CoreDNS image ships without tools such as wget and dig, so
it cannot be used as is: the CoreDNS pod's liveness/readiness probes are
just a shell script that runs wget/dig to determine whether CoreDNS is
functional, and those probes would fail. Therefore the tools needed by
the probes are added to the promenade image and the probes are refactored.

New health-check endpoints are served by a side-car container:
/externalhealth - performs the same checks as the previous shell script,
/selfcheck - checks the health of the side-car itself.

The probes of the main container should point to the check endpoint
provided by the side-car container.

Change-Id: Ib7fcf309b6cc34a86eeeec6e2109988cfa862955
This commit is contained in:
Egorov, Stanislav 2019-09-25 14:18:23 -07:00
parent 9a50a59bf6
commit 4f0ae384a8
12 changed files with 134 additions and 48 deletions

View File

@ -42,6 +42,7 @@ RUN set -ex \
&& apt-get clean \
&& apt-get update -q \
&& apt-get install --no-install-recommends -y \
dnsutils \
libyaml-dev \
rsync \
&& useradd -u 1000 -g users -d /opt/promenade promenade \

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
import argparse
import requests
import socket
import subprocess
from http.server import BaseHTTPRequestHandler, HTTPServer
class httpHandler(BaseHTTPRequestHandler):
    """HTTP handler serving the side-car health-check endpoints.

    GET /externalhealth
        Queries the ``/health`` endpoint on 127.0.0.1 at ``args.check_port``
        and runs ``dig`` against 127.0.0.1 for the names listed in
        ``args.filename``. Answers 200 when both checks pass, 500 otherwise.
    GET /selfcheck
        Always answers 200.
    Any other path
        Answers 404.

    An exception raised while handling a request is answered with 502.
    The handler reads the module-level ``args`` namespace that is created
    in the ``__main__`` section of this script.
    """

    # Seconds to wait for the /health endpoint. HTTPServer handles one
    # request at a time, so an unbounded wait here would also block
    # /selfcheck requests; the value mirrors dig's "+time=2".
    health_timeout = 2

    def _set_headers(self):
        """Send the Content-type header and finish the header section."""
        self.send_header('Content-type', 'text/html')
        self.end_headers()

    def _health_endpoint_ok(self):
        """Return True when the /health endpoint answers with a status < 400."""
        res = requests.get(
            "http://127.0.0.1:{}/health".format(args.check_port),
            timeout=self.health_timeout)
        return res.status_code < 400

    def _names_resolve(self):
        """Return True when dig exits with status 0 for the names file."""
        res = subprocess.run(
            ["dig", "+time=2", "+tries=1", "@127.0.0.1", "-f", args.filename],
            stdout=subprocess.DEVNULL)
        return res.returncode == 0

    def do_GET(self):
        """Dispatch a GET request to the matching health check."""
        try:
            if self.path == '/externalhealth':
                # Run both checks unconditionally, as the probe always did.
                health_ok = self._health_endpoint_ok()
                names_ok = self._names_resolve()
                if health_ok and names_ok:
                    self.send_response(200)
                else:
                    print('Check failed')
                    self.send_response(500)
            elif self.path == '/selfcheck':
                self.send_response(200)
            else:
                print('Unsupported endpoint')
                self.send_response(404)
        except Exception as e:
            # Includes connection errors and the /health timeout.
            print(e)
            self.send_response(502)
        finally:
            self._set_headers()
def run(port=80):
    """Start the health-check HTTP server and serve requests until killed.

    port: TCP port to listen on (all interfaces). An integer is expected;
          a numeric string is accepted and converted, because the server
          address requires an integer port.
    """
    print("Running...")
    httpd = HTTPServer(('', int(port)), httpHandler)
    httpd.serve_forever()
if __name__ == "__main__":
    # Command-line entry point. The parsed namespace is bound to the
    # module-level name ``args`` because the request handler reads it.
    cli = argparse.ArgumentParser(
        description='Run name resolution for a list of names from the file')
    cli.add_argument(
        '--filename',
        dest='filename',
        required=True,
        help='Path to file with names to resolve')
    cli.add_argument(
        '--check-port',
        dest='check_port',
        type=int,
        default=8080,
        help='Port to check for health')
    cli.add_argument(
        '--listen-port',
        dest='listen_port',
        type=int,
        default=8282,
        help='Port to listen for health checks')
    args = cli.parse_args()
    run(port=args.listen_port)

View File

@ -1,25 +0,0 @@
#!/bin/sh
# CoreDNS probe script (rendered from a Helm template).
# It fetches the health endpoint on 127.0.0.1:8080 with wget and then
# looks up every configured name against 127.0.0.1 with dig.
# Exit status is 1 when any of these checks failed.
set -x
# SUCCESS stays 1 only while every check below passes.
SUCCESS=1
{{/* Use built-in health check */}}
if ! wget -O - http://127.0.0.1:8080/health; then
echo "Failed CoreDNS health check endpoint"
SUCCESS=0
fi
{{/* Perform direct name lookups*/}}
{{- range .Values.conf.test.names_to_resolve }}
if dig +time=2 +tries=1 {{ . }} @127.0.0.1; then
echo "Successfully resolved {{ . }}"
else
echo "Failed to resolve {{ . }}"
SUCCESS=0
fi
{{- end }}
# Turn the accumulated result into the exit status of the probe.
if [ "$SUCCESS" != "1" ]; then
echo "Test failed to resolve all names."
exit 1
fi

View File

@ -4,5 +4,5 @@ kind: ConfigMap
metadata:
name: {{ .Values.service.name }}-bin
data:
probe.sh: |
{{ tuple "bin/_probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
probe.py: |
{{ tuple "bin/_probe.py.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}

View File

@ -0,0 +1,8 @@
---
# ConfigMap named after the service with a "-list" suffix.
# Its single key, names_to_resolve, holds the text rendered from the
# etc/_list.tpl template.
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.service.name }}-list
data:
names_to_resolve: |
{{ tuple "etc/_list.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}

View File

@ -82,6 +82,44 @@ spec:
topologyKey: kubernetes.io/hostname
{{- end }}
containers:
# Side-car container: runs probe.py, which serves the health-check
# endpoints on the ext_health_check_port.
- name: coredns-health
image: {{ $envAll.Values.images.tags.test | quote }}
imagePullPolicy: {{ $envAll.Values.images.pull_policy | quote }}
{{ tuple $envAll $envAll.Values.pod.resources.coredns | include "helm-toolkit.snippets.kubernetes_resources" | indent 8 }}
{{ dict "envAll" $envAll "application" "coredns" "container" "coredns" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 8 }}
command: ["python"]
args:
- "-u"
- "/tmp/bin/probe.py"
- "--filename"
- "/tmp/etc/names_to_resolve"
- "--check-port"
# Apply the default before quoting: quoting an unset value yields an empty
# string, so a trailing default would be rendered unquoted (a YAML integer),
# while container args must be strings.
- {{ $envAll.Values.conf.test.coredns_check_port | default "8080" | quote }}
- "--listen-port"
- {{ $envAll.Values.conf.test.ext_health_check_port | default "8282" | quote }}
volumeMounts:
- name: scripts
mountPath: /tmp/bin
- name: dns-names
mountPath: /tmp/etc
# Both probes of the side-car hit /selfcheck on the side-car's own port.
livenessProbe:
httpGet:
port: {{ $envAll.Values.conf.test.ext_health_check_port | default "8282" }}
path: /selfcheck
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
port: {{ $envAll.Values.conf.test.ext_health_check_port | default "8282" }}
path: /selfcheck
scheme: HTTP
initialDelaySeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
- name: coredns
image: {{ $envAll.Values.images.tags.coredns | quote }}
imagePullPolicy: {{ $envAll.Values.images.pull_policy | quote }}
@ -91,8 +129,6 @@ spec:
volumeMounts:
- name: config-volume
mountPath: /etc/coredns
- name: scripts
mountPath: /tmp/bin
ports:
- containerPort: 53
name: dns
@ -100,17 +136,20 @@ spec:
- containerPort: 53
name: dns-tcp
protocol: TCP
readinessProbe:
exec:
command:
- /tmp/bin/probe.sh
initialDelaySeconds: 2
livenessProbe:
httpGet:
port: {{ $envAll.Values.conf.test.ext_health_check_port | default "8282" }}
path: /externalhealth
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
successThreshold: 1
livenessProbe:
exec:
command:
- /tmp/bin/probe.sh
failureThreshold: 3
readinessProbe:
httpGet:
port: {{ $envAll.Values.conf.test.ext_health_check_port | default "8282" }}
path: /externalhealth
scheme: HTTP
initialDelaySeconds: 10
timeoutSeconds: 5
successThreshold: 1
@ -127,4 +166,8 @@ spec:
configMap:
name: {{ $envAll.Values.service.name }}-bin
defaultMode: 0555
- name: dns-names
configMap:
name: {{ $envAll.Values.service.name }}-list
defaultMode: 0555
{{ end }}

View File

@ -0,0 +1,3 @@
{{- /* Emit every entry of conf.test.names_to_resolve on its own line. */ -}}
{{- range .Values.conf.test.names_to_resolve }}
{{ . }}
{{- end }}

View File

@ -17,13 +17,15 @@ conf:
}
test:
coredns_check_port: 8080
ext_health_check_port: 8282
names_to_resolve:
- kubernetes.default.svc.cluster.local
images:
tags:
coredns: coredns/coredns:1.1.3
test: coredns/coredns:1.1.3
coredns: coredns/coredns:1.6.2
test: quay.io/airshipit/promenade:latest
pull_policy: "IfNotPresent"
labels:

View File

@ -552,8 +552,8 @@ data:
- kubernetes.default.svc.cluster.local
images:
tags:
coredns: coredns/coredns:1.1.3
test: coredns/coredns:1.1.3
coredns: coredns/coredns:1.6.2
test: quay.io/airshipit/promenade:master
source:
type: local
location: /etc/genesis/armada/assets/charts

View File

@ -588,8 +588,8 @@ data:
images:
tags:
coredns: coredns/coredns:1.1.3
test: coredns/coredns:1.1.3
coredns: coredns/coredns:1.6.2
test: quay.io/airshipit/promenade:master
source:
type: local
location: /etc/genesis/armada/assets/charts

View File

@ -443,8 +443,8 @@ data:
images:
tags:
coredns: coredns/coredns:1.1.3
test: coredns/coredns:1.1.3
coredns: coredns/coredns:1.6.2
test: quay.io/airshipit/promenade:master
source:
type: local
location: /etc/genesis/armada/assets/charts

View File

@ -477,8 +477,8 @@ data:
images:
tags:
coredns: coredns/coredns:1.1.3
test: coredns/coredns:1.1.3
coredns: coredns/coredns:1.6.2
test: quay.io/airshipit/promenade:master
source:
type: local
location: /etc/genesis/armada/assets/charts