Make healthchecks more strict

It was discovered that healthchecks aren't really reliable because they
aren't strict enough.

This patch adds the "standard" strict options (set -euxo pipefail) so
that errors are caught as soon as they occur and each healthcheck
returns the actual state of the checked element.

It also requires a small change to the healthcheck_port() function:
with pipefail enabled, the pipeline ending in "grep -q" returned 141
instead of 0, because SIGPIPE was sent to an earlier command in the
pipe[1].

[1] https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
    http://www.tldp.org/LDP/lpg/node20.html

Change-Id: If13b6ca177d47a0af29ba5e5099e040eea62876c
Closes-Bug: #1860556
Related: https://bugzilla.redhat.com/show_bug.cgi?id=1794044
This commit is contained in:
Cédric Jeanneret 2020-01-22 16:23:24 +01:00
parent a05d173fee
commit 475368e9b7
1 changed files with 25 additions and 12 deletions

View File

@ -1,4 +1,5 @@
#!/bin/bash
set -euxo pipefail
: ${HEALTHCHECK_CURL_MAX_TIME:=10}
: ${HEALTHCHECK_CURL_USER_AGENT:=curl-healthcheck}
: ${HEALTHCHECK_CURL_WRITE_OUT:='\n%{http_code} %{remote_ip}:%{remote_port} %{time_total} seconds\n'}
@ -25,6 +26,10 @@ get_user_from_process() {
}
healthcheck_curl () {
if [ $# == 0 ]; then
echo 'healthcheck_curl: no parameter provided'
return 1
fi
export NSS_SDB_USE_CACHE=no
curl -g -k -q -s -S --fail -o "${HEALTHCHECK_CURL_OUTPUT}" \
--max-time "${HEALTHCHECK_CURL_MAX_TIME}" \
@ -47,7 +52,11 @@ healthcheck_port () {
# port by using "sudo -u" to get the right output.
# Note: the privileged containers have the correct ss output with root
# user; which is why we need to run with both users, as a best effort.
(ss -ntuap; sudo -u $puser ss -ntuap) | sort -u | grep -qE ":($ports).*,pid=($pids),"
# https://bugs.launchpad.net/tripleo/+bug/1860556
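# Do not use the "-q" option for grep: it exits on the first match, so the
# command feeding it can be killed by SIGPIPE (exit code 141), which
# "set -o pipefail" then reports as a failure of the whole pipeline.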
# See https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
(ss -ntuap; sudo -u $puser ss -ntuap) | sort -u | grep -E ":($ports).*,pid=($pids),">/dev/null
}
healthcheck_listen () {
@ -96,6 +105,7 @@ get_config_val () {
# apachectl -S is slightly harder to parse and doesn't say if the vhost is serving SSL
get_url_from_vhost () {
vhost_file=$1
if test -n "${vhost_file}" && test -r "${vhost_file}" ; then
server_name=$(awk '/ServerName/ {print $2}' $vhost_file)
ssl_enabled=$(awk '/SSLEngine/ {print $2}' $vhost_file)
bind_port=$(grep -h "<VirtualHost .*>" $vhost_file | sed 's/<VirtualHost .*:\(.*\)>/\1/')
@ -108,6 +118,9 @@ get_url_from_vhost () {
wsgi_alias="${wsgi_alias}/"
fi
echo ${proto}://${server_name}:${bind_port}${wsgi_alias}
else
exit 1
fi
}
check_swift_interval () {