Add health checks for Swift services

This patch adds health checks for background processes. The check
is performed by looking at the modification time of the recon cache file.
This is far from ideal, because recon files are shared between services of
the same type, but it is the only way to at least partially check correct
behaviour of background processes. In the future we should add timestamp
info to each recon file for every service using that file. Then we would
be able to find out whether all background services are updating their
caches in a timely fashion while staying within the boundaries of containers.

Co-Authored-By: Thiago da Silva <thiago@redhat.com>

Change-Id: Ib6fad8311b5a728914ce9df9122194c5f7036be7
This commit is contained in:
Martin Mágr 2018-04-09 15:26:10 +02:00
parent 9b39f6caab
commit 575fde1a96
7 changed files with 152 additions and 45 deletions

View File

@ -52,6 +52,17 @@ healthcheck_socket () {
lsof -Fc -Ua $socket | grep "c$process"
}
# Check that a file has been modified recently enough.
#
# Arguments:
#   $1 - path of the file to inspect
#   $2 - maximum allowed age in seconds; a fractional part (e.g. "30.0")
#        is ignored
# Outputs:
#   diagnostics on stderr when the limit is invalid or the file cannot
#   be inspected
# Returns:
#   0 when the file's modification time is within the limit,
#   1 when the file is older, cannot be inspected, or the limit is not a
#     non-negative number
healthcheck_file_modification () {
    local file_path=$1
    # Keep only the integer part: shell arithmetic cannot handle "30.0".
    local limit_seconds=${2%%.*}
    local curr_time last_mod limit_epoch

    case "$limit_seconds" in
        ''|*[!0-9]*)
            echo "Invalid modification time limit: '$2'" >&2
            return 1
            ;;
    esac

    # A file that cannot be inspected must count as a failure. Without this
    # guard last_mod would be empty, the numeric test below would error out
    # and the function would report success for a missing file.
    last_mod=$(stat -c '%Y' -- "$file_path") || return 1
    curr_time=$(date +%s)
    limit_epoch=$(( curr_time - limit_seconds ))

    if [ "$limit_epoch" -gt "$last_mod" ]; then
        return 1
    fi
    return 0
}
get_config_val () {
crudini --get "$1" "$2" "$3" 2> /dev/null || echo "$4"

View File

@ -3,17 +3,33 @@
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh"

# Health check for a container running one of the Swift account services.
# The account server is probed over HTTP through its healthcheck middleware.
# The background daemons (auditor, reaper, replicator) are judged by the
# modification time of the recon cache file, which they share.

conf=/etc/swift/account-server.conf
cache=/var/cache/swift/account.recon

# Match against the full command line (`ps -ef`): plain `ps -e` prints only
# the command name, truncated to 15 characters, which names as long as
# "swift-account-server" can never match. The [s] bracket keeps grep from
# matching its own command line.
# NOTE(review): this also matches any other process whose arguments contain
# the service name (e.g. this script, if started under such a path) --
# confirm how the healthcheck is launched inside the container.
if ps -ef | grep -q '[s]wift-account-server'; then
    if ! crudini --get "$conf" pipeline:main pipeline | grep -q healthcheck; then
        # Without the middleware there is nothing to probe; report healthy.
        echo "healthcheck is not available" >&2
        exit 0
    fi

    # swift-account-server is still eventlet
    bind_host=$(get_config_val "$conf" DEFAULT bind_ip 127.0.0.1)
    bind_port=$(get_config_val "$conf" DEFAULT bind_port 6002)
    # Add brackets if IPv6
    case "$bind_host" in
        *:*) bind_host="[${bind_host}]" ;;
    esac
    healthcheck_curl "http://${bind_host}:${bind_port}/healthcheck"
else
    # Pick the interval of whichever background daemon runs here; the
    # replicator is assumed when neither auditor nor reaper is found.
    if ps -ef | grep -q '[s]wift-account-auditor'; then
        interval=$(get_config_val "$conf" account-auditor interval 1800)
    elif ps -ef | grep -q '[s]wift-account-reaper'; then
        interval=$(get_config_val "$conf" account-reaper interval 3600)
    else
        interval=$(get_config_val "$conf" account-replicator interval 30)
    fi

    if ! healthcheck_file_modification "$cache" "$interval"; then
        echo "Cache file $cache was not updated within interval of $interval seconds."
        exit 1
    fi
fi

View File

@ -3,17 +3,33 @@
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh"

# Health check for a container running one of the Swift container services.
# The container server is probed over HTTP through its healthcheck
# middleware. The background daemons (auditor, replicator, updater) are
# judged by the modification time of the recon cache file, which they share.

conf=/etc/swift/container-server.conf
cache=/var/cache/swift/container.recon

# Match against the full command line (`ps -ef`): plain `ps -e` prints only
# the command name, truncated to 15 characters, which names as long as
# "swift-container-server" can never match. The [s] bracket keeps grep from
# matching its own command line.
# NOTE(review): this also matches any other process whose arguments contain
# the service name (e.g. this script, if started under such a path) --
# confirm how the healthcheck is launched inside the container.
if ps -ef | grep -q '[s]wift-container-server'; then
    if ! crudini --get "$conf" pipeline:main pipeline | grep -q healthcheck; then
        # Without the middleware there is nothing to probe; report healthy.
        echo "healthcheck is not available" >&2
        exit 0
    fi

    # swift-container-server is still eventlet
    bind_host=$(get_config_val "$conf" DEFAULT bind_ip 127.0.0.1)
    bind_port=$(get_config_val "$conf" DEFAULT bind_port 6001)
    # Add brackets if IPv6
    case "$bind_host" in
        *:*) bind_host="[${bind_host}]" ;;
    esac
    healthcheck_curl "http://${bind_host}:${bind_port}/healthcheck"
else
    # Pick the interval of whichever background daemon runs here. The process
    # names must be the container daemons, matching the config sections read
    # below; the updater is assumed when neither of the others is found.
    if ps -ef | grep -q '[s]wift-container-auditor'; then
        interval=$(get_config_val "$conf" container-auditor interval 1800)
    elif ps -ef | grep -q '[s]wift-container-replicator'; then
        interval=$(get_config_val "$conf" container-replicator interval 30)
    else
        interval=$(get_config_val "$conf" container-updater interval 300)
    fi

    if ! healthcheck_file_modification "$cache" "$interval"; then
        echo "Cache file $cache was not updated within interval of $interval seconds."
        exit 1
    fi
fi

View File

@ -0,0 +1,12 @@
#!/bin/sh
# Health check for the Swift object expirer: succeeds while the object recon
# cache file has been modified within the expirer's configured interval.

. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh

conf=/etc/swift/object-expirer.conf
cache=/var/cache/swift/object.recon

# Interval comes from the [object-expirer] section, defaulting to 300 seconds.
interval=$(get_config_val $conf object-expirer interval 300)

healthcheck_file_modification $cache $interval || {
    echo "Cache file $cache was not updated within interval of $interval seconds."
    exit 1
}

View File

@ -3,17 +3,42 @@
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh"

# Health check for a container running one of the Swift object services.
# The object server is probed over HTTP through its healthcheck middleware,
# rsync is checked for a listening port, and the background daemons
# (auditor, replicator, updater) are judged by the modification time of the
# recon cache file, which they share.

conf=/etc/swift/object-server.conf
cache=/var/cache/swift/object.recon

# Match against the full command line (`ps -ef`): plain `ps -e` prints only
# the command name, truncated to 15 characters, which names as long as
# "swift-object-server" can never match. The [s] bracket keeps grep from
# matching its own command line.
# NOTE(review): this also matches any other process whose arguments contain
# the service name (e.g. this script, if started under such a path) --
# confirm how the healthcheck is launched inside the container.
if ps -ef | grep -q '[s]wift-object-server'; then
    if ! crudini --get "$conf" pipeline:main pipeline | grep -q healthcheck; then
        # Without the middleware there is nothing to probe; report healthy.
        echo "healthcheck is not available" >&2
        exit 0
    fi

    # swift-object-server is still eventlet
    bind_host=$(get_config_val "$conf" DEFAULT bind_ip 127.0.0.1)
    bind_port=$(get_config_val "$conf" DEFAULT bind_port 6000)
    # Add brackets if IPv6
    case "$bind_host" in
        *:*) bind_host="[${bind_host}]" ;;
    esac
    healthcheck_curl "http://${bind_host}:${bind_port}/healthcheck"
elif ps -e | grep -q rsync; then
    # "rsync" is short enough for the command-name column of `ps -e`.
    process='rsync'
    # Ports to check come from the script arguments, defaulting to 873.
    args="${*:-873}"
    # $args is left unquoted on purpose: each port must be a separate word.
    if ! healthcheck_listen "$process" $args; then
        ports=${args// /,}
        echo "There is no $process process, listening on port(s) $ports, running in the container."
        exit 1
    fi
else
    # Pick the interval of whichever background daemon runs here. The process
    # names must be the object daemons, matching the config sections read
    # below; the updater is assumed when neither of the others is found.
    if ps -ef | grep -q '[s]wift-object-auditor'; then
        interval=$(get_config_val "$conf" object-auditor interval 300)
    elif ps -ef | grep -q '[s]wift-object-replicator'; then
        interval=$(get_config_val "$conf" object-replicator interval 300)
    else
        interval=$(get_config_val "$conf" object-updater interval 300)
    fi

    if ! healthcheck_file_modification "$cache" "$interval"; then
        echo "Cache file $cache was not updated within interval of $interval seconds."
        exit 1
    fi
fi

View File

@ -2,18 +2,31 @@
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh"

# Health check for the Swift proxy server. When no proxy process is found
# the script falls back to the object-expirer check (see the TODO below).

# Match against the full command line (`ps -ef`): plain `ps -e` prints only
# the command name, truncated to 15 characters, which "swift-proxy-server"
# can never match. The [s] bracket keeps grep from matching its own command
# line.
# NOTE(review): this also matches any other process whose arguments contain
# the service name (e.g. this script, if started under such a path) --
# confirm how the healthcheck is launched inside the container.
if ps -ef | grep -q '[s]wift-proxy-server'; then
    conf=/etc/swift/proxy-server.conf

    if ! crudini --get "$conf" pipeline:main pipeline | grep -q healthcheck; then
        # Without the middleware there is nothing to probe; report healthy.
        echo "healthcheck is not available" >&2
        exit 0
    fi

    # swift-proxy is still eventlet
    bind_host=$(get_config_val "$conf" DEFAULT bind_ip 127.0.0.1)
    bind_port=$(get_config_val "$conf" DEFAULT bind_port 8080)
    # Add brackets if IPv6
    case "$bind_host" in
        *:*) bind_host="[${bind_host}]" ;;
    esac
    healthcheck_curl "http://${bind_host}:${bind_port}/healthcheck"
else
    # TODO(mmagr): Remove this once swift_object_expirer container will start
    #              using swift-object-expirer kolla image
    conf=/etc/swift/object-expirer.conf
    cache=/var/cache/swift/object.recon

    interval=$(get_config_val "$conf" object-expirer interval 300)
    if ! healthcheck_file_modification "$cache" "$interval"; then
        echo "Cache file $cache was not updated within interval of $interval seconds."
        exit 1
    fi
fi

14
healthcheck/swift-rsync Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Health check for the rsync container: succeeds when an rsync process is
# listening on the port(s) given as arguments (873 when none are given).

. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh

process='rsync'
args="${@:-873}"

# Healthy case first: nothing to report.
healthcheck_listen $process $args && exit 0

# Otherwise list the expected ports comma-separated and fail the check.
ports=${args// /,}
echo "There is no $process process, listening on port(s) $ports, running in the container."
exit 1