Improve Swift healthchecks
This patch adds checks for the replicators. It also removes some unused or invalid code from other checks. Checking the modification time of the recon files is not enough: these files might also be changed by other Swift processes, so their modification time is not a good indicator of stuck processes. Co-Authored-By: Christian Schwede <cschwede@redhat.com> Co-Authored-By: Emilien Macchi <emilien@redhat.com> Change-Id: Ib15f1ec4766bf4d64a2860422c230e4d514bc224
This commit is contained in:
parent
07c7889ca5
commit
0862fcf98f
@ -109,3 +109,16 @@ get_url_from_vhost () {
|
||||
fi
|
||||
echo ${proto}://${server_name}:${bind_port}${wsgi_alias}
|
||||
}
|
||||
|
||||
# Fail the healthcheck when the "replication_last" timestamp stored in the
# recon cache file is older than the configured interval of the service.
#
# Arguments:
#   $1 - service name without the "swift-" prefix, e.g. account-replicator;
#        also used as the section name passed to get_config_val.
# Globals read:
#   conf  - path of the Swift config file handed to get_config_val
#   cache - path of the recon cache file that is searched for the timestamp
# Globals written:
#   service, interval, interval_secs, last, now (not declared "local" so the
#   function stays usable from scripts that run under plain sh)
# Outputs:
#   A one-line message on stdout when the timestamp is too old.
# Returns:
#   0 when no matching process is listed, when no usable timestamp or
#   interval is available, or when the timestamp is recent enough.
#   Calls "exit 1" (terminating the calling script) when it is too old.
check_swift_interval () {
    service=$1

    # Nothing to check unless a matching process shows up in the process list.
    # NOTE(review): "ps -e" prints command names, which may be truncated for
    # long names such as swift-container-replicator - confirm this pattern
    # really matches in the target containers.
    if ! ps -e | grep --quiet -- "swift-$service"; then
        return 0
    fi

    interval=$(get_config_val "$conf" "$service" interval 300)

    # Take only the first match and only its integer part, so that a missing
    # file, a missing key or a repeated key cannot break the arithmetic below.
    last=
    if [ -r "$cache" ]; then
        last=$(grep -o '"replication_last": [0-9]*' "$cache" \
            | head -n 1 \
            | cut -f 2 -d ' ')
    fi
    case $last in
        ''|*[!0-9]*)
            # No completed run recorded (yet): keep reporting healthy, as
            # before, but without emitting shell errors.
            return 0
            ;;
    esac

    # A fractional interval (e.g. "30.0") would make the integer comparison
    # fail, so compare against its integer part; the message below still
    # shows the value exactly as configured.
    interval_secs=${interval%%.*}
    case $interval_secs in
        ''|*[!0-9]*)
            return 0
            ;;
    esac

    now=$(date +%s)
    if [ "$((now - last))" -gt "$interval_secs" ]; then
        echo "Last replication run did not finish within interval of $interval seconds."
        exit 1
    fi
}
|
||||
|
8
healthcheck/swift-account-replicator
Executable file
8
healthcheck/swift-account-replicator
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Healthcheck for the Swift account replicator: delegates to
# check_swift_interval with the account server config and recon cache.

# Load the shared healthcheck helpers. Without them nothing below can run,
# so report unhealthy immediately instead of failing later with
# "command not found".
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh" || exit 1

# Both variables are read as globals by check_swift_interval.
conf=/etc/swift/account-server.conf
cache=/var/cache/swift/account.recon

check_swift_interval account-replicator
|
@ -19,17 +19,4 @@ if ps -e | grep --quiet swift-account-server; then
|
||||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf account-auditor interval 1800)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf account-reaper interval 3600)
|
||||
else
|
||||
interval=$(get_config_val $conf account-replicator interval 30)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
8
healthcheck/swift-container-replicator
Executable file
8
healthcheck/swift-container-replicator
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Healthcheck for the Swift container replicator: delegates to
# check_swift_interval with the container server config and recon cache.

# Load the shared healthcheck helpers. Without them nothing below can run,
# so report unhealthy immediately instead of failing later with
# "command not found".
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh" || exit 1

# Both variables are read as globals by check_swift_interval.
conf=/etc/swift/container-server.conf
cache=/var/cache/swift/container.recon

check_swift_interval container-replicator
|
@ -19,17 +19,4 @@ if ps -e | grep --quiet swift-container-server; then
|
||||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf container-auditor interval 1800)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf container-replicator interval 30)
|
||||
else
|
||||
interval=$(get_config_val $conf container-updater interval 300)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
@ -1,12 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. ${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh
|
||||
|
||||
conf=/etc/swift/object-expirer.conf
|
||||
cache=/var/cache/swift/object.recon
|
||||
interval=$(get_config_val $conf object-expirer interval 300)
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
8
healthcheck/swift-object-replicator
Executable file
8
healthcheck/swift-object-replicator
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Healthcheck for the Swift object replicator: delegates to
# check_swift_interval with the object server config and recon cache.

# Load the shared healthcheck helpers. Without them nothing below can run,
# so report unhealthy immediately instead of failing later with
# "command not found".
. "${HEALTHCHECK_SCRIPTS:-/usr/share/openstack-tripleo-common/healthcheck}/common.sh" || exit 1

# Both variables are read as globals by check_swift_interval.
conf=/etc/swift/object-server.conf
cache=/var/cache/swift/object.recon

check_swift_interval object-replicator
|
@ -19,26 +19,4 @@ if ps -e | grep --quiet swift-object-server; then
|
||||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
elif ps -e | grep --quiet rsync; then
|
||||
process='rsync'
|
||||
args="${@:-873}"
|
||||
|
||||
if ! healthcheck_listen $process $args; then
|
||||
ports=${args// /,}
|
||||
echo "There is no $process process, listening on port(s) $ports, running in the container."
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if ps -e | grep --quiet swift-account-auditor; then
|
||||
interval=$(get_config_val $conf object-auditor interval 300)
|
||||
elif ps -e | grep --quiet swift-account-reaper; then
|
||||
interval=$(get_config_val $conf object-replicator interval 300)
|
||||
else
|
||||
interval=$(get_config_val $conf object-updater interval 300)
|
||||
fi
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
@ -18,15 +18,4 @@ if pgrep -f swift-proxy-server; then
|
||||
bind_host="[${bind_host}]"
|
||||
fi
|
||||
healthcheck_curl http://${bind_host}:${bind_port}/healthcheck
|
||||
else
|
||||
# TODO(mmagr): Remove this once swift_object_expirer container will start
|
||||
# using swift-object-expirer kolla image
|
||||
conf=/etc/swift/object-expirer.conf
|
||||
cache=/var/cache/swift/object.recon
|
||||
interval=$(get_config_val $conf object-expirer interval 300)
|
||||
|
||||
if ! healthcheck_file_modification $cache $interval; then
|
||||
echo "Cache file $cache was not updated within interval of $interval seconds."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds additional healthchecks for Swift to monitor account,
|
||||
container and object replicators as well as the rsync process.
|
Loading…
Reference in New Issue
Block a user