Use timeout to check service status

Replaces the bash polling loop with the timeout command (wrapping
`crm_resource --wait`) in the piloted cluster restart to minimize downtime.

Change-Id: I9067eed9626ae5aff833d7a9a9ad1e1a6c026327
Co-Authored-By: Jiri Stransky <jistr@redhat.com>
This commit is contained in:
Giulio Fidente 2015-12-18 19:02:19 +01:00 committed by Jiri Stransky
parent 4c2e66a6d0
commit 2a9985459b
1 changed file with 12 additions and 11 deletions

View File

@ -7,11 +7,14 @@ check_interval=3
function check_resource {
if [ "$#" -ne 3 ]; then
echo "ERROR: check_resource function expects 3 parameters, $# given" | tee /dev/fd/2
exit 1
fi
service=$1
state=$2
timeout=$3
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
if [ "$state" = "stopped" ]; then
match_for_incomplete='Started'
@ -19,20 +22,18 @@ function check_resource {
match_for_incomplete='Stopped'
fi
while (( $(date +%s) < $tend )); do
if timeout -k 10 $timeout crm_resource --wait; then
node_states=$(pcs status --full | grep "$service" | grep -v Clone)
if echo "$node_states" | grep -q "$match_for_incomplete"; then
echo "$service not yet $state, sleeping $check_interval seconds."
sleep $check_interval
echo "ERROR: cluster settled but $service was not in $state state, exiting." | tee /dev/fd/2
exit 1
else
echo "$service has $state"
timeout -k 10 $timeout crm_resource --wait
return
fi
done
echo "$service never $state after $timeout seconds" | tee /dev/fd/2
exit 1
else
echo "ERROR: cluster remained unstable for more than $timeout seconds, exiting." | tee /dev/fd/2
exit 1
fi
}