From 2a9985459b1ef76cfc410e79417a0d6292670387 Mon Sep 17 00:00:00 2001
From: Giulio Fidente
Date: Fri, 18 Dec 2015 19:02:19 +0100
Subject: [PATCH] Use timeout to check for services status

Replaces the bash loop with the timeout command in the piloted
cluster restart to minimize downtime.

Change-Id: I9067eed9626ae5aff833d7a9a9ad1e1a6c026327
Co-Authored-By: Jiri Stransky
---
 .../tasks/pacemaker_resource_restart.sh       | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh
index 12201097b8..dfc335bfb0 100755
--- a/extraconfig/tasks/pacemaker_resource_restart.sh
+++ b/extraconfig/tasks/pacemaker_resource_restart.sh
@@ -7,11 +7,14 @@ check_interval=3
 
 function check_resource {
 
+    if [ "$#" -ne 3 ]; then
+        echo "ERROR: check_resource function expects 3 parameters, $# given" | tee /dev/fd/2
+        exit 1
+    fi
+
     service=$1
     state=$2
     timeout=$3
-    tstart=$(date +%s)
-    tend=$(( $tstart + $timeout ))
 
     if [ "$state" = "stopped" ]; then
         match_for_incomplete='Started'
@@ -19,20 +22,18 @@ function check_resource {
         match_for_incomplete='Stopped'
     fi
 
-    while (( $(date +%s) < $tend )); do
+    if timeout -k 10 $timeout crm_resource --wait; then
         node_states=$(pcs status --full | grep "$service" | grep -v Clone)
         if echo "$node_states" | grep -q "$match_for_incomplete"; then
-            echo "$service not yet $state, sleeping $check_interval seconds."
-            sleep $check_interval
+            echo "ERROR: cluster settled but $service was not in $state state, exiting." | tee /dev/fd/2
+            exit 1
         else
             echo "$service has $state"
-            timeout -k 10 $timeout crm_resource --wait
-            return
         fi
-    done
-
-    echo "$service never $state after $timeout seconds" | tee /dev/fd/2
-    exit 1
+    else
+        echo "ERROR: cluster remained unstable for more than $timeout seconds, exiting." | tee /dev/fd/2
+        exit 1
+    fi
 
 }
 