Files
tripleo-ha-utils/tools/ha-test-suite/include/functions
Raoul Scarazzini 609bb4ee05 Force failure after ops in rescue and fix wait opt
Today we manage tests between "blocks:" with "always:" and "rescue:" to
get the output of the commands, both stderr and stdout. Problem is that
instead of stopping we move on, and this is not useful when you want to
investigate on problems like the --wait 600 versus --wait=600 (that this
patch solves, too).

Change-Id: Icd5f6f8bdff8c563131856286e82f9a7169b5fc7
2017-09-28 06:22:10 -04:00

152 lines
3.9 KiB
Plaintext
Executable File

# Raoul Scarazzini (rasca@redhat.com)
# This file provides the helper functions of the HA testing suite, meant to be
# used inside TripleO/Director OpenStack HA (that is, Pacemaker-based)
# environments
# Print the command-line help of the test suite to stdout.
# The program name shown in the first line is taken from $0.
function usage {
  cat <<EOF
Usage $0 -t <testfile> [-r <recover file>] [-u]
-t, --test <testfile> Specify which file contains the test to run
-r, --recover <recoverfile> Specify which file (if any) should be used for recovery
-u, --undercloud Test will be performed on undercloud

EOF
}
# Report the failed actions listed by "pcs status", either for the whole
# cluster or for a single resource.
# Arguments:
#   $1 - (optional) resource name; when empty the whole cluster is checked
# Outputs:
#   A human readable report on stdout
# Returns:
#   0 when no failed action is found (for the cluster or for the resource).
#   When failed actions are found the function calls "exit 1", so the calling
#   shell is terminated unless the function is run inside a subshell.
function check_failed_actions {
  local resource=${1:-}
  local pcs_status failed_section errors

  # Take one snapshot, so that detection and report describe the same state
  pcs_status=$(sudo pcs status)

  if ! grep -q "Failed Actions:" <<< "$pcs_status"; then
    if [ -z "$resource" ]; then
      echo "Cluster is OK."
    else
      echo "No failed actions for $resource."
    fi
    return 0
  fi

  # Keep only the lines from "Failed Actions:" up to the next empty line
  failed_section=$(sed -n -e '/Failed Actions:/,/^$/p' <<< "$pcs_status")

  if [ -z "$resource" ]; then
    echo "Cluster has failed actions:"
    # Second field of each failure line, cut at the first underscore, unique
    grep -E 'OCF_|not running|unknown' <<< "$failed_section" \
      | awk '{print $2}' \
      | cut -f1 -d_ \
      | sort -u
    exit 1
  fi

  # The resource name is used as a grep pattern; each match is reported
  # together with the line that follows it
  if errors=$(grep -A1 -- "$resource" <<< "$failed_section"); then
    echo "Resource $resource has failed actions:"
    printf '%s\n' "$errors"
    exit 1
  fi

  echo "No failed actions for $resource."
  return 0
}
# Print, for every resource listed in $OVERCLOUD_RESOURCES, whether the
# process/service behind it looks active on the local node.
# Globals:
#   OVERCLOUD_RESOURCES (read) - whitespace separated list of resource names
# Outputs:
#   One "<resource> -> active|inactive" line per resource on stdout
function check_resources_process_status {
  local resource ip_addr rc

  # Deliberately unquoted: the variable holds a whitespace separated list
  for resource in $OVERCLOUD_RESOURCES; do
    echo -n "$resource -> "
    case "$resource" in
      ip-*)
        # The address is whatever remains once "ip-" is removed from the name.
        # It is used as a regular expression, so "." matches any character.
        #ip_addr=$(pcs resource show $resource | grep Attributes | sed 's/.*ip=\(.*\) cidr.*/\1/g')
        ip_addr=${resource//ip-/}
        sudo ip a s | grep -- "$ip_addr" &> /dev/null
        ;;
      rabbitmq)
        sudo /usr/sbin/rabbitmqctl cluster_status &> /dev/null
        ;;
      redis)
        pidof /usr/bin/redis-server &> /dev/null
        ;;
      galera)
        pidof /usr/libexec/mysqld &> /dev/null
        ;;
      *cleanup*|delay)
        # Nothing is probed here: the successful echo makes the line end
        # with "active"
        echo -n "no need to check if it's "
        ;;
      *)
        systemctl is-active "$resource" &> /dev/null
        ;;
    esac
    # Exit status of the probe executed by the matching case branch
    rc=$?
    if [ "$rc" -eq 0 ]; then
      echo "active"
    else
      echo "inactive"
    fi
  done
}
# Wait until every line reported by "pcs status resources" for a Clone or
# Master/Slave set shows the wanted status.
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - upper bound for the loop counter,
#   which starts at 1 and grows by one per one-second sleep
# Arguments:
#   $1 - resource name, as shown between square brackets in the set header
#   $2 - wanted status label (matched as "<status>:")
# Outputs:
#   One "." on stdout per unsuccessful poll
# Returns:
#   0 as soon as no line of the set is in a different status. On timeout it
#   calls check_failed_actions and then "exit 1".
function wait_resource_status {
  local resource=$1
  local status=$2
  local i=1
  local output

  while [ "$i" -lt "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]; do
    # sed prints from the set header up to the next line that starts with a
    # single space followed by a letter; head/tail drop those two boundary
    # lines and grep removes the lines already in the wanted status.
    # [[:alpha:]] is used because a range such as [a-Z] is rejected by sed in
    # the C locale, which left $output empty and reported a false success.
    output=$(sudo pcs status resources \
      | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
      | head -n -1 \
      | tail -n +2 \
      | grep -E -v "$status:")
    if [ -z "$output" ]; then
      return 0
    fi
    echo -n "."
    sleep 1
    i=$((i + 1))
  done

  check_failed_actions
  exit 1
}
# Check once whether every line reported by "pcs status resources" for a
# Clone or Master/Slave set shows the wanted status.
# Arguments:
#   $1 - resource name, as shown between square brackets in the set header
#   $2 - wanted status label (matched as "<status>:")
# Returns:
#   0 when no line of the set is in a different status, 1 otherwise (after
#   having run check_failed_actions). Unlike wait_resource_status, this
#   function does not terminate the calling shell.
function check_resource_status {
  local resource=$1
  local status=$2
  local output

  # sed prints from the set header up to the next line that starts with a
  # single space followed by a letter; head/tail drop those two boundary
  # lines and grep removes the lines already in the wanted status.
  # [[:alpha:]] is used because a range such as [a-Z] is rejected by sed in
  # the C locale, which left $output empty and reported a false success.
  output=$(sudo pcs status resources \
    | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
    | head -n -1 \
    | tail -n +2 \
    | grep -E -v "$status:")

  # Since we are checking a specific status, if we have output from above it
  # means that for some reason the resource is not in the state we are expecting
  if [ -z "$output" ]; then
    return 0
  fi

  # Run in a subshell: check_failed_actions may call "exit", and that must
  # not end the caller, which only gets a non-zero return from here
  ( check_failed_actions )
  return 1
}
# Poll "pcs status" once per second until no resource is reported as
# "Stopped".
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - the loop gives up once its
#   counter, which starts at 1, reaches this value
# Outputs:
#   One "." on stdout per poll that still shows stopped resources, then a
#   final message
# Returns:
#   0 when no "Stopped" (and no "Failed") text is left in the status. On
#   timeout, or as soon as "Failed" shows up, it prints an error, calls
#   check_failed_actions and then "exit 1".
function wait_cluster_start {
  local i=1
  local pcs_status

  while true; do
    # -ge rather than -eq: with a timeout lower than 1 an equality test is
    # never true and the loop would spin for as long as resources are stopped
    if [ "$i" -ge "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]; then
      break
    fi

    # One snapshot per iteration, so both checks look at the same state
    pcs_status=$(sudo pcs status)

    # Check for failed actions
    if grep -q "Failed" <<< "$pcs_status"; then
      break
    fi

    # If we have stopped resources let's wait
    if ! grep -q "Stopped" <<< "$pcs_status"; then
      echo "All cluster resources are started."
      return 0
    fi

    echo -n "."
    sleep 1
    i=$((i + 1))
  done

  # If we are here then we have problems: we hit the timeout, or we have
  # failed or still stopped resources
  echo "Problems found. There are stopped or failed resources!"
  check_failed_actions
  exit 1
}
# Run "pcs resource <action>" on each resource of a list, stopping at the
# first failure.
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - value passed to pcs as --wait=<value>
# Arguments:
#   $1 - pcs resource action to perform
#   $2 - whitespace separated list of resource names
# Outputs:
#   One progress line per resource on stdout, ending with OK or FAILURE!
# Returns:
#   0 when every action succeeds. On the first failing action it calls
#   check_failed_actions for that resource and then "exit 1".
function play_on_resources {
  local action=$1
  local resources=$2
  local resource

  # Deliberately unquoted: the argument holds a whitespace separated list
  for resource in $resources; do
    echo -n "$(date) - Performing action $action on resource $resource "
    # Do the action on the resource ($action stays unquoted so that it keeps
    # being word-split exactly as before)
    if sudo pcs resource $action "$resource" --wait="$RESOURCE_CHANGE_STATUS_TIMEOUT"; then
      echo "OK"
    else
      echo "FAILURE!"
      check_failed_actions "$resource"
      exit 1
    fi
  done
  return 0
}