Today we manage tests between "blocks:" with "always:" and "rescue:" to get the output of the commands, both stderr and stdout. The problem is that instead of stopping on a failure we move on, which is not useful when you want to investigate problems like the --wait 600 versus --wait=600 one (which this patch solves, too). Change-Id: Icd5f6f8bdff8c563131856286e82f9a7169b5fc7
152 lines
3.9 KiB
Plaintext
Executable File
152 lines
3.9 KiB
Plaintext
Executable File
# Raoul Scarazzini (rasca@redhat.com)
|
|
# This script provides a suite of test functions to be used inside
# TripleO/Director OpenStack HA (i.e. Pacemaker-based) environments
|
|
|
|
function usage {
    # Print the command line help on stdout.
    # The text starts with the name the script was invoked as ($0) and ends
    # with one empty line.
    printf '%s\n' \
        "Usage $0 -t <testfile> [-r <recover file>] [-u]" \
        "-t, --test <testfile> Specify which file contains the test to run" \
        "-r, --recover <recoverfile> Specify which file (if any) should be used for recovery" \
        "-u, --undercloud Test will be performed on undercloud" \
        ""
}
|
|
|
|
function check_failed_actions {
    # Report the failed actions listed by `pcs status`, either for the whole
    # cluster or for a single resource.
    #
    # Arguments:
    #   $1 - optional resource name, used as a grep pattern against the
    #        "Failed Actions:" section. When empty the whole cluster is checked.
    # Outputs:
    #   A human readable report on stdout.
    # Returns:
    #   0 when no (matching) failed action is found.
    #   When failed actions are found the function does NOT return: it prints
    #   them and terminates the current shell with `exit 1`.
    local resource=$1
    local cluster_status failed_section errors

    # Take a single snapshot so that detection and reporting look at the same
    # cluster state.
    cluster_status=$(sudo pcs status)

    if ! grep -q "Failed Actions:" <<< "$cluster_status"; then
        if [[ -z "$resource" ]]; then
            echo "Cluster is OK."
        else
            echo "No failed actions for $resource."
        fi
        return 0
    fi

    # The section goes from its header to the first empty line.
    failed_section=$(sed -n -e '/Failed Actions:/,/^$/p' <<< "$cluster_status")

    if [[ -z "$resource" ]]; then
        echo "Cluster has failed actions:"
        # Second field of each failure line is "<resource>_<operation>_...":
        # keep only the resource part, once per resource.
        grep -E 'OCF_|not running|unknown' <<< "$failed_section" \
            | awk '{print $2}' \
            | cut -f1 -d_ \
            | sort -u
        exit 1
    fi

    if errors=$(grep -A1 -e "$resource" <<< "$failed_section"); then
        echo "Resource $resource has failed actions:"
        # Quoted on purpose: failure lines can contain "*", which must not be
        # glob-expanded, and the line breaks of the report must be preserved.
        printf '%s\n' "$errors"
        exit 1
    fi

    echo "No failed actions for $resource."
    return 0
}
|
|
|
|
function check_resources_process_status {
    # For every word in OVERCLOUD_RESOURCES print "<resource> -> active" or
    # "<resource> -> inactive", depending on a per-resource check:
    #   ip-*            the address (resource name without "ip-") shows up in
    #                   the output of `ip a s`
    #   rabbitmq        `rabbitmqctl cluster_status` succeeds
    #   redis, galera   `pidof` finds the server binary
    #   *cleanup*|delay nothing is checked, the resource is reported as active
    #   anything else   `systemctl is-active <resource>` succeeds
    #
    # Globals:
    #   OVERCLOUD_RESOURCES (read) - whitespace separated resource names
    local rc

    for resource in $OVERCLOUD_RESOURCES; do
        printf '%s -> ' "$resource"

        case $resource in
            ip-*)
                # Alternative kept from the original author:
                #ip_addr=$(pcs resource show $resource | grep Attributes | sed 's/.*ip=\(.*\) cidr.*/\1/g')
                ip_addr=${resource//ip-/}
                sudo ip a s | grep "$ip_addr" > /dev/null 2>&1
                rc=$?
                ;;
            rabbitmq)
                sudo /usr/sbin/rabbitmqctl cluster_status > /dev/null 2>&1
                rc=$?
                ;;
            redis)
                pidof /usr/bin/redis-server > /dev/null 2>&1
                rc=$?
                ;;
            galera)
                pidof /usr/libexec/mysqld > /dev/null 2>&1
                rc=$?
                ;;
            *cleanup*|delay)
                # The status of this echo is what makes the line end in "active".
                echo -n "no need to check if it's "
                rc=$?
                ;;
            *)
                systemctl is-active "$resource" > /dev/null 2>&1
                rc=$?
                ;;
        esac

        if [ "$rc" -eq 0 ]; then
            echo "active"
        else
            echo "inactive"
        fi
    done
}
|
|
|
|
function wait_resource_status {
    # Poll `pcs status resources` once per second until every line listed
    # under the "Clone Set" / "Master/Slave Set" of a resource carries the
    # expected status.
    #
    # Globals:
    #   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - upper bound for the poll
    #       counter, which starts at 1 (so at most TIMEOUT-1 polls are done)
    # Arguments:
    #   $1 - resource name as shown between square brackets in the set header
    #   $2 - expected status label, i.e. the text printed before the ":" on
    #        the lines below the set header
    # Outputs:
    #   One "." on stdout for every unsuccessful poll.
    # Returns:
    #   0 as soon as no line with a different status is left.
    #   On timeout the function does NOT return: it runs check_failed_actions
    #   and terminates the current shell with `exit 1`.
    local resource=$1
    local status=$2
    local i=1
    local output

    while [ "$i" -lt "${RESOURCE_CHANGE_STATUS_TIMEOUT:-0}" ]; do
        # sed prints from the set header to the next entry (a line starting
        # with exactly one space followed by a letter); head/tail strip those
        # two delimiting lines, grep drops the lines already in the wanted
        # status. [[:alpha:]] is used because a range like [a-Z] is rejected
        # by sed in the C locale.
        # NOTE(review): head -n -1 assumes another entry follows the set;
        # confirm the behaviour when the set is the last one in the output.
        output=$(sudo pcs status resources \
            | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
            | head -n -1 \
            | tail -n +2 \
            | grep -Ev "$status:")

        if [[ -z "$output" ]]; then
            return 0
        fi

        echo -n "."
        sleep 1
        i=$((i + 1))
    done

    check_failed_actions
    exit 1
}
|
|
|
|
function check_resource_status {
    # Check, once, that every line listed under the "Clone Set" /
    # "Master/Slave Set" of a resource in `pcs status resources` carries the
    # expected status.
    #
    # Arguments:
    #   $1 - resource name as shown between square brackets in the set header
    #   $2 - expected status label, i.e. the text printed before the ":" on
    #        the lines below the set header
    # Returns:
    #   0 when no line with a different status is found.
    #   Otherwise the function does NOT return: it runs check_failed_actions
    #   and terminates the current shell with `exit 1`. (A braceless subshell
    #   "( ...; exit 1 )" would only leave the subshell and let the caller go
    #   on, which is why a plain if is used here.)
    local resource=$1
    local status=$2
    local output

    # sed prints from the set header to the next entry (a line starting with
    # exactly one space followed by a letter); head/tail strip those two
    # delimiting lines, grep drops the lines already in the wanted status.
    # [[:alpha:]] is used because a range like [a-Z] is rejected by sed in the
    # C locale.
    output=$(sudo pcs status resources \
        | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
        | head -n -1 \
        | tail -n +2 \
        | grep -Ev "$status:")

    # Since we are checking a specific status, if we have output from above it
    # means that for some reason the resource is not in the state we are expecting
    if [[ -z "$output" ]]; then
        return 0
    fi

    check_failed_actions
    exit 1
}
|
|
|
|
function wait_cluster_start {
    # Poll `pcs status` once per second until no resource is reported as
    # "Stopped", giving up on the first "Failed" entry or on timeout.
    #
    # Globals:
    #   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - upper bound for the poll
    #       counter, which starts at 1 (so at most TIMEOUT-1 polls are done).
    #       An unset, zero or negative value means no poll at all instead of
    #       an endless loop.
    # Outputs:
    #   One "." on stdout for every poll that still sees stopped resources,
    #   then a final message.
    # Returns:
    #   0 when the status shows neither "Failed" nor "Stopped".
    #   Otherwise the function does NOT return: it runs check_failed_actions
    #   and terminates the current shell with `exit 1`.
    local i=1
    local cluster_status

    while [ "$i" -lt "${RESOURCE_CHANGE_STATUS_TIMEOUT:-0}" ]; do
        # One snapshot per iteration, so both checks see the same state
        cluster_status=$(sudo pcs status)

        # Check for failed actions
        if grep -q "Failed" <<< "$cluster_status"; then
            break
        fi

        # If we have stopped resources let's wait
        if ! grep -q "Stopped" <<< "$cluster_status"; then
            echo "All cluster resources are started."
            return 0
        fi

        echo -n "."
        sleep 1
        i=$((i + 1))
    done

    # If we are here then we have problems: we hit the timeout, we found
    # failed actions or we still have stopped resources
    echo "Problems found. There are stopped or failed resources!"
    check_failed_actions
    exit 1
}
|
|
|
|
function play_on_resources {
    # Run `pcs resource <action>` on each resource of a list, one at a time,
    # stopping at the first failure.
    #
    # Globals:
    #   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - value passed to pcs as --wait=
    # Arguments:
    #   $1 - pcs resource action to perform
    #   $2 - whitespace separated list of resource names
    # Outputs:
    #   One progress line per resource on stdout, closed by "OK" or "FAILURE!".
    # Returns:
    #   0 when the action succeeded on every resource.
    #   On the first failure the function does NOT return: it runs
    #   check_failed_actions for that resource and terminates the current
    #   shell with `exit 1`.
    action=$1
    resources=$2

    # $resources is split on purpose: it carries the whole list in one string.
    for resource in $resources; do
        printf '%s - Performing action %s on resource %s ' "$(date)" "$action" "$resource"

        # Do the action on the resource ($action is left unquoted as in the
        # original call, so it is still subject to word splitting)
        if sudo pcs resource $action "$resource" --wait="$RESOURCE_CHANGE_STATUS_TIMEOUT"; then
            echo "OK"
            continue
        fi

        echo "FAILURE!"
        check_failed_actions "$resource"
        exit 1
    done

    return 0
}
|