
This commit puts the ha-test-suite under the tools directory instead of downloading it from an external source (it was originally available at https://github.com/rscarazz/tripleo-director-ha-test-suite). At the code level, the suite is now copied directly from the local path tools/ha-test-suite, and the executable is now ha-test-suite.sh. Change-Id: I087bc28a0afa3ede9b2fb698892b8306f56790a2
152 lines
3.9 KiB
Plaintext
Executable File
152 lines
3.9 KiB
Plaintext
Executable File
# Raoul Scarazzini (rasca@redhat.com)
# This script provides the functions of a testing suite for TripleO/Director
# OpenStack HA (so with Pacemaker) environments, to be used inside those
# environments
|
|
|
|
# Print the command-line help text to stdout.
#
# Uses $0 as the program name. The text ends with one empty line, as the
# original echo of a newline-terminated string did.
function usage {
  # printf with a fixed '%s\n' format emits each argument on its own line, so
  # the text cannot be altered by echo's option or backslash handling.
  printf '%s\n' \
    "Usage $0 -t <testfile> [-r <recover file>] [-u]" \
    "-t, --test <testfile> Specify which file contains the test to run" \
    "-r, --recover <recoverfile> Specify which file (if any) should be used for recovery" \
    "-u, --undercloud Test will be performed on undercloud" \
    ""
}
|
|
|
|
# Report failed actions recorded by the cluster, optionally for one resource.
#
# Arguments:
#   $1 - (optional) resource name; when empty the whole cluster is checked
# Outputs:
#   A human-readable verdict on stdout.
# Returns:
#   0 when no relevant failed action is found.
#   EXITS the current shell with status 1 when failed actions are found, so
#   call it in a subshell if the caller must survive.
function check_failed_actions {
  local resource=$1
  local status_output failed_section errors

  # Take a single snapshot of the cluster state so every check below looks at
  # the same data (the status could otherwise change between queries).
  status_output=$(sudo pcs status)

  if ! grep -q "Failed Actions:" <<< "$status_output"; then
    if [ -z "$resource" ]; then
      echo "Cluster is OK."
    else
      echo "No failed actions for $resource."
    fi
    return 0
  fi

  # Everything from the "Failed Actions:" header up to the next empty line.
  failed_section=$(sed -n -e '/Failed Actions:/,/^$/p' <<< "$status_output")

  if [ -z "$resource" ]; then
    echo "Cluster has failed actions:"
    # Second field of each failure line is "<resource>_<operation>_<interval>";
    # keep only the resource part and list each resource once.
    printf '%s\n' "$failed_section" \
      | grep -E 'OCF_|not running|unknown' \
      | awk '{print $2}' \
      | cut -f1 -d_ \
      | sort -u
    exit 1
  fi

  # The resource name is used as a grep pattern; "--" keeps a name starting
  # with "-" from being parsed as an option.
  if errors=$(grep -A1 -- "$resource" <<< "$failed_section"); then
    echo "Resource $resource has failed actions:"
    # Quoted on purpose: an unquoted expansion would join the lines and let
    # the shell glob-expand any "*" contained in the pcs output.
    printf '%s\n' "$errors"
    exit 1
  fi

  echo "No failed actions for $resource."
  return 0
}
|
|
|
|
# Print, for every resource listed in OVERCLOUD_RESOURCES, whether the thing
# behind it looks active on the local node.
#
# Globals:
#   OVERCLOUD_RESOURCES (read) - whitespace-separated list of resource names
# Outputs:
#   One "<resource> -> active|inactive" line per resource on stdout.
function check_resources_process_status {
  local resource ip_addr rc

  # Unquoted on purpose: the list is split on whitespace.
  for resource in $OVERCLOUD_RESOURCES; do
    echo -n "$resource -> "

    case "$resource" in
      ip-*)
        # The address is taken from the resource name itself (everything
        # left after removing "ip-") rather than from the pcs attributes.
        ip_addr=${resource//ip-/}
        # The address is used as a regular expression, so "." matches any
        # character.
        sudo ip a s | grep -- "$ip_addr" &> /dev/null
        ;;
      rabbitmq)
        sudo /usr/sbin/rabbitmqctl cluster_status &> /dev/null
        ;;
      redis)
        pidof /usr/bin/redis-server &> /dev/null
        ;;
      galera)
        pidof /usr/libexec/mysqld &> /dev/null
        ;;
      *cleanup*|delay)
        # Nothing to probe: echo succeeds, so the line ends with "active".
        echo -n "no need to check if it's "
        ;;
      *)
        systemctl is-active "$resource" &> /dev/null
        ;;
    esac
    # Status of the last command run by the matching case branch.
    rc=$?

    if [ "$rc" -eq 0 ]; then
      echo "active"
    else
      echo "inactive"
    fi
  done
}
|
|
|
|
# Poll the cluster until every node of a clone or master/slave set reports
# the wanted state, or give up after a timeout.
#
# Arguments:
#   $1 - resource name as shown in brackets by "pcs status resources"
#   $2 - expected state label (the text before ":" on the state lines)
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - the loop polls once per second
#     while the counter (starting at 1) is lower than this value
# Outputs:
#   One "." on stdout per unsuccessful poll.
# Returns:
#   0 as soon as no state line other than "$2:" is left for the set (this is
#   also the case when the set is not listed at all).
#   On timeout calls check_failed_actions and EXITS the shell with status 1.
function wait_resource_status {
  local resource=$1
  local status=$2
  local i=1
  local output

  while [ "$i" -lt "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]; do
    # 1st sed: print from the header of the wanted set up to the next header
    #          (a line starting with one space and a letter) or end of input.
    #          [[:alpha:]] is used because the range a-Z is rejected as
    #          invalid in the C locale.
    # 2nd sed: drop the set's own header (first line) and any following
    #          header line. Filtering by pattern instead of blindly removing
    #          the last line keeps the final state line when the set is the
    #          last one in the listing.
    # grep:    keep only the state lines that are NOT the expected state.
    output=$(sudo pcs status resources \
      | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
      | sed -e '1d' -e '/^ [[:alpha:]]/d' \
      | grep -E -v "${status}:")

    if [ -z "$output" ]; then
      return 0
    fi

    echo -n "."
    sleep 1
    i=$((i + 1))
  done

  check_failed_actions
  exit 1
}
|
|
|
|
# Check once whether every node of a clone or master/slave set reports the
# wanted state.
#
# Arguments:
#   $1 - resource name as shown in brackets by "pcs status resources"
#   $2 - expected state label (the text before ":" on the state lines)
# Returns:
#   0 when no state line other than "$2:" is left for the set (this is also
#     the case when the set is not listed at all).
#   1 otherwise, after printing the failed-actions report. Unlike
#     wait_resource_status this function does NOT terminate the calling
#     shell.
#     NOTE(review): the original wrapped "exit 1" in a subshell, which has
#     the same effect; confirm that returning (not exiting) is what callers
#     expect.
function check_resource_status {
  local resource=$1
  local status=$2
  local output

  # 1st sed: print from the header of the wanted set up to the next header
  #          (a line starting with one space and a letter) or end of input.
  #          [[:alpha:]] is used because the range a-Z is rejected as invalid
  #          in the C locale.
  # 2nd sed: drop the set's own header (first line) and any following header
  #          line, so the final state line is kept when the set is the last
  #          one in the listing.
  # grep:    keep only the state lines that are NOT the expected state.
  output=$(sudo pcs status resources \
    | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" \
    | sed -e '1d' -e '/^ [[:alpha:]]/d' \
    | grep -E -v "${status}:")

  # Since we are checking a specific status, any output left at this point
  # means that the resource is not in the state we are expecting.
  if [ -z "$output" ]; then
    return 0
  fi

  # Subshell on purpose: check_failed_actions may call "exit", which must not
  # end the caller's shell.
  ( check_failed_actions )
  return 1
}
|
|
|
|
# Wait until the cluster reports no stopped resources.
#
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - the loop polls once per second
#     while the counter (starting at 1) is lower than this value; a value
#     that is not a usable integer ends the wait at once
# Outputs:
#   One "." on stdout per poll that still sees stopped resources, then a
#   final verdict line.
# Returns:
#   0 when a poll shows neither "Failed" nor "Stopped" in "pcs status".
#   Otherwise (a failure is reported, or the timeout is hit) prints a
#   diagnostic, calls check_failed_actions and EXITS the shell with status 1.
function wait_cluster_start {
  local i=1
  local cluster_status

  while [ "$i" -lt "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]; do
    # One snapshot per iteration, so both checks below look at the same data.
    cluster_status=$(sudo pcs status)

    # Check for failed actions: no point in waiting any longer.
    if grep -q "Failed" <<< "$cluster_status"; then
      break
    fi

    # No stopped resources left means the cluster is up.
    if ! grep -q "Stopped" <<< "$cluster_status"; then
      echo "All cluster resources are started."
      return 0
    fi

    # If we have stopped resources let's wait
    echo -n "."
    sleep 1
    i=$((i + 1))
  done

  # If we are here then we have problems: we found failed actions, or we hit
  # the timeout while resources were still stopped
  echo "Problems found. There are stopped or failed resources!"
  check_failed_actions
  exit 1
}
|
|
|
|
# Run one pcs resource action on each resource of a list, stopping at the
# first failure.
#
# Arguments:
#   $1 - pcs resource sub-command to run (passed to pcs as a single word)
#   $2 - whitespace-separated list of resource names
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - value given to pcs --wait
# Outputs:
#   One progress line per resource on stdout, ending in "OK" or "FAILURE!".
# Returns:
#   0 when every action succeeded.
#   On the first failing action calls check_failed_actions for that resource
#   and EXITS the shell with status 1.
function play_on_resources {
  local action=$1
  local resources=$2
  local resource

  # Unquoted on purpose: the list is split on whitespace.
  for resource in $resources; do
    echo -n "$(date) - Performing action $action on resource $resource "

    # Do the action on the resource.
    # pcs documents the option as --wait[=n]; given as a separate word the
    # timeout is not attached to --wait and is left as a stray positional
    # argument. When the timeout variable is empty a bare --wait is sent.
    if ! sudo pcs resource "$action" "$resource" \
        "--wait${RESOURCE_CHANGE_STATUS_TIMEOUT:+=${RESOURCE_CHANGE_STATUS_TIMEOUT}}"; then
      echo "FAILURE!"
      check_failed_actions "$resource"
      exit 1
    fi

    echo "OK"
  done

  return 0
}
|