Add node availability check for deploy initinfra script

The deploy initinfra script can fail with a "node01 not found" error.
We need to wait until node01 becomes ready and available before
performing any manipulations. Around 10% of gate script runner tests
fail due to this error. An example of the error can be found here[1],
and an example of it being handled successfully here[2].

[1] https://zuul.opendev.org/t/openstack/build/5f789f5b6f7c42af903a1892cc69ebf8
[2] https://zuul.opendev.org/t/openstack/build/be22de82d32d4c79a9f993d73fcd6d58/console

Change-Id: If525042ec1260afae278542b033abb14b97347f6
Signed-off-by: Ruslan Aliev <raliev@mirantis.com>
This commit is contained in:
Ruslan Aliev 2020-09-09 13:09:09 -05:00
parent 864ea8739b
commit 165ccb2d7a
1 changed files with 20 additions and 1 deletions

View File

@ -15,14 +15,33 @@
# Trace every command and abort on the first unhandled failure.
set -xe

# Kubeconfig passed to every kubectl call below; the caller may override it.
export KUBECONFIG=${KUBECONFIG:-"$HOME/.airship/kubeconfig"}
# Upper bound, in seconds, on how long to wait for the node to be listed.
export TIMEOUT=${TIMEOUT:-60}
# Node that must show up in `kubectl get nodes` before deployment continues.
NODENAME="node01"

# TODO need to run another config command after use-context to update kubeconfig
echo "Switch context to target cluster and set manifest"
airshipctl config use-context target-cluster-admin@target-cluster
airshipctl config set-context target-cluster-admin@target-cluster --manifest dummy_manifest

# Poll the node list every 10 seconds until $NODENAME is present. The deadline
# is checked only after a failed attempt, so at least one lookup always runs.
end=$(( $(date +%s) + TIMEOUT ))
echo "Waiting $TIMEOUT seconds for $NODENAME to be created."
while true; do
  # --request-timeout keeps a single unresponsive API call from eating the
  # whole budget; a kubectl failure simply counts as "not found yet" because
  # the pipeline is evaluated as an `if` condition (exempt from `set -e`).
  if kubectl --request-timeout 10s --kubeconfig "$KUBECONFIG" get nodes \
      | grep -q -- "$NODENAME"; then
    echo -e "\n$NODENAME found"
    break
  fi

  now=$(date +%s)
  if [ "$now" -gt "$end" ]; then
    echo -e "\n$NODENAME was not ready before TIMEOUT."
    exit 1
  fi
  echo -n .
  sleep 10
done

# TODO remove taint
# The taint is removed exactly once: asking kubectl to remove a taint that is
# no longer on the node exits non-zero, which would abort the script under
# `set -e` before the deployment step is reached.
kubectl --kubeconfig "$KUBECONFIG" taint node "$NODENAME" node-role.kubernetes.io/master-

echo "Deploy infra to cluster"
airshipctl phase apply initinfra --debug --wait-timeout 1000s