From fc06a7debac60e5066aa31bbfccef573618930a1 Mon Sep 17 00:00:00 2001 From: Sergii Golovatiuk Date: Fri, 16 Feb 2018 16:20:14 +0000 Subject: [PATCH] Introduce with_backoff, fail_if_empty wrappers * with_backoff wrapper retries the command several times, increasing the backoff timeout exponentially. Its log is written to stderr. * Under some circumstances (race condition), a stack may be created but its resources are not. fail_if_empty wrapper fails if no output is received. * Some openstack commands were wrapped as there is a delay between stack and resource creations. Usually it's not visible on speedy environments. However, when the undercloud is located on a small VM or the cloud flavour is m1.medium then a race condition may occur as heat creates a stack and its resources sequentially. This means there may be cases when a stack is created but its resources are not. Wrapping openstack calls with with_backoff and fail_if_empty guarantees output with a proper exit code. Change-Id: If10f4d58f3689bc7d817c8ac2c31e48501e937ce --- deployed-server/scripts/get-occ-config.sh | 112 +++++++++++++++------- 1 file changed, 78 insertions(+), 34 deletions(-) diff --git a/deployed-server/scripts/get-occ-config.sh b/deployed-server/scripts/get-occ-config.sh index 2aa60d1d93..a405f4493f 100755 --- a/deployed-server/scripts/get-occ-config.sh +++ b/deployed-server/scripts/get-occ-config.sh @@ -2,7 +2,7 @@ set -eux -SLEEP_TIME=5 +SLEEP_TIME=2 CONTROLLER_HOSTS=${CONTROLLER_HOSTS:-""} COMPUTE_HOSTS=${COMPUTE_HOSTS:-""} @@ -22,46 +22,92 @@ BlockStorage_hosts=${BlockStorage_hosts:-"$BLOCKSTORAGE_HOSTS"} ObjectStorage_hosts=${ObjectStorage_hosts:-"$OBJECTSTORAGE_HOSTS"} CephStorage_hosts=${CephStorage_hosts:-"$CEPHSTORAGE_HOSTS"} +####################################### +# Retry with backoff interval +####################################### +function with_backoff { + local max_attempts=${ATTEMPTS:-10} + local sleep_timeout=${SLEEP_TIME:-2} + local attempt=0 + local rc=0 + + while [ ${attempt} -lt ${max_attempts} 
]; do + set +e + set -o pipefail + "$@" + rc=$? + set +o pipefail + set -e + + if [ ${rc} -eq 0 ]; then + break + fi + echo "Warning! Retrying in ${sleep_timeout} seconds ..." 1>&2 + sleep ${sleep_timeout} + attempt=$(( attempt + 1 )) + sleep_timeout=$(( sleep_timeout * 2 )) + done + + if [ ${rc} -ne 0 ]; then + echo "Warning! Return code is not 0 on the last try for ($@)" 1>&2 + fi + + return ${rc} +} + +####################################### +# Return 1 if empty output received +####################################### +function fail_if_empty { + local output="$(${@})" + if [ -z "${output}" ]; then + echo "Warning! Empty output for ($@)" 1>&2 + return 1 + else + echo "${output}" + fi +} + +function check_stack { + local stack_to_check=${1:-""} + local rc=0 + + if [ -z "${stack_to_check}" ]; then + echo No Stacks received. + return 1 + fi + + with_backoff openstack stack resource list $stack_to_check + rc=${?} + + if [ ${rc} -ne 0 ]; then + echo Stack ${stack_to_check} not yet created + fi + + return ${rc} +} + # Set the _hosts_a vars for each role defined for role in $OVERCLOUD_ROLES; do eval "hosts=\${${role}_hosts}" read -a ${role}_hosts_a <<< $hosts done -function check_stack { - local stack_to_check=${1:-""} - - if [ "$stack_to_check" = "" ]; then - echo Stack not created - return 1 - fi - - echo Checking if $1 stack is created - set +e - openstack stack resource list $stack_to_check - rc=$? - set -e - - if [ ! "$rc" = "0" ]; then - echo Stack $1 not yet created - fi - - return $rc -} - - for role in $OVERCLOUD_ROLES; do while ! check_stack $STACK_NAME; do sleep $SLEEP_TIME done - rg_stack=$(openstack stack resource show $STACK_NAME $role -c physical_resource_id -f value) + rg_stack=$(with_backoff fail_if_empty openstack stack resource show $STACK_NAME $role -c physical_resource_id -f value) while ! 
check_stack $rg_stack; do - sleep $SLEEP_TIME - rg_stack=$(openstack stack resource show $STACK_NAME $role -c physical_resource_id -f value) + rg_stack=$(with_backoff fail_if_empty openstack stack resource show $STACK_NAME $role -c physical_resource_id -f value) done - stacks=$(openstack stack resource list $rg_stack -c resource_name -c physical_resource_id -f json | jq -r "sort_by(.resource_name) | .[] | .physical_resource_id") + stacks=$(with_backoff fail_if_empty openstack stack resource list $rg_stack -c resource_name -c physical_resource_id -f json | jq -r "sort_by(.resource_name) | .[] | .physical_resource_id") + rc=${?} + while [ ${rc} -ne 0 ]; do + stacks=$(with_backoff fail_if_empty openstack stack resource list $rg_stack -c resource_name -c physical_resource_id -f json | jq -r "sort_by(.resource_name) | .[] | .physical_resource_id") + done i=0 @@ -71,19 +117,17 @@ for role in $OVERCLOUD_ROLES; do server_resource_name="NovaCompute" fi - server_stack=$(openstack stack resource show $stack $server_resource_name -c physical_resource_id -f value) + server_stack=$(with_backoff fail_if_empty openstack stack resource show $stack $server_resource_name -c physical_resource_id -f value) while ! 
check_stack $server_stack; do - sleep $SLEEP_TIME - server_stack=$(openstack stack resource show $stack $server_resource_name -c physical_resource_id -f value) + server_stack=$(with_backoff fail_if_empty openstack stack resource show $stack $server_resource_name -c physical_resource_id -f value) done while true; do - deployed_server_metadata_url=$(openstack stack resource metadata $server_stack deployed-server | jq -r '.["os-collect-config"].request.metadata_url') - if [ "$deployed_server_metadata_url" = "null" ]; then - continue - else + deployed_server_metadata_url=$(with_backoff openstack stack resource metadata $server_stack deployed-server | jq -r '.["os-collect-config"].request.metadata_url') + if [ "$deployed_server_metadata_url" != "null" ]; then break fi + sleep $SLEEP_TIME done echo "======================"