(fix) Added retries for genesis deploy

Added retries when rsyncing genesis.sh to the genesis node, in case the
genesis node is rebooted as part of the pre-genesis stage. This fix waits and
retries until the genesis node has finished rebooting and is reachable.

Added a wait in genesis.sh for the genesis node to come up, in case it
was rebooted in the previous stage.

Added retries to the shipyard configdocs upload, to handle transient
timeouts.

Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
This commit is contained in:
Ahmad Mahmoudi 2020-05-04 17:08:34 +00:00
parent 868529d9fc
commit 0521b46bce
3 changed files with 35 additions and 8 deletions

View File

@ -133,6 +133,7 @@ shipyard_action_wait() {
exit 0
fi
log "$(shipyard_cmd describe "${ACTION_ID}")"
sleep "${poll_time}"
done
}

View File

@ -18,13 +18,26 @@ set -e
source "${GATE_UTILS}"
# Copies script and virtmgr private key to genesis VM
rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"
# Copies genesis.sh to the genesis node, waiting for the node to finish
# rebooting if it was rebooted during the genesis setup stage.
# GENESIS_RSYNC_RETRIES (default 10) caps the number of retries; each retry
# is preceded by a 30 second pause.
GENESIS_RSYNC_RETRIES=${GENESIS_RSYNC_RETRIES:-10}
retries=0
while ! rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
  # Compare numerically. Inside [[ ]], '<' is a lexicographic string
  # comparison, under which "2" < "10" is false, so the loop would give up
  # after only two retries with the default limit.
  if (( retries >= GENESIS_RSYNC_RETRIES )); then
    log_error "Genesis was not reachable after max retries: ${GENESIS_RSYNC_RETRIES}."
    exit 1
  fi
  log "Genesis node is not reachable yet. Retrying in 30 seconds."
  retries=$((retries+1))
  sleep 30
done
set -o pipefail
ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"
set +o pipefail
if ! ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 ; then
if ! ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 ; then
log_warn "Using some non-cached docker images. This will slow testing."
ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
fi

View File

@ -52,12 +52,25 @@ check_configdocs_result(){
fi
}
# Maximum number of attempts for a single configdocs upload (default 5).
CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}

#######################################
# Runs `shipyard_cmd create configdocs` and retries when the result check
# fails, to ride out transient timeouts.
# Globals:   CREATE_CONFIGDOCS_RETRIES (read) - maximum number of attempts
# Arguments: "$@" - forwarded verbatim to `shipyard_cmd create configdocs`
# Returns:   0 as soon as check_configdocs_result accepts the command output,
#            1 when every attempt has failed
#######################################
create_configdocs_design() {
  # Keep the counter local so the caller's variables are not clobbered.
  local attempt
  for ((attempt = 1; attempt <= CREATE_CONFIGDOCS_RETRIES; attempt++)); do
    if check_configdocs_result "$(shipyard_cmd create configdocs "$@")"; then
      log "Create configdocs succeeded."
      return 0
    fi
    # Only pause when another attempt will follow; sleeping after the final
    # failure would just delay the error by 30 seconds.
    if ((attempt < CREATE_CONFIGDOCS_RETRIES)); then
      log "Failed on attempt ${attempt}, retrying ..."
      sleep 30
    fi
  done
  log "Create configdocs failed after ${CREATE_CONFIGDOCS_RETRIES} attempts."
  return 1
}
# Copy site design to genesis node
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"
sleep 120
check_configdocs_result "$(shipyard_cmd create configdocs design "--directory=${BUILD_WORK_DIR}/site" --replace)"
create_configdocs_design design --directory="${BUILD_WORK_DIR}/site" --replace
# Skip certs/gate if already part of site manifests
if [[ -n "${USE_EXISTING_SECRETS}" ]]
@ -70,14 +83,14 @@ if [[ "${OMIT_CERTS}" == "0" ]]
then
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
create_configdocs_design certs --directory="${BUILD_WORK_DIR}/certs" --append
fi
if [[ "${OMIT_GATE}" == "0" ]]
then
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
create_configdocs_design gate --directory="${BUILD_WORK_DIR}/gate" --append
fi
check_configdocs_result "$(shipyard_cmd commit configdocs)"