(fix) Added retries for genesis deploy

Added retries when rsyncing genesis.sh to the genesis node, in case the
genesis node is rebooted as part of the pre-genesis stage. The script now
waits and retries until the reboot is done and the node is reachable.

Added a wait in genesis.sh for the genesis node to come up, in case it
was rebooted in the previous stage.

Added retries to the Shipyard configdocs upload to handle transient
timeouts.

Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
This commit is contained in:
Ahmad Mahmoudi 2020-05-04 17:08:34 +00:00
parent 1678cf635f
commit 7d40d128cd
2 changed files with 35 additions and 8 deletions

View File

@ -18,13 +18,26 @@ set -e
source "${GATE_UTILS}"
# Copy genesis.sh to the genesis node, retrying while the node is unreachable
# (it may still be rebooting after the genesis setup stage).
# GENESIS_RSYNC_RETRIES caps the number of retries (default 10); every retry
# is preceded by a 30 second pause. The copy is only ever attempted as the
# loop condition: a bare rsync_cmd ahead of the loop would abort the script
# under `set -e` before any retry could happen.
GENESIS_RSYNC_RETRIES=${GENESIS_RSYNC_RETRIES:-10}
retries=0
while ! rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
  # Arithmetic comparison: "<" inside [[ ]] compares strings, where "2" sorts
  # after "10", which ended the loop after two retries with the default limit.
  if (( retries < GENESIS_RSYNC_RETRIES )); then
    log "Genesis node is not reachable yet. Retrying in 30 seconds."
    retries=$((retries+1))
    sleep 30
    continue
  fi
  log_error "Genesis was not reachable after max retries: ${GENESIS_RSYNC_RETRIES}."
  exit 1
done
# Run genesis.sh on the genesis node and append its output to the log file.
# pipefail is switched on only around this pipeline so that a non-zero status
# from ssh_cmd_raw is not masked by tee's exit status.
set -o pipefail
ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"
set +o pipefail

# Collect the `docker images` rows (header row dropped by tail) that do not
# mention registry:5000. grep -v exits non-zero when nothing is left, hence
# the `|| true`; the warning is issued only when such rows actually exist.
non_cached_images="$(ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 || true)"
if [[ -n "${non_cached_images}" ]]; then
  log_warn "Using some non-cached docker images. This will slow testing."
  printf '%s\n' "${non_cached_images}" | tee -a "${LOG_FILE}"
fi

View File

@ -52,12 +52,26 @@ check_configdocs_result(){
fi
}
# Maximum number of upload attempts made by create_configdocs_design; may be
# overridden from the environment.
CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}

#######################################
# Upload a configdocs collection through shipyard_cmd, retrying on failure.
# Globals:
#   CREATE_CONFIGDOCS_RETRIES (read) - maximum number of attempts
#   BUILD_WORK_DIR (read)            - used to build the default directory
# Arguments (all optional; the defaults reproduce the site design upload):
#   $1 - collection name                  (default: design)
#   $2 - directory holding the documents  (default: ${BUILD_WORK_DIR}/site)
#   $3 - buffer mode flag for shipyard    (default: --replace)
# Returns:
#   0 as soon as check_configdocs_result accepts the output of an attempt,
#   1 when every attempt has failed.
#######################################
create_configdocs_design() {
  local collection=${1:-design}
  local directory=${2:-${BUILD_WORK_DIR}/site}
  local mode=${3:---replace}
  local i  # keep the loop counter out of the caller's scope

  for ((i = 0; i < CREATE_CONFIGDOCS_RETRIES; i++)); do
    log "Creating configdocs, retry $i."
    if check_configdocs_result "$(shipyard_cmd create configdocs "${collection}" --directory="${directory}" "${mode}")"; then
      log "Create configdocs succeeded."
      return 0
    fi
    # Pause only when another attempt will follow; sleeping after the final
    # failure would just delay the error.
    if ((i + 1 < CREATE_CONFIGDOCS_RETRIES)); then
      log "Create configdocs failed, retrying in 30 seconds."
      sleep 30
    fi
  done
  log "Create configdocs failed after $i retries."
  return 1
}
# Copy the site design YAML files into ${BUILD_WORK_DIR}/site on ${BUILD_NAME}.
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"

# NOTE(review): fixed two-minute pause before the upload; its purpose is not
# visible in this file - confirm it is still needed now that the upload retries.
sleep 120

# Upload the design collection through the retrying helper only. A one-shot
# `shipyard_cmd create configdocs design` ahead of it would upload the same
# documents twice and bypass the retry on its first failure.
create_configdocs_design
# Skip certs/gate if already part of site manifests
if [[ -n "${USE_EXISTING_SECRETS}" ]]
@ -70,14 +84,14 @@ if [[ "${OMIT_CERTS}" == "0" ]]
then
# Stage the certificate documents in ${BUILD_WORK_DIR}/certs on ${BUILD_NAME}.
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
# Append them as the "certs" collection. A bare create_configdocs_design call
# must not follow: it would re-upload ${BUILD_WORK_DIR}/site as the "design"
# collection with --replace rather than upload these documents.
check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
fi
# Upload the gate documents unless OMIT_GATE is set to something other than "0".
if [[ "${OMIT_GATE}" == "0" ]]; then
  ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
  rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
  # Append them as the "gate" collection. A bare create_configdocs_design call
  # must not follow: it would re-upload ${BUILD_WORK_DIR}/site as the "design"
  # collection with --replace rather than upload these documents.
  check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
fi

# Commit the uploaded configdocs and validate the command's output.
check_configdocs_result "$(shipyard_cmd commit configdocs)"