(fix) Added retries for genesis deploy
Added retries when rsyncing genesis.sh to the genesis node, in case the genesis node is rebooted as part of the pre-genesis stage. This fix waits and retries until the genesis node has finished rebooting and is reachable. Added a wait in genesis.sh for the genesis node to come up, in case it was rebooted in the previous stage. Added retries to the shipyard configdocs upload, to handle transient timeouts. Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
This commit is contained in:
parent
1678cf635f
commit
7d40d128cd
|
@ -18,13 +18,26 @@ set -e
|
|||
source "${GATE_UTILS}"
|
||||
|
||||
# Copies script and virtmgr private key to genesis VM
|
||||
rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"
|
||||
# Copy genesis.sh to the genesis node, retrying while the node is not
# reachable (it may have been rebooted during the genesis setup stage).
# GENESIS_RSYNC_RETRIES (default 10) caps the number of retries; each retry
# is preceded by a 30 second wait.
GENESIS_RSYNC_RETRIES=${GENESIS_RSYNC_RETRIES:-10}
retries=0
while ! rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
  # Compare arithmetically: inside [[ ]] the `<` operator sorts its operands
  # as strings, so "2" < "10" is false and the loop would stop after two
  # retries instead of honouring GENESIS_RSYNC_RETRIES.
  if (( retries >= GENESIS_RSYNC_RETRIES )); then
    log_error "Genesis was not reachable after max retries: ${GENESIS_RSYNC_RETRIES}."
    exit 1
  fi
  log "Genesis node is not reachable yet. Retrying in 30 seconds."
  retries=$((retries+1))
  sleep 30
done
|
||||
|
||||
set -o pipefail
|
||||
ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"
|
||||
set +o pipefail
|
||||
|
||||
if ! ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 ; then
|
||||
if ! ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 ; then
|
||||
log_warn "Using some non-cached docker images. This will slow testing."
|
||||
ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
|
||||
ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
|
||||
fi
|
||||
|
|
|
@ -52,12 +52,26 @@ check_configdocs_result(){
|
|||
fi
|
||||
}
|
||||
|
||||
# Maximum number of attempts for a configdocs upload; can be overridden from
# the environment.
CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}

#######################################
# Uploads a configdocs collection via shipyard_cmd, retrying on failure.
# Waits 30 seconds between attempts; does not wait after the last one.
# Globals:
#   CREATE_CONFIGDOCS_RETRIES (read) - maximum number of attempts
#   BUILD_WORK_DIR (read)            - parent of the uploaded directory
# Arguments (all optional; the defaults reproduce the original behaviour):
#   $1 - collection name passed to "create configdocs" (default: design)
#   $2 - sub-directory of BUILD_WORK_DIR to upload      (default: site)
#   $3 - buffer mode flag, e.g. --replace or --append   (default: --replace)
# Returns:
#   0 once check_configdocs_result accepts the shipyard output,
#   1 if every attempt failed.
# NOTE(review): called without arguments this always uploads the site design
# with --replace. Callers that mean to upload the certs or gate collections
# need to pass their own collection, directory and --append -- confirm
# against the call sites.
#######################################
create_configdocs_design() {
  local collection="${1:-design}"
  local subdir="${2:-site}"
  local mode="${3:---replace}"
  local attempt

  for ((attempt = 1; attempt <= CREATE_CONFIGDOCS_RETRIES; attempt++)); do
    log "Creating configdocs ${collection}, attempt ${attempt} of ${CREATE_CONFIGDOCS_RETRIES}."
    if check_configdocs_result "$(shipyard_cmd create configdocs "${collection}" "--directory=${BUILD_WORK_DIR}/${subdir}" "${mode}")"; then
      log "Create configdocs succeeded."
      return 0
    fi
    # Only announce and wait when another attempt will actually follow.
    if (( attempt < CREATE_CONFIGDOCS_RETRIES )); then
      log "Create configdocs failed, retrying in 30 seconds."
      sleep 30
    fi
  done

  log "Create configdocs failed after ${CREATE_CONFIGDOCS_RETRIES} attempts."
  return 1
}
|
||||
|
||||
# Copy site design to genesis node
|
||||
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
|
||||
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"
|
||||
|
||||
sleep 120
|
||||
check_configdocs_result "$(shipyard_cmd create configdocs design "--directory=${BUILD_WORK_DIR}/site" --replace)"
|
||||
create_configdocs_design
|
||||
|
||||
# Skip certs/gate if already part of site manifests
|
||||
if [[ -n "${USE_EXISTING_SECRETS}" ]]
|
||||
|
@ -70,14 +84,14 @@ if [[ "${OMIT_CERTS}" == "0" ]]
|
|||
then
|
||||
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
|
||||
rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
|
||||
check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
|
||||
create_configdocs_design
|
||||
fi
|
||||
|
||||
if [[ "${OMIT_GATE}" == "0" ]]
|
||||
then
|
||||
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
|
||||
rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
|
||||
check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
|
||||
create_configdocs_design
|
||||
fi
|
||||
|
||||
check_configdocs_result "$(shipyard_cmd commit configdocs)"
|
||||
|
|
Loading…
Reference in New Issue