(fix) Added retries for genesis deploy

Added retries for rsyncing genesis.sh to the genesis node, in case the
genesis node is rebooted as part of the pre-genesis stage. This fix waits
and retries until the genesis node reboot is done and the node is reachable.

Added wait in genesis.sh to wait for genesis node to come up, in case it
was rebooted in the previous stage.

Added retries in shipyard upload configdocs, to handle transient
timeouts.

Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
This commit is contained in:
Ahmad Mahmoudi 2020-05-04 17:08:34 +00:00
parent 1678cf635f
commit 82d73de598
2 changed files with 47 additions and 8 deletions

View File

@ -18,13 +18,40 @@ set -e
source "${GATE_UTILS}"
# Copies script and virtmgr private key to genesis VM
rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"
# Copies genesis.sh to the genesis node, waiting for the node to finish
# rebooting if it was rebooted during the genesis setup stage.
# GENESIS_RSYNC_RETRIES (default 10) caps the number of retries; each retry
# is preceded by a 30 second pause.
GENESIS_RSYNC_RETRIES=${GENESIS_RSYNC_RETRIES:-10}
retries=0
until rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
  # Arithmetic comparison: inside [[ ]] the < operator sorts strings
  # lexicographically, so "2" < "10" would be false and cut retries short.
  if (( retries >= GENESIS_RSYNC_RETRIES )); then
    log_error "Genesis was not reachable after max retries: ${GENESIS_RSYNC_RETRIES}."
    exit 1
  fi
  log "Genesis node is not reachable yet. Retrying in 30 seconds."
  retries=$((retries+1))
  sleep 30
done
# Wait for the mini-mirror container to be started and in running state.
# MM_RUNNING_RETRIES (default 10) caps the number of retries; each retry is
# preceded by a 30 second pause.
MM_RUNNING_RETRIES=${MM_RUNNING_RETRIES:-10}
# Start from zero so this loop gets its own full retry budget instead of
# inheriting whatever count an earlier retry loop left behind.
retries=0
until [[ -n "$(ssh_cmd "${GENESIS_NAME}" docker ps -aq -f "name=mini-mirror" -f "status=running")" ]]; do
  # Compare numerically against the variable's value; a string comparison
  # against the literal text "MM_RUNNING_RETRIES" is always true for digits,
  # which would make this loop wait forever.
  if (( retries >= MM_RUNNING_RETRIES )); then
    log_error "Mini-mirror is not running after ${retries} retries."
    exit 1
  fi
  log_warn "Mini-mirror container is not running yet. Retrying in 30 seconds."
  retries=$((retries+1))
  sleep 30
done
# Run genesis.sh on the genesis node, appending its combined stdout/stderr to
# LOG_FILE. pipefail is enabled only around this pipeline so that a failure of
# ssh_cmd_raw is reflected in the pipeline status instead of being masked by
# tee's exit code; it is switched off again immediately afterwards.
set -o pipefail
ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"
set +o pipefail
if ! ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 ; then
if ! ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 ; then
log_warn "Using some non-cached docker images. This will slow testing."
ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
fi

View File

@ -52,12 +52,24 @@ check_configdocs_result(){
fi
}
# Maximum number of configdocs upload attempts; override via the environment.
CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}

#######################################
# Uploads a configdocs collection through shipyard, retrying on failure to
# ride out transient timeouts.
# Globals:
#   CREATE_CONFIGDOCS_RETRIES (read) - maximum number of attempts
#   BUILD_WORK_DIR (read)            - parent of the documents directory
# Arguments (all optional, defaults reproduce the no-argument behaviour):
#   $1 - collection name                 (default: design)
#   $2 - subdirectory of BUILD_WORK_DIR  (default: site)
#   $3 - buffer mode flag                (default: --replace)
# Returns:
#   0 as soon as one upload passes check_configdocs_result,
#   1 if every attempt failed, so the failure is visible to the caller.
# NOTE(review): calling this with no arguments always uploads the "design"
# collection from site/ with --replace; call sites meant to upload other
# collections (e.g. certs or gate with --append) need to pass arguments.
#######################################
create_configdocs_design() {
  local collection="${1:-design}"
  local subdir="${2:-site}"
  local mode="${3:---replace}"
  local -r max_attempts="${CREATE_CONFIGDOCS_RETRIES}"
  local attempt

  for ((attempt = 0; attempt < max_attempts; attempt++)); do
    log "Creating configdocs, retry ${attempt}."
    if check_configdocs_result "$(shipyard_cmd create configdocs "${collection}" "--directory=${BUILD_WORK_DIR}/${subdir}" "${mode}")"; then
      log "Create confidocs succeeded."
      return 0
    fi
    # Only wait when another attempt will actually follow.
    if (( attempt + 1 < max_attempts )); then
      log "Create configdocs failed, retrying in 30 seconds."
      sleep 30
    fi
  done

  log "Create configdocs failed after ${max_attempts} attempts."
  return 1
}
# Copy site design to genesis node
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"
sleep 120
check_configdocs_result "$(shipyard_cmd create configdocs design "--directory=${BUILD_WORK_DIR}/site" --replace)"
create_configdocs_design
# Skip certs/gate if already part of site manifests
if [[ -n "${USE_EXISTING_SECRETS}" ]]
@ -70,14 +82,14 @@ if [[ "${OMIT_CERTS}" == "0" ]]
then
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
create_configdocs_design
fi
if [[ "${OMIT_GATE}" == "0" ]]
then
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
create_configdocs_design
fi
check_configdocs_result "$(shipyard_cmd commit configdocs)"