From 0521b46bce6775107e978cf66f2fd53268720251 Mon Sep 17 00:00:00 2001
From: Ahmad Mahmoudi
Date: Mon, 4 May 2020 17:08:34 +0000
Subject: [PATCH] (fix) Added retries for genesis deploy

Added retries for the rsync of genesis.sh to the genesis node, in case
genesis is rebooted as part of the pre-genesis stage. This fix waits and
retries until the genesis node reboot is done and the node is reachable.

Added wait in genesis.sh to wait for the genesis node to come up, in
case it was rebooted in the previous stage.

Added retries in shipyard upload configdocs, to handle transient
timeouts.

Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
---
 .../airship_gate/lib/airship.sh               |  1 +
 .../airship_gate/stages/genesis.sh            | 19 +++++++++--
 .../stages/shipyard-load-design.sh            | 34 ++++++++++++++++---
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/tools/deployment/seaworthy-virt/airship_gate/lib/airship.sh b/tools/deployment/seaworthy-virt/airship_gate/lib/airship.sh
index 26f4dbb78..7b4f1b628 100644
--- a/tools/deployment/seaworthy-virt/airship_gate/lib/airship.sh
+++ b/tools/deployment/seaworthy-virt/airship_gate/lib/airship.sh
@@ -133,6 +133,7 @@ shipyard_action_wait() {
       exit 0
     fi
 
+    log "$(shipyard_cmd describe "${ACTION_ID}")"
     sleep "${poll_time}"
   done
 }
diff --git a/tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh b/tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh
index c78a68924..b08c1595a 100755
--- a/tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh
+++ b/tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh
@@ -18,13 +18,26 @@ set -e
 source "${GATE_UTILS}"
 
 # Copies script and virtmgr private key to genesis VM
-rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"
+# The copy is retried every 30 seconds, up to GENESIS_RSYNC_RETRIES times
+# (default 10), in case the genesis node is still rebooting after setup.
+GENESIS_RSYNC_RETRIES=${GENESIS_RSYNC_RETRIES:-10}
+retries=0
+while ! rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
+  if (( retries < GENESIS_RSYNC_RETRIES )); then
+    log "Genesis node is not reachable yet. Retrying in 30 seconds."
+    retries=$((retries+1))
+    sleep 30
+    continue
+  fi
+  log_error "Genesis was not reachable after max retries: ${GENESIS_RSYNC_RETRIES}."
+  exit 1
+done
 
 set -o pipefail
 ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"
 set +o pipefail
 
-if ! ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 ; then
+if ! ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 ; then
   log_warn "Using some non-cached docker images. This will slow testing."
-  ssh_cmd n0 docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
+  ssh_cmd "${GENESIS_NAME}" docker images | tail -n +2 | grep -v registry:5000 | tee -a "${LOG_FILE}"
 fi
diff --git a/tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh b/tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh
index f957a010a..6ddd65ece 100755
--- a/tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh
+++ b/tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh
@@ -52,12 +52,36 @@ check_configdocs_result(){
   fi
 }
 
+CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}
+
+# Runs "shipyard_cmd create configdocs <args>" and passes its output to
+# check_configdocs_result, making up to CREATE_CONFIGDOCS_RETRIES attempts
+# with a 30 second pause between them.
+# Arguments: forwarded unchanged to "shipyard_cmd create configdocs".
+# Returns: 0 on the first successful attempt, 1 if every attempt fails.
+# NOTE(review): assumes check_configdocs_result returns (rather than
+# exits) non-zero on failure - confirm.
+create_configdocs_design() {
+  local attempt
+  for ((attempt=1; attempt<=CREATE_CONFIGDOCS_RETRIES; attempt++)); do
+    if check_configdocs_result "$(shipyard_cmd create configdocs "$@")"; then
+      log "Create configdocs succeeded."
+      return 0
+    fi
+    # Only announce a retry and wait when another attempt will follow.
+    if ((attempt < CREATE_CONFIGDOCS_RETRIES)); then
+      log "Failed on attempt ${attempt}, retrying ..."
+      sleep 30
+    fi
+  done
+  log "Create configdocs failed after ${CREATE_CONFIGDOCS_RETRIES} attempts."
+  return 1
+}
+
 # Copy site design to genesis node
 ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
 rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"
-
-sleep 120
-check_configdocs_result "$(shipyard_cmd create configdocs design "--directory=${BUILD_WORK_DIR}/site" --replace)"
+create_configdocs_design design --directory="${BUILD_WORK_DIR}/site" --replace
 
 # Skip certs/gate if already part of site manifests
 if [[ -n "${USE_EXISTING_SECRETS}" ]]
@@ -70,14 +94,14 @@ if [[ "${OMIT_CERTS}" == "0" ]]
 then
   ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
   rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
-  check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
+  create_configdocs_design certs --directory="${BUILD_WORK_DIR}/certs" --append
 fi
 
 if [[ "${OMIT_GATE}" == "0" ]]
 then
   ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
   rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
-  check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
+  create_configdocs_design gate --directory="${BUILD_WORK_DIR}/gate" --append
 fi
 
 check_configdocs_result "$(shipyard_cmd commit configdocs)"