Browse Source

(fix) Added retries for genesis deploy

Added retries when rsyncing genesis.sh to the genesis node, in case the
genesis node is rebooted as part of the pre-genesis stage. This fix waits and
retries until the genesis node has finished rebooting and is reachable.

Added a wait in genesis.sh for the genesis node to come up, in case it
was rebooted in the previous stage.

Added retries in shipyard upload configdocs, to handle transient
timeouts.

Change-Id: I538f2c7b1543e6775ad580ccd3dc0b5cc88d68b1
changes/07/725707/4
Ahmad Mahmoudi 2 months ago
parent
commit
308bc7ccf4
2 changed files with 38 additions and 9 deletions
  1. +12
    -1
      tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh
  2. +26
    -8
      tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh

+ 12
- 1
tools/deployment/seaworthy-virt/airship_gate/stages/genesis.sh View File

@@ -18,7 +18,18 @@ set -e
# Load the file named by GATE_UTILS into this shell; presumably it provides
# the rsync_cmd, ssh_cmd_raw, log and log_error helpers used below (confirm).
source "${GATE_UTILS}"

# Copies genesis.sh from SCRIPT_DEPOT to /root/airship/ on the genesis VM.
# Only genesis.sh is named on this command line; no private key is copied here.
# NOTE(review): this copy is unconditional and runs before the retry loop
# further down. The script appears to run under `set -e`, so if this copy fails
# while the node is still rebooting the script may stop before any retry
# happens - confirm whether this line is meant to remain.
rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"
# Waits for the genesis node to complete its reboot, if it was rebooted during
# the genesis setup stage, by retrying the copy of genesis.sh until it succeeds.
# GENESIS_RSYNC_RETRIES (default 10) bounds the number of failed attempts
# tolerated; attempts are spaced 30 seconds apart.
max_retries=${GENESIS_RSYNC_RETRIES:-10}
retries=${max_retries}
while ! rsync_cmd "${SCRIPT_DEPOT}/genesis.sh" "${GENESIS_NAME}:/root/airship/"; do
  retries=$((retries - 1))
  # Numeric "<= 0" (rather than string equality with "0") also terminates the
  # loop when GENESIS_RSYNC_RETRIES is configured as 0 or a negative number.
  if ((retries <= 0)); then
    # Report the configured budget, not the decremented counter.
    log_error "Genesis is not reachable after ${max_retries} retries."
    # Stop here: running the remote genesis step against a node that cannot
    # be reached would only fail later with a less specific error.
    exit 1
  fi
  log "Genesis node is not reachable yet. Retrying in 30 seconds."
  sleep 30
done

# With pipefail, the pipeline below fails when the remote command fails,
# instead of reporting only the exit status of tee.
set -o pipefail
# Runs /root/airship/genesis.sh on the genesis node with
# PROMENADE_ENCRYPTION_KEY set in the remote command's environment. stdout and
# stderr are merged, shown on the terminal and appended to LOG_FILE.
# NOTE(review): the key is expanded locally and embedded unquoted in the remote
# command string; a value containing spaces or shell metacharacters would be
# re-interpreted by the remote shell - confirm the key format rules this out.
ssh_cmd_raw "${GENESIS_NAME}" "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY} /root/airship/genesis.sh" 2>&1 | tee -a "${LOG_FILE}"


+ 26
- 8
tools/deployment/seaworthy-virt/airship_gate/stages/shipyard-load-design.sh View File

@@ -52,12 +52,26 @@ check_configdocs_result(){
fi
}

# Maximum number of attempts made by create_configdocs_design; can be
# overridden from the environment.
CREATE_CONFIGDOCS_RETRIES=${CREATE_CONFIGDOCS_RETRIES:-5}

#######################################
# Creates the "design" configdocs collection from ${BUILD_WORK_DIR}/site via
# shipyard_cmd (with --replace), retrying when check_configdocs_result rejects
# the command output.
# Globals:   CREATE_CONFIGDOCS_RETRIES (read), BUILD_WORK_DIR (read)
# Arguments: none
# Returns:   0 as soon as one attempt passes check_configdocs_result,
#            1 when every attempt failed (so callers can detect the failure).
#######################################
create_configdocs_design() {
  local attempt
  for ((attempt = 0; attempt < CREATE_CONFIGDOCS_RETRIES; attempt++)); do
    log "Creating configdocs, retry ${attempt}."
    if check_configdocs_result "$(shipyard_cmd create configdocs design --directory="${BUILD_WORK_DIR}/site" --replace)"; then
      log "Create configdocs succeeded."
      return 0
    fi
    # Only announce and wait for a retry when another attempt will follow.
    if ((attempt + 1 < CREATE_CONFIGDOCS_RETRIES)); then
      log "Create configdocs failed, retrying in 5 seconds."
      sleep 5
    fi
  done
  log "Create configdocs failed after ${CREATE_CONFIGDOCS_RETRIES} attempts."
  return 1
}

# Copy site design to genesis node
ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/site"
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/"

sleep 120
check_configdocs_result "$(shipyard_cmd create configdocs design "--directory=${BUILD_WORK_DIR}/site" --replace)"
# Run the copy as a background job and wait on that specific job, so that
# `wait` returns the copy's own exit status. (A misspelt, unset variable here
# would degrade to a bare `wait`, which always returns 0.)
rsync_cmd "${DEFINITION_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/site/" &
rsync_proc_id=$!
wait "${rsync_proc_id}"
create_configdocs_design

# Skip certs/gate if already part of site manifests
if [[ -n "${USE_EXISTING_SECRETS}" ]]
@@ -69,15 +83,19 @@ fi
# Upload the certificate documents unless OMIT_CERTS is set to something
# other than "0".
if [[ "${OMIT_CERTS}" == "0" ]]
then
  ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/certs"
  rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/"
  check_configdocs_result "$(shipyard_cmd create configdocs certs "--directory=${BUILD_WORK_DIR}/certs" --append)"
  # Wait on the background copy by its actual PID so that `wait` returns the
  # copy's exit status; an unset variable here degrades to a bare `wait`,
  # which always returns 0.
  rsync_cmd "${CERT_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/certs/" &
  rsync_proc_id=$!
  wait "${rsync_proc_id}"
  # NOTE(review): create_configdocs_design takes no arguments and creates the
  # "design" collection from ${BUILD_WORK_DIR}/site with --replace; it does not
  # upload the certs directory or use --append. Confirm that re-creating the
  # design collection at this point is intended.
  create_configdocs_design
fi

# Upload the gate documents unless OMIT_GATE is set to something other
# than "0".
if [[ "${OMIT_GATE}" == "0" ]]
then
  ssh_cmd "${BUILD_NAME}" mkdir -p "${BUILD_WORK_DIR}/gate"
  rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/"
  check_configdocs_result "$(shipyard_cmd create configdocs gate "--directory=${BUILD_WORK_DIR}/gate" --append)"
  # Wait on the background copy by its actual PID so that `wait` returns the
  # copy's exit status; an unset variable here degrades to a bare `wait`,
  # which always returns 0.
  rsync_cmd "${GATE_DEPOT}"/*.yaml "${BUILD_NAME}:${BUILD_WORK_DIR}/gate/" &
  rsync_proc_id=$!
  wait "${rsync_proc_id}"
  # NOTE(review): create_configdocs_design takes no arguments and creates the
  # "design" collection from ${BUILD_WORK_DIR}/site with --replace; it does not
  # upload the gate directory or use --append. Confirm that re-creating the
  # design collection at this point is intended.
  create_configdocs_design
fi

# Commit the configdocs and hand the command's output to
# check_configdocs_result for inspection.
check_configdocs_result "$(shipyard_cmd commit configdocs)"

Loading…
Cancel
Save