(fix) Check sync of only active rack controllers

- The import resources job waits for rack controllers to sync
  the imported resources. In an environment where rack pods have
  been rescheduled away from a node, a dead rack controller is left
  in the MAAS database. We cannot gate on dead controllers syncing,
  because rackd is no longer running on them, so now only check rack
  controllers with a running rackd.

Change-Id: I5ca16a0c97ed201a08844ca7c82c2cbb7d059aa7
This commit is contained in:
Hussey, Scott (sh8121) 2019-06-07 08:55:50 -05:00
parent 0b95ba4d41
commit c84a5b64de
1 changed files with 110 additions and 61 deletions

View File

@ -22,83 +22,121 @@ TRY_LIMIT=${TRY_LIMIT:-1}
JOB_TIMEOUT=${JOB_TIMEOUT:-900}
RETRY_TIMER=${RETRY_TIMER:-30}
function start_import {
check_for_download
function timer {
retry_wait=$1
shift
if [[ $? -eq 0 ]]
while [[ ${JOB_TIMEOUT} -gt 0 ]]
do
"$@"
rc=$?
if [ $rc -eq 0 ]
then
echo "Already have images, skipping import."
return 0
return $rc
else
JOB_TIMEOUT=$(($JOB_TIMEOUT - $retry_wait))
sleep $retry_wait
fi
done
while [[ ${import_tries} -lt $TRY_LIMIT ]]
do
import_tries=$(($import_tries + 1))
echo "Starting image import try ${import_tries}..."
maas ${ADMIN_USERNAME} boot-resources import
sleep 30 # Seems MAAS needs time to sync up
check_for_download
if [[ $? -eq 0 ]]
then
echo "Image import success!"
return 0
fi
done
return 1
return 124
}
# Run a single boot-resource import attempt.
#
# If check_for_download already succeeds, nothing is imported. Otherwise
# an import is requested from MAAS, the function pauses 30 seconds, and
# the download state is checked once more.
#
# Globals:
#   ADMIN_USERNAME (read)  - MAAS CLI profile name
#   import_tries (written) - number of import attempts started so far
# Returns:
#   The status of the last check_for_download call.
function import_resources {
    local rc

    check_for_download
    rc=$?

    if [[ ${rc} -ne 0 ]]
    then
        # Count the attempt before announcing it; otherwise every retry
        # logs the same (or an empty) try number.
        import_tries=$(( ${import_tries:-0} + 1 ))
        echo "Starting image import try ${import_tries}..."
        maas "${ADMIN_USERNAME}" boot-resources import
        # Pause before re-checking the download state.
        sleep 30
        check_for_download
        rc=$?
    fi

    return "${rc}"
}
# Drive import_resources through the shared timer helper, passing
# RETRY_TIMER as the helper's first argument.
# Returns whatever status timer reports.
function start_import {
    timer "${RETRY_TIMER}" import_resources
    return $?
}
function check_for_download {
while [[ ${JOB_TIMEOUT} -gt 0 ]]; do
if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true';
then
echo -e '\nBoot resources currently importing\n'
let JOB_TIMEOUT-=${RETRY_TIMER}
sleep ${RETRY_TIMER}
else
synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO)
if [[ $synced_imgs -gt 0 ]]
then
echo 'Boot resources have completed importing'
return 0
else
echo 'Import failed!'
return 1
fi
fi
done
echo "Timeout waiting for import!"
if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true';
then
echo -e '\nBoot resources currently importing\n'
return 1
else
synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO)
if [[ $synced_imgs -gt 0 ]]
then
echo 'Boot resources have completed importing'
return 0
else
echo 'Import failed!'
return 1
fi
fi
}
# Make one attempt at ensuring a MAAS config option holds the desired value.
#
# Reads the current value with `maas ... get-config` and, when it differs
# from the desired one (double quotes stripped from both), writes it with
# `maas ... set-config`.
#
# Globals:
#   ADMIN_USERNAME (read) - MAAS CLI profile name
# Arguments:
#   $1 - option name
#   $2 - desired value (may be wrapped in double quotes)
# Returns:
#   0 if the option already matched; otherwise the status of set-config.
function check_then_set_single {
    local option="$1"
    local value="$2"
    local cur_val desired_val

    cur_val=$(maas "${ADMIN_USERNAME}" maas get-config "name=${option}" | tail -n 1 | tr -d '"')
    # printf with a quoted argument keeps the value intact; an unquoted
    # echo would glob-expand it and collapse internal whitespace.
    desired_val=$(printf '%s\n' "${value}" | tr -d '"')

    # Both sides are quoted so the comparison is a literal string match;
    # an unquoted right-hand side inside [[ ]] is treated as a glob pattern.
    if [[ "${cur_val}" != "${desired_val}" ]]
    then
        echo "Setting MAAS option ${option} to ${desired_val}"
        # Quoted so a value containing spaces reaches maas as one argument.
        maas "${ADMIN_USERNAME}" maas set-config "name=${option}" "value=${desired_val}"
        return $?
    fi

    echo "MAAS option ${option} already set to ${cur_val}"
    return 0
}
function check_then_set {
option=$1
value=$2
while [[ ${JOB_TIMEOUT} -gt 0 ]]
do
cur_val=$(maas ${ADMIN_USERNAME} maas get-config name=${option} | tail -1 | tr -d '"')
desired_val=$(echo ${value} | tr -d '"')
timer "$RETRY_TIMER" check_then_set_single "$option" "$value"
}
if [[ $cur_val != $desired_val ]]
then
echo "Setting MAAS option ${option} to ${desired_val}"
maas ${ADMIN_USERNAME} maas set-config name=${option} value=${desired_val}
if [[ $? -gt 0 ]]
then
let JOB_TIMEOUT-=${RETRY_TIMER}
sleep ${RETRY_TIMER}
else
return $?
fi
else
echo "MAAS option ${option} already set to ${cur_val}"
return 0
fi
# Print the system_id of each rack controller whose "rackd" service entry
# reports status "running", one id per line.
# Reads the controller list from `maas <profile> rack-controllers read`.
function get_active_rack_controllers {
    # Results are collected into an array (map) before being emitted, so a
    # jq error on any controller produces no partial output.
    local running_rackd_filter='
        map(
            .system_id as $id
            | .service_set[]
            | select(.name == "rackd" and .status == "running")
            | $id
        )
        | .[]'

    maas ${ADMIN_USERNAME} rack-controllers read | jq -r "${running_rackd_filter}"
}
# Make one pass over the active rack controllers and report whether all of
# them have synced a boot image for the default distro.
#
# A rack counts as synced when its list-boot-images response has status
# "synced" and at least one image whose name contains MAAS_DEFAULT_DISTRO.
# For every rack that is not synced, an import-boot-images request is sent.
#
# Globals:
#   ADMIN_USERNAME (read)      - MAAS CLI profile name
#   MAAS_DEFAULT_DISTRO (read) - substring looked for in image names
# Returns:
#   0 if at least one active rack exists and every one is synced;
#   1 otherwise (including when no active rack controller is reported).
function check_for_rack_sync_single {
    local rack_list rack_id listing rack_state
    local rack_count=0
    local pending=0

    rack_list=$(get_active_rack_controllers)

    # Intentionally unquoted: the ids arrive one per line and are iterated
    # by word splitting.
    for rack_id in ${rack_list}
    do
        rack_count=$((rack_count + 1))

        # Query the rack once and evaluate both conditions on the same
        # response, so status and image list come from one snapshot.
        listing=$(maas "${ADMIN_USERNAME}" rack-controller list-boot-images "${rack_id}")

        # The distro is passed with --arg rather than spliced into the jq
        # program text. Unparseable output leaves rack_state empty, which
        # is treated as not synced.
        rack_state=$(printf '%s\n' "${listing}" | jq -r --arg distro "${MAAS_DEFAULT_DISTRO}" '
            if .status == "synced"
               and ([.images[]? | .name? | strings | select(contains($distro))] | length > 0)
            then "synced"
            else "pending"
            end')

        if [[ "${rack_state}" != "synced" ]]
        then
            pending=$((pending + 1))
            maas "${ADMIN_USERNAME}" rack-controller import-boot-images "${rack_id}"
        fi
    done

    if [[ ${rack_count} -gt 0 && ${pending} -eq 0 ]]
    then
        return 0
    fi

    return 1
}
# Drive check_for_rack_sync_single through the shared timer helper, passing
# RETRY_TIMER as the helper's first argument.
# Returns whatever status timer reports.
function check_for_rack_sync {
    timer "${RETRY_TIMER}" check_for_rack_sync_single
    return $?
}
function configure_proxy {
check_then_set enable_http_proxy ${MAAS_PROXY_ENABLED}
check_then_set use_peer_proxy ${MAAS_PEER_PROXY_ENABLED}
@ -117,8 +155,9 @@ function configure_dns {
}
function configure_images {
check_for_rack_sync
if [[ $? -eq 1 ]]
if [[ $? -eq 124 ]]
then
echo "Timed out waiting for rack controller sync."
return 1
@ -147,8 +186,17 @@ function configure_boot_sources {
fi
}
KEY=$(maas-region apikey --username=${ADMIN_USERNAME})
maas login ${ADMIN_USERNAME} ${MAAS_ENDPOINT} $KEY
# Fetch the admin API key from maas-region and log the MAAS CLI in with it.
#
# Globals:
#   ADMIN_USERNAME (read) - user whose API key is requested; also the
#                           CLI profile name passed to `maas login`
#   MAAS_ENDPOINT (read)  - URL passed to `maas login`
#   KEY (written)         - API key printed by maas-region
# Returns:
#   1 if no key was obtained; otherwise the status of `maas login`.
function maas_login {
    KEY=$(maas-region apikey --username=${ADMIN_USERNAME})

    # Without a key there is nothing to log in with.
    [[ -n "${KEY}" ]] || return 1

    # Last command in the function, so its status is the return value.
    maas login ${ADMIN_USERNAME} ${MAAS_ENDPOINT} ${KEY}
}
timer "$RETRY_TIMER" maas_login
configure_proxy
configure_ntp
@ -157,6 +205,7 @@ configure_dns
# make call to import images
configure_boot_sources
start_import
if [[ $? -eq 0 ]]
then
configure_images