9507d97d2a
Download_mirror.sh takes 15 hours to download all the rpms and tarballs required to build StarlingX into a fresh workspace. It should be much faster than that. Replace the current serial download algorithm with one that is parallel. I'll cap it at 8 parallel downloads for now. I'm a little worried about overwelming CENGN. This is sufficient to drop download times from 15 to 3 hours for a fresh workspace, and 30 min to 5 min to refresh an existing workspace. Closes-Bug: 1918477 Signed-off-by: Scott Little <scott.little@windriver.com> Change-Id: I469b4fee3cb304fe2984aa697ce2dc6cec52e79e
455 lines
12 KiB
Bash
Executable File
455 lines
12 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# download RPMs/SRPMs from different sources.
|
|
# this script was originated by Brian Avery, and later updated by Yong Hu
|
|
|
|
# set -o errexit
|
|
# set -o nounset
|
|
|
|
# By default, we use "sudo" and we don't use a local yum.conf. These can
|
|
# be overridden via flags.
|
|
|
|
SUDOCMD="sudo -E"
|
|
RELEASEVER="--releasever=7"
|
|
YUMCONFOPT=""
|
|
|
|
DL_RPMS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
|
|
|
|
source $DL_RPMS_DIR/utils.sh
|
|
|
|
usage() {
|
|
echo "$0 [-n] [-c <yum.conf>] [-s|-S|-u|-U] [-x] <rpms_list> <match_level> "
|
|
echo ""
|
|
echo "Options:"
|
|
echo " -n: Do not use sudo when performing operations"
|
|
echo " -c: Use an alternate yum.conf rather than the system file"
|
|
echo " -x: Clean log files only, do not run."
|
|
echo " rpm_list: a list of RPM files to be downloaded."
|
|
echo " match_level: value could be L1, L2 or L3:"
|
|
echo " L1: use name, major version and minor version:"
|
|
echo " vim-7.4.160-2.el7 to search vim-7.4.160-2.el7.src.rpm"
|
|
echo " L2: use name and major version:"
|
|
echo " using vim-7.4.160 to search vim-7.4.160-2.el7.src.rpm"
|
|
echo " L3: use name:"
|
|
echo " using vim to search vim-7.4.160-2.el7.src.rpm"
|
|
echo " K1: Use Koji rather than yum repos as a source."
|
|
echo " Koji has a longer retention period than epel mirrors."
|
|
echo ""
|
|
echo " Download Source Options: Only select one of these."
|
|
echo " -s: Download from StarlingX mirror only"
|
|
echo " -S: Download from StarlingX mirror, upstream as backup (default)"
|
|
echo " -u: Download from original upstream sources only"
|
|
echo " -U: Download from original upstream sources, StarlingX mirror as backup"
|
|
echo ""
|
|
echo "Returns: 0 = All files downloaded successfully"
|
|
echo " 1 = Some files could not be downloaded"
|
|
echo " 2 = Bad arguements or other error"
|
|
echo ""
|
|
}
|
|
|
|
|
|
CLEAN_LOGS_ONLY=0
|
|
dl_rc=0
|
|
|
|
# Permitted values of dl_source
|
|
dl_from_stx_mirror="stx_mirror"
|
|
dl_from_upstream="upstream"
|
|
dl_from_stx_then_upstream="$dl_from_stx_mirror $dl_from_upstream"
|
|
dl_from_upstream_then_stx="$dl_from_upstream $dl_from_stx_mirror"
|
|
|
|
# Download from what source?
|
|
# dl_from_stx_mirror = StarlingX mirror only
|
|
# dl_from_upstream = Original upstream source only
|
|
# dl_from_stx_then_upstream = Either source, STX prefered (default)"
|
|
# dl_from_upstream_then_stx = Either source, UPSTREAM prefered"
|
|
dl_source="$dl_from_stx_then_upstream"
|
|
dl_flag=""
|
|
|
|
distro="centos"
|
|
|
|
MULTIPLE_DL_FLAG_ERROR_MSG="Error: Please use only one of: -s,-S,-u,-U"
|
|
|
|
multiple_dl_flag_check () {
|
|
if [ "$dl_flag" != "" ]; then
|
|
echo "$MULTIPLE_DL_FLAG_ERROR_MSG"
|
|
usage
|
|
exit 1
|
|
fi
|
|
}
|
|
|
|
# Parse option flags
|
|
while getopts "c:nxD:sSuUh" o; do
|
|
case "${o}" in
|
|
n)
|
|
# No-sudo
|
|
SUDOCMD=""
|
|
;;
|
|
x)
|
|
# Clean only
|
|
CLEAN_LOGS_ONLY=1
|
|
;;
|
|
c)
|
|
# Use an alternate yum.conf
|
|
YUMCONFOPT="-c $OPTARG"
|
|
grep -q "releasever=" $OPTARG && RELEASEVER="--$(grep releasever= ${OPTARG})"
|
|
;;
|
|
D)
|
|
distro="${OPTARG}"
|
|
;;
|
|
|
|
s)
|
|
# Download from StarlingX mirror only. Do not use upstream sources.
|
|
multiple_dl_flag_check
|
|
dl_source="$dl_from_stx_mirror"
|
|
dl_flag="-s"
|
|
;;
|
|
S)
|
|
# Download from StarlingX mirror first, only use upstream source as a fallback.
|
|
multiple_dl_flag_check
|
|
dl_source="$dl_from_stx_then_upstream"
|
|
dl_flag="-S"
|
|
;;
|
|
u)
|
|
# Download from upstream only. Do not use StarlingX mirror.
|
|
multiple_dl_flag_check
|
|
dl_source="$dl_from_upstream"
|
|
dl_flag="-u"
|
|
;;
|
|
U)
|
|
# Download from upstream first, only use StarlingX mirror as a fallback.
|
|
multiple_dl_flag_check
|
|
dl_source="$dl_from_upstream_then_stx"
|
|
dl_flag="-U"
|
|
;;
|
|
|
|
h)
|
|
# Help
|
|
usage
|
|
exit 0
|
|
;;
|
|
*)
|
|
usage
|
|
exit 2
|
|
;;
|
|
esac
|
|
done
|
|
shift $((OPTIND-1))
|
|
|
|
if [ $# -lt 2 ]; then
|
|
usage
|
|
exit 2
|
|
fi
|
|
|
|
if [ "$1" == "" ]; then
|
|
echo "Need to supply the rpm file list"
|
|
exit 2;
|
|
else
|
|
rpms_list=$1
|
|
echo "using $rpms_list as the download name lists"
|
|
fi
|
|
|
|
match_level="L1"
|
|
|
|
if [ ! -z "$2" -a "$2" != " " ];then
|
|
match_level=$2
|
|
fi
|
|
|
|
timestamp=$(date +%F_%H%M)
|
|
echo $timestamp
|
|
|
|
export DL_MIRROR_LOG_DIR="${DL_MIRROR_LOG_DIR:-./logs}"
|
|
export DL_MIRROR_OUTPUT_DIR="${DL_MIRROR_OUTPUT_DIR:-./output/stx/CentOS/}"
|
|
|
|
MDIR_SRC="${DL_MIRROR_OUTPUT_DIR}/Source"
|
|
mkdir -p "$MDIR_SRC"
|
|
MDIR_BIN="${DL_MIRROR_OUTPUT_DIR}/Binary"
|
|
mkdir -p "$MDIR_BIN"
|
|
|
|
LOGSDIR="${DL_MIRROR_LOG_DIR}"
|
|
from=$(get_from $rpms_list)
|
|
LOG="$LOGSDIR/${match_level}_failmoved_url_${from}.log"
|
|
MISSING_SRPMS="$LOGSDIR/${match_level}_srpms_missing_${from}.log"
|
|
MISSING_RPMS="$LOGSDIR/${match_level}_rpms_missing_${from}.log"
|
|
FOUND_SRPMS="$LOGSDIR/${match_level}_srpms_found_${from}.log"
|
|
FOUND_RPMS="$LOGSDIR/${match_level}_rpms_found_${from}.log"
|
|
cat /dev/null > $LOG
|
|
cat /dev/null > $MISSING_SRPMS
|
|
cat /dev/null > $MISSING_RPMS
|
|
cat /dev/null > $FOUND_SRPMS
|
|
cat /dev/null > $FOUND_RPMS
|
|
|
|
|
|
if [ $CLEAN_LOGS_ONLY -eq 1 ];then
|
|
exit 0
|
|
fi
|
|
|
|
STOP_SCHEDULING=0
|
|
FOUND_ERRORS=0
|
|
MAX_WORKERS=8
|
|
workers=0
|
|
max_workers=$MAX_WORKERS
|
|
|
|
# An array that maps worker index to pid, or to two special values
|
|
# 'Idle' indicates no running thread.
|
|
# 'Busy' indicates the worker is allocated, but it's pid isn't known yet.
|
|
declare -A dl_env
|
|
|
|
#
|
|
# init_dl_env: Init the array that maps worker index to pid.
|
|
#
|
|
init_dl_env () {
|
|
local i=0
|
|
local stop
|
|
|
|
stop=$((max_workers-1))
|
|
for i in $(seq 0 $stop); do
|
|
dl_env[$i]='Idle'
|
|
done
|
|
}
|
|
|
|
#
|
|
# get_idle_dl_env: Find an idle worker, mark it allocated
|
|
# and return it's index.
|
|
get_idle_dl_env () {
|
|
local i=0
|
|
local stop
|
|
|
|
stop=$((max_workers-1))
|
|
if [ $stop -ge 255 ]; then
|
|
stop=254
|
|
fi
|
|
|
|
for i in $(seq 0 $stop); do
|
|
if [ ${dl_env[$i]} == 'Idle' ]; then
|
|
dl_env[$i]='Busy'
|
|
return $i
|
|
fi
|
|
done
|
|
|
|
return 255
|
|
}
|
|
|
|
#
|
|
# set_dl_env_pid: Set the pid of a previously allocated worker
|
|
#
|
|
set_dl_env_pid () {
|
|
local idx=$1
|
|
local val=$2
|
|
dl_env[$idx]=$val
|
|
}
|
|
|
|
#
|
|
# release_dl_env: Mark a worker as idle. Call after reaping the thread.
|
|
#
|
|
release_dl_env () {
|
|
local idx=$1
|
|
dl_env[$idx]='Idle'
|
|
}
|
|
|
|
#
|
|
# reaper: Look for worker threads that have exited.
|
|
# Check/log it's exit code, and release the worker.
|
|
# Return the number of threads reaped.
|
|
#
|
|
reaper () {
|
|
local reaped=0
|
|
local last_reaped=-1
|
|
local i=0
|
|
local stop
|
|
local p=0
|
|
local ret=0
|
|
|
|
stop=$((max_workers-1))
|
|
if [ $stop -ge 255 ]; then
|
|
stop=254
|
|
fi
|
|
|
|
while [ $reaped -gt $last_reaped ]; do
|
|
last_reaped=$reaped
|
|
for i in $(seq 0 $stop); do
|
|
p=${dl_env[$i]}
|
|
if [ "$p" == "Idle" ] || [ "$p" == "Busy" ]; then
|
|
continue
|
|
fi
|
|
# echo "test $i $p"
|
|
kill -0 $p &> /dev/null
|
|
if [ $? -ne 0 ]; then
|
|
wait $p
|
|
ret=$?
|
|
workers=$((workers-1))
|
|
reaped=$((reaped+1))
|
|
release_dl_env $i
|
|
if [ $ret -ne 0 ]; then
|
|
sleep 1
|
|
echo "ERROR: $FUNCNAME (${LINENO}): Failed to download in 'b$i'"
|
|
cat "$DL_MIRROR_LOG_DIR/$i" >> $DL_MIRROR_LOG_DIR/errors
|
|
echo "ERROR: $FUNCNAME (${LINENO}): Failed to download in 'b$i'" >> $DL_MIRROR_LOG_DIR/errors
|
|
echo "" >> $DL_MIRROR_LOG_DIR/errors
|
|
FOUND_ERRORS=1
|
|
fi
|
|
fi
|
|
done
|
|
done
|
|
return $reaped
|
|
}
|
|
|
|
#
|
|
# download_worker: Download one file.
|
|
# This is the entry point for a worker thread.
|
|
#
|
|
download_worker () {
|
|
local dl_idx=$1
|
|
local ff="$2"
|
|
local _level=$3
|
|
|
|
local rpm_name=""
|
|
local dest_dir=""
|
|
local rc=0
|
|
local dl_result=1
|
|
local lvl=""
|
|
local download_cmd=""
|
|
local download_url=""
|
|
local SFILE=""
|
|
local _arch=""
|
|
|
|
_arch=$(get_arch_from_rpm $ff)
|
|
rpm_name="$(get_rpm_name $ff)"
|
|
dest_dir="$(get_dest_directory $_arch)"
|
|
|
|
if [ ! -e $dest_dir/$rpm_name ]; then
|
|
for dl_src in $dl_source; do
|
|
case $dl_src in
|
|
$dl_from_stx_mirror)
|
|
lvl=$dl_from_stx_mirror
|
|
;;
|
|
$dl_from_upstream)
|
|
lvl=$_level
|
|
;;
|
|
*)
|
|
echo "Error: Unknown dl_source '$dl_src'"
|
|
continue
|
|
;;
|
|
esac
|
|
|
|
download_cmd="$(get_download_cmd $ff $lvl)"
|
|
|
|
echo "Looking for $rpm_name"
|
|
echo "--> run: $download_cmd"
|
|
if $download_cmd ; then
|
|
download_url="$(get_url $ff $lvl)"
|
|
SFILE="$(get_rpm_level_name $rpm_name $lvl)"
|
|
process_result "$_arch" "$dest_dir" "$download_url" "$SFILE"
|
|
dl_result=0
|
|
break
|
|
else
|
|
echo "Warning: $rpm_name not found"
|
|
fi
|
|
done
|
|
|
|
if [ $dl_result -eq 1 ]; then
|
|
echo "Error: $rpm_name not found"
|
|
echo "missing_srpm:$rpm_name" >> $LOG
|
|
echo $rpm_name >> $MISSING_SRPMS
|
|
rc=1
|
|
fi
|
|
else
|
|
echo "Already have $dest_dir/$rpm_name"
|
|
fi
|
|
return $rc
|
|
}
|
|
|
|
# Function to download different types of RPMs in different ways
|
|
download () {
|
|
local _file=$1
|
|
local _level=$2
|
|
local _list=""
|
|
local _from=""
|
|
|
|
local _arch=""
|
|
|
|
|
|
FOUND_ERRORS=0
|
|
_list=$(cat $_file)
|
|
_from=$(get_from $_file)
|
|
|
|
echo "now the rpm will come from: $_from"
|
|
for ff in $_list; do
|
|
# Free up a worker if none available
|
|
while [ $workers -ge $max_workers ]; do
|
|
reaper
|
|
reaped=$?
|
|
if [ $reaped -eq 0 ]; then
|
|
sleep 0.1
|
|
fi
|
|
done
|
|
|
|
# Allocate a worker. b=the worker index
|
|
workers=$((workers+1))
|
|
get_idle_dl_env
|
|
b=$?
|
|
if [ $b -ge 255 ]; then
|
|
echo "get_idle_dl_env failed to find a free slot"
|
|
exit 1
|
|
fi
|
|
PREFIX="b$b"
|
|
|
|
# Launch a thread in the background
|
|
( download_worker $b $ff $_level 2>&1 | sed "s#^#${PREFIX}: #" | tee $DL_MIRROR_LOG_DIR/$b; exit ${PIPESTATUS[0]} ) &
|
|
|
|
# Record the pid of background process
|
|
pp=$!
|
|
set_dl_env_pid $b $pp
|
|
done
|
|
|
|
# Wait for remaining workers to exit
|
|
while [ $workers -gt 0 ]; do
|
|
reaper
|
|
reaped=$?
|
|
if [ $reaped -eq 0 ]; then
|
|
sleep 0.1
|
|
fi
|
|
done
|
|
|
|
return $FOUND_ERRORS
|
|
}
|
|
|
|
|
|
# Init the pool of worker threads
|
|
init_dl_env
|
|
|
|
|
|
# Prime the cache
|
|
loop_count=0
|
|
max_loop_count=5
|
|
echo "${SUDOCMD} yum ${YUMCONFOPT} ${RELEASEVER} makecache"
|
|
while ! ${SUDOCMD} yum ${YUMCONFOPT} ${RELEASEVER} makecache fast ; do
|
|
# To protect against intermittent 404 errors, we'll retry
|
|
# a few times. The suspected issue is pulling repodata
|
|
# from multiple source that are temporarily inconsistent.
|
|
loop_count=$((loop_count + 1))
|
|
if [ $loop_count -gt $max_loop_count ]; then
|
|
break
|
|
fi
|
|
echo "makecache retry: $loop_count"
|
|
|
|
# Wipe the inconsistent data from the last try
|
|
echo "yum ${YUMCONFOPT} ${RELEASEVER} clean all"
|
|
yum ${YUMCONFOPT} ${RELEASEVER} clean all
|
|
done
|
|
|
|
|
|
# Download files
|
|
if [ -s "$rpms_list" ];then
|
|
echo "--> start searching $rpms_list"
|
|
download $rpms_list $match_level
|
|
if [ $? -ne 0 ]; then
|
|
dl_rc=1
|
|
fi
|
|
fi
|
|
|
|
echo "Done!"
|
|
|
|
exit $dl_rc
|