tripleo-ci/scripts/getthelogs
Bogdan Dobrelya 400860df14 Allow getthelogs to fetch job logs from rdoproject
CI job logs from logs.rdoproject.org use another
console log name and there is no ending '/log' in the URLs.

It would be nice to be able to collect those logs for
tripleo CI jobs as well.

Change-Id: I137cd3eb749f24cb72bb675faca625a0f1f68000
Signed-off-by: Bogdan Dobrelya <bdobreli@redhat.com>
2018-01-22 13:16:33 +01:00

78 lines
2.1 KiB
Bash
Executable File

#!/bin/bash
set -eu -o pipefail
function usage(){
echo "Helper script for downloading tripleo-ci jobs logs"
echo
echo "Example:"
echo "getthelogs http://logs.openstack.org/00/123456/7/check/gate-tripleo-ci-foo/d3adbeef"
echo
echo "Downloads the logs and starts a shell from the logs root directory"
}
function finish(){
rc=${rc:-$?}
trap - EXIT
cd $TDIR/../
echo "Download job exited ${rc}"
PS1="JOBLOGS ]\$ " bash --noprofile --norc
}
function get_dirs(){
local drop="\b(etc|ara|ara_oooq|docs|build|stackviz|sudoers.d|lib|config-data|extra)\b"
local directories=""
directories=$(curl -s "$1" 2> /dev/null | grep -E "\[DIR" | grep -vE "${drop}" | sed -e "s,.*href=\"\([^\"]*\)\".*,${1}\1,g")
if [ -n "$directories" ]; then
for d in $directories; do
directories="$directories $(get_dirs $d/)"
done
echo $directories
else
echo ""
fi
return 0
}
[[ "${1:--}" =~ ^\s+?- ]] && (usage; exit 1)
type -p wget 2>&1 >/dev/null || ( echo "Please install a wget tool!"; exit 127 )
trap finish EXIT SIGINT SIGTERM
WORKERS=6
BASEURL=${1%/}
SC=$(dirname $BASEURL | grep -o \/ | wc -w)
if [[ $BASEURL =~ 'logs.rdoproject' && SC -le 9 ]] ||\
[[ $BASEURL =~ 'logs.rdoproject.org/openstack-periodic' && SC -le 5 ]]; then
console="$BASEURL/console.txt.gz"
elif [[ ! $(basename $BASEURL) == 'logs' && SC -le 7 ]]; then
console="$BASEURL/job-output.txt.gz"
BASEURL=${BASEURL}/logs
else
console=''
fi
TDIR=${BASEURL##*http://}
TDIR=${TDIR##*https://}
TDIR=/tmp/${TDIR}
mkdir -p $TDIR
cd /tmp
echo "Target dir for download: $TDIR"
echo Will download logs from the following URLs:
list_to_get="$console $(get_dirs $BASEURL/)"
for d in $list_to_get; do
echo $d
done
rm -f wget-jobs.txt
for d in $list_to_get; do
args="\"-nv -nc --no-use-server-timestamps \
--accept-regex='\.txt\.gz$|messages$' \
--reject='index.html*' \
--recursive -l 10 --domains logs.openstack.org,logs.rdoproject.org \
--no-parent \
-erobots=off --wait 0.25 ${d}\""
echo "${args}" >> wget-jobs.txt
done
cat wget-jobs.txt | sed -n '{p;p}' | shuf > wget-jobs-shuf.txt
cat wget-jobs-shuf.txt | xargs -r -n1 -P ${WORKERS} -I{} sh -c "wget {}"