Remove timeout logic

The new workflow relies on the Zuul timeout for the run playbook.
Critical tasks such as collect-logs should be in post-run,
which may include a dedicated post-timeout.

Implements sprint17 card [1]
[1] https://trello.com/c/AerVRkBH/887-replace-bash-environment-variables-in-tociquickstartsh-with-ansible-variables-from-zuul-inventory-or-zuul-job-config-s17

Change-Id: Ia98111e8c444460762a903914762ec0bc05a6009
This commit is contained in:
Quique Llorente 2018-08-06 10:07:44 +02:00 committed by Rafael Folco
parent 5bdbc2c960
commit c889115e1a
4 changed files with 59 additions and 105 deletions

View File

@ -36,12 +36,10 @@ fi
# Exported environment defaults. Every ${VAR:-default} form keeps a value the
# caller already set and falls back to the default only when VAR is unset or
# empty.
export TRIPLEO_ROOT=${TRIPLEO_ROOT:-"/opt/stack/new"}
# WORKSPACE defaults to a directory under TRIPLEO_ROOT, so it must be set
# after TRIPLEO_ROOT.
export WORKSPACE=${WORKSPACE:-"$TRIPLEO_ROOT/workspace"}
# NOTE(review): the unit of DEVSTACK_GATE_TIMEOUT is not stated here
# (presumably minutes) -- confirm.
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-"180"}
# Put /sbin and /usr/sbin ahead of the inherited PATH.
export PATH=/sbin:/usr/sbin:$PATH
export UNDERCLOUD_VM_NAME=instack
export REMAINING_TIME=${REMAINING_TIME:-180}
# Defaults to the empty string when no registry proxy is provided.
export NODEPOOL_DOCKER_REGISTRY_PROXY=${NODEPOOL_DOCKER_REGISTRY_PROXY:-""}
# post ci chores to run at the end of ci

View File

@ -72,86 +72,68 @@ function is_featureset {
[ $(shyaml get-value "${type}" "False"< "${featureset_file}") = "True" ]
}
#######################################
# Run a command under /usr/bin/timeout, limited to the time still available
# to the job minus a reserve kept back for log collection.
# Globals:
#   TOCI_JOBTYPE   (read)  a job type containing "ovb" reserves 20 minutes,
#                          any other job type reserves 3
#   REMAINING_TIME (read/written) total job budget in minutes; set to 180
#                          when unset or empty
# Arguments:
#   $1   - job start time in seconds since the epoch (START_JOB_TIME)
#   $2.. - command and arguments to execute
# Returns:
#   143 without running anything when less than one minute is left for the
#   command; otherwise the status reported by
#   `timeout --preserve-status` for the command.
#######################################
function run_with_timeout {
    # All working variables are local so the function no longer overwrites
    # caller variables named JOB_TIME, COMMAND, RESERVED_LOG_TIME or
    # TIME_FOR_COMMAND.
    local job_start=$1
    shift

    # The arguments are deliberately flattened into one string and re-split
    # below: callers pass quoted multi-word and possibly empty arguments and
    # rely on them being split into words / dropped before execution.
    local command_line="$*"

    # Minutes held back for quickstart log collection.
    local reserved_log_time=3
    if [[ "$TOCI_JOBTYPE" =~ "ovb" ]]; then
        reserved_log_time=20
    fi

    # Kept global on purpose: code outside this function reads REMAINING_TIME.
    REMAINING_TIME=${REMAINING_TIME:-180}

    local now
    now=$(date +%s)

    # Budget minus whole minutes elapsed since job start minus the reserve.
    local time_for_command=$(( REMAINING_TIME - (now - job_start) / 60 - reserved_log_time ))
    if (( time_for_command < 1 )); then
        return 143
    fi

    # shellcheck disable=SC2086 -- intentional word splitting, see above.
    /usr/bin/timeout --preserve-status "${time_for_command}m" ${command_line}
}
# create_collect_logs_script: writes $LOGS_DIR/collect_logs.sh from a heredoc.
#
# The heredoc delimiters are unquoted, so unescaped $VAR / ${VAR:-''}
# references are expanded when this function runs and their values are baked
# into the generated script. Escaped references (\$i, \$(basename \$i)) are
# written out literally and evaluated only when the generated script runs.
# The {{ ... }} placeholders are presumably substituted by a template engine
# before the shell sees this file -- confirm.
#
# NOTE(review): this span opens the same output file twice (`cat <<-EOF` and
# `cat > ... <<END`), contains two terminators (EOF and END) and repeats many
# lines; it looks like two revisions of the function interleaved. As written,
# everything up to the EOF line is heredoc text and the lines after it run
# directly in the function. Confirm which variant is intended before editing.
function create_collect_logs_script {
cat <<-EOF > $LOGS_DIR/collect_logs.sh
#!/bin/bash
set -x
cat > $LOGS_DIR/collect_logs.sh <<END
#!/bin/bash
set -x
export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export START_JOB_TIME=${START_JOB_TIME:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-''}
export REMAINING_TIME=${REMAINING_TIME:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE={{ nodes_file }}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}
export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE=${NODES_FILE:-''}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}
set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh
set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh
/usr/bin/timeout --preserve-status 40m $QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collectlogs.log for details"
sudo unbound-control dump_cache > /tmp/dns_cache.txt
sudo chown {{ ansible_user_id }}: /tmp/dns_cache.txt
cat /tmp/dns_cache.txt | gzip - > $LOGS_DIR/dns_cache.txt.gz
cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true
mkdir -p $LOGS_DIR/quickstart_files
find $LOCAL_WORKING_DIR -maxdepth 1 -type f -not -name "*sqlite" | while read i; do gzip -cf \$i > $LOGS_DIR/quickstart_files/\$(basename \$i).txt.gz; done
if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
fi
$QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collectlogs.log for details"
# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true
cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true
# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true
if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
fi
# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'
# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true
du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
EOF
# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true
# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'
du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
END
}

View File

@ -62,7 +62,6 @@ export NODECOUNT=0
# Host lists and the subnode SSH key start out empty and exported.
export OVERCLOUD_HOSTS=
export CONTROLLER_HOSTS=
export SUBNODES_SSH_KEY=
# DEVSTACK_GATE_TIMEOUT multiplied by 60; passed to `testenv-client -t`
# further down. NOTE(review): presumably a minutes-to-seconds conversion --
# confirm the unit of DEVSTACK_GATE_TIMEOUT.
TIMEOUT_SECS=$((DEVSTACK_GATE_TIMEOUT*60))
# Keep caller-supplied values; default to the empty string otherwise.
export EXTRA_VARS=${EXTRA_VARS:-""}
export VXLAN_VARS=${VXLAN_VARS:-""}
export EXTRANODE=""
@ -170,7 +169,7 @@ if [ -z "${TE_DATAFILE:-}" -a "{{ environment_infra }}" = "ovb" ] ; then
NETISO_ENV="multi-nic"
# provision env in rh cloud, then start quickstart
./testenv-client -b $GEARDSERVER:4730 -t $TIMEOUT_SECS \
./testenv-client -b $GEARDSERVER:4730 -t {{ zuul.timeout }} \
--envsize $NODECOUNT --ucinstance $UCINSTANCEID \
--net-iso $NETISO_ENV $EXTRANODE -- ./toci_quickstart.sh
elif [ "{{ environment_infra }}" = "ovb" ] ; then

View File

@ -141,51 +141,26 @@ done
{% else %}
## LOGS COLLECTION PREPARE
create_collect_logs_script
for playbook in {{ " ".join(playbooks) }}; do
echo "${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}"
run_with_timeout $START_JOB_TIME $QUICKSTART_INSTALL_CMD \
"${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}" \
for playbook in {{ " ".join(playbooks) }}; do
echo ${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}
$QUICKSTART_INSTALL_CMD \
${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG} \
{{ nodes_args }} \
{{ featureset_conf }} \
{{ env_vars }} \
{{ extra_vars }} \
{{ vxlan_vars }} \
$DEFAULT_ARGS \
--extra-vars ci_job_end_time=$(( START_JOB_TIME + REMAINING_TIME*60 )) \
--extra-vars @{{ workspace }}/logs/zuul-variables.yaml \
$LOCAL_WORKING_DIR/playbooks/$playbook "${PLAYBOOKS_ARGS[$playbook]:-}" \
$LOCAL_WORKING_DIR/playbooks/$playbook ${PLAYBOOKS_ARGS[$playbook]:-} \
2>&1 | tee -a $LOGS_DIR/quickstart_install.log && exit_value=0 || exit_value=$?
# Print status of playbook run
[[ "$exit_value" == 0 ]] && echo "Playbook run of $playbook passed successfully"
[[ "$exit_value" != 0 ]] && echo "Playbook run of $playbook failed" && break
done
done
[[ "$exit_value" == 0 ]] && echo "Playbook run passed successfully" || echo "Playbook run failed"
## LOGS COLLECTION RUN (if applicable)
if [[ "${NODEPOOL_PROVIDER:-''}" == "rdo-cloud-tripleo" ]] || [[ "${NODEPOOL_PROVIDER:-''}" == "tripleo-test-cloud-rh1" ]]; then
if [[ "$TOCI_JOBTYPE" =~ "ovb" ]]; then
bash $LOGS_DIR/collect_logs.sh
# rename script to not to run it in multinode jobs
mv $LOGS_DIR/collect_logs.sh $LOGS_DIR/ovb_collect_logs.sh
fi
fi
{% endif %}
# Return to the directory that was current before the matching pushd
# (the pushd itself is outside this excerpt).
popd

# Dump the unbound DNS cache, hand the dump to the templated user, and store
# a gzip-compressed copy in the logs directory.
sudo unbound-control dump_cache > /tmp/dns_cache.txt
sudo chown {{ ansible_user_id }}: /tmp/dns_cache.txt
gzip -c < /tmp/dns_cache.txt > "$LOGS_DIR/dns_cache.txt.gz"

{% if periodic|bool %}
# Periodic jobs only: if hash_info.sh exists in the workspace, append the
# job's exit value to it.
if [[ -e {{ workspace }}/hash_info.sh ]] ; then
echo export JOB_EXIT_VALUE=$exit_value >> {{ workspace }}/hash_info.sh
fi
{% endif %}

# Compress every regular file found directly in $LOCAL_WORKING_DIR (names
# ending in "sqlite" excluded) into quickstart_files/<name>.txt.gz.
# `IFS= read -r` plus quoting keeps paths containing whitespace, backslashes
# or glob characters intact.
mkdir -p "$LOGS_DIR/quickstart_files"
find "$LOCAL_WORKING_DIR" -maxdepth 1 -type f -not -name "*sqlite" | while IFS= read -r i; do gzip -cf "$i" > "$LOGS_DIR/quickstart_files/$(basename "$i").txt.gz"; done

echo 'Quickstart completed.'
# Exit with the value held in exit_value (assigned earlier in the script).
exit $exit_value