Add dry run option to toci_quickstart - playbook order intact

With dry run, playbooks are not executed.
The env variables, releases script output and playbook
commands are written out to files for debugging.

The review also moves the collect logs lines to a
function for easier organization

Documentation for the dry run option and variable
files is added.

Change-Id: I0bad5ee3150d94115bed018de9488590305a2b80
This commit is contained in:
Ronelle Landy 2018-05-27 17:53:11 -04:00
parent aa8d2d8454
commit 3dab6f0df0
4 changed files with 144 additions and 83 deletions

View File

@ -49,12 +49,39 @@ Quickstart itself.</p>
<ul>
<li><a href='undercloud/home/zuul/'>undercloud/home/zuul/</a>
- the source and log output of all templated shell scripts</li>
<li><a href='undercloud/var/log/'>undercloud/var/log/</a> -
directories and files copied from /var/log on the undercloud.
If other overcloud/subnodes exist, similar $node/var/log
directories will also exist in these logs.</li>
<li><a href='undercloud/var/log/extra/'>undercloud/var/log/extra/</a> -
extra system details like package list, and cpu info gathered from the
undercloud</li>
<li><a href='undercloud/var/lib/mistral'>undercloud/var/lib/mistral</a> - output of all ansible used by config-download to drive the overcloud deployment</li>
<li><a href='undercloud/var/lib/mistral'>undercloud/var/lib/mistral</a>
- output of all ansible used by config-download to drive the overcloud deployment</li>
<li><a href='stackwiz'>stackwiz</a> - tempest test results</li>
<li><a href='docs/build/'>docs/build/</a> - autogenerated documentation</li>
</ul>
<h1>Variables used in the job run</h1>
<p>The logs contain files showing variables used in the job run.</p>
<ul>
<li><a href='undercloud/var/log/extra/dump_variables_vars.json.txt.gz'>undercloud/var/log/extra/dump_variables_vars.json.txt.gz</a>
- contains the variables used in running the actual test</li>
<li><a href='releases.sh'>releases.sh</a>
- the output of the script setting release-related variables</li>
<li><a href='playbook_executions.log'>playbook_executions.log</a>
- prints out the complete commands, with all expanded arguments,
to run the playbooks</li>
</ul>
<h1>Dry Run option</h1>
<p>As a debugging step, a job can be run manually with '-dryrun'
appended to the job name. When the "playbook dry run" option is invoked,
the playbooks will not execute and collect logs will not run but
certain log files, including 'toci_env_args_output.log', which
contains the environment variables used in the job, and
playbook_executions.log will still be produced in the logs
directory for inspection. This option serves to assist with
debugging and to test the testing scripts themselves.</p>
</body>
</html>

View File

@ -88,3 +88,71 @@ function run_with_timeout {
/usr/bin/timeout --preserve-status ${TIME_FOR_COMMAND}m ${COMMAND}
}
# Write $LOGS_DIR/collect_logs.sh and, for OVB jobs on the rdo-cloud-tripleo
# or tripleo-test-cloud-rh1 nodepool providers, run it right away.
#
# The heredoc delimiter is unquoted, so every $VAR / ${VAR:-''} inside it is
# expanded NOW, while the script is being generated; the resulting file
# contains the literal values, not the variable references. A trailing
# backslash inside the heredoc is likewise consumed at generation time and
# joins the two lines in the generated script.
#
# Globals read: LOGS_DIR, WORKSPACE, HOME, USER, OOOQ_DIR, LOCAL_WORKING_DIR,
#   TOCI_JOBTYPE, QUICKSTART_COLLECTLOGS_CMD, and (optionally)
#   NODEPOOL_PROVIDER, STATS_TESTENV, STATS_OOOQ, START_JOB_TIME,
#   ZUUL_PIPELINE, DEVSTACK_GATE_TIMEOUT, REMAINING_TIME, ZUUL_CHANGES,
#   NODES_FILE, STABLE_RELEASE, QUICKSTART_RELEASE.
# Outputs: $LOGS_DIR/collect_logs.sh (renamed to ovb_collect_logs.sh after
#   it has been executed for an OVB job).
function collect_logs {
    cat <<-EOF > "$LOGS_DIR/collect_logs.sh"
#!/bin/bash
set -x
export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export START_JOB_TIME=${START_JOB_TIME:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-''}
export REMAINING_TIME=${REMAINING_TIME:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE=${NODES_FILE:-''}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}
set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh
/usr/bin/timeout --preserve-status 40m $QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collect_logs.log for details"
cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true
if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
fi
# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true
# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true
# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'
du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
EOF
    # Only OVB jobs on these two providers run the collection from here.
    if [[ "${NODEPOOL_PROVIDER:-}" == "rdo-cloud-tripleo" ]] || [[ "${NODEPOOL_PROVIDER:-}" == "tripleo-test-cloud-rh1" ]]; then
        # Quoted right-hand side: "ovb" is matched as a literal substring.
        if [[ "$TOCI_JOBTYPE" =~ "ovb" ]]; then
            bash "$LOGS_DIR/collect_logs.sh"
            # rename the script so that it is not run in multinode jobs
            mv "$LOGS_DIR/collect_logs.sh" "$LOGS_DIR/ovb_collect_logs.sh"
        fi
    fi
}

View File

@ -117,6 +117,8 @@ TIMEOUT_SECS=$((DEVSTACK_GATE_TIMEOUT*60))
export EXTRA_VARS=${EXTRA_VARS:-""}
export NODES_ARGS=""
export EXTRANODE=""
# Set playbook execution status
export PLAYBOOK_DRY_RUN=${PLAYBOOK_DRY_RUN:=0}
export COLLECT_CONF="$TRIPLEO_ROOT/tripleo-ci/toci-quickstart/config/collect-logs.yml"
LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
LWD=$LOCAL_WORKING_DIR
@ -221,6 +223,9 @@ for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do
;;
gate)
;;
dryrun)
PLAYBOOK_DRY_RUN=1
;;
*)
# the rest should be node configuration
NODES_FILE="$TRIPLEO_ROOT/tripleo-quickstart/config/nodes/$JOB_TYPE_PART.yml"

View File

@ -121,94 +121,55 @@ set -u
source $OOOQ_DIR/ansible_ssh_env.sh
[[ -n ${STATS_OOOQ:-''} ]] && export STATS_OOOQ=$(( $(date +%s) - STATS_OOOQ ))
for playbook in $PLAYBOOKS; do
echo "${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}"
run_with_timeout $START_JOB_TIME $QUICKSTART_INSTALL_CMD \
"${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}" \
$NODES_ARGS \
$FEATURESET_CONF \
$ENV_VARS \
$EXTRA_VARS \
$DEFAULT_ARGS \
--extra-vars ci_job_end_time=$(( START_JOB_TIME + REMAINING_TIME*60 )) \
$LOCAL_WORKING_DIR/playbooks/$playbook "${PLAYBOOKS_ARGS[$playbook]:-}" \
2>&1 | tee -a $LOGS_DIR/quickstart_install.log && exit_value=0 || exit_value=$?
# Print status of playbook run
[[ "$exit_value" == 0 ]] && echo "Playbook run of $playbook passed successfully"
[[ "$exit_value" != 0 ]] && echo "Playbook run of $playbook failed" && break
done
[[ "$exit_value" == 0 ]] && echo "Playbook run passed successfully" || echo "Playbook run failed"
## LOGS COLLECTION
cat <<EOF > $LOGS_DIR/collect_logs.sh
#!/bin/bash
set -x
export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export START_JOB_TIME=${START_JOB_TIME:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-''}
export REMAINING_TIME=${REMAINING_TIME:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE=${NODES_FILE:-''}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}
set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh
/usr/bin/timeout --preserve-status 40m $QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collectlogs.log for details"
cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true
if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
# Debug step capture env variables
if [[ "$PLAYBOOK_DRY_RUN" == "1" ]]; then
echo "-- Capture Environment Variables Used ---------"
echo "$(env)" | tee -a $LOGS_DIR/toci_env_args_output.log
declare -p | tee -a $LOGS_DIR/toci_env_args_output.log
fi
# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true
echo "-- Playbooks Output --------------------------"
for playbook in $PLAYBOOKS; do
echo "$QUICKSTART_INSTALL_CMD \
${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG} \
$NODES_ARGS \
$FEATURESET_CONF \
$ENV_VARS \
$EXTRA_VARS \
$DEFAULT_ARGS \
$LOCAL_WORKING_DIR/playbooks/$playbook ${PLAYBOOKS_ARGS[$playbook]:-}" \
| sed 's/--/\n--/g' \
| tee -a $LOGS_DIR/playbook_executions.log
echo "# --------------------------------------- " \
| tee -a $LOGS_DIR/playbook_executions.log
done
# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true
if [[ "$PLAYBOOK_DRY_RUN" == "1" ]]; then
exit_value=0
else
for playbook in $PLAYBOOKS; do
echo "${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}"
run_with_timeout $START_JOB_TIME $QUICKSTART_INSTALL_CMD \
"${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}" \
$NODES_ARGS \
$FEATURESET_CONF \
$ENV_VARS \
$EXTRA_VARS \
$DEFAULT_ARGS \
--extra-vars ci_job_end_time=$(( START_JOB_TIME + REMAINING_TIME*60 )) \
$LOCAL_WORKING_DIR/playbooks/$playbook "${PLAYBOOKS_ARGS[$playbook]:-}" \
2>&1 | tee -a $LOGS_DIR/quickstart_install.log && exit_value=0 || exit_value=$?
# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'
# Print status of playbook run
[[ "$exit_value" == 0 ]] && echo "Playbook run of $playbook passed successfully"
[[ "$exit_value" != 0 ]] && echo "Playbook run of $playbook failed" && break
done
du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
EOF
[[ "$exit_value" == 0 ]] && echo "Playbook run passed successfully" || echo "Playbook run failed"
## LOGS COLLECTION
collect_logs
if [[ "${NODEPOOL_PROVIDER:-''}" == "rdo-cloud-tripleo" ]] || [[ "${NODEPOOL_PROVIDER:-''}" == "tripleo-test-cloud-rh1" ]]; then
if [[ "$TOCI_JOBTYPE" =~ "ovb" ]]; then
bash $LOGS_DIR/collect_logs.sh
# rename script to not to run it in multinode jobs
mv $LOGS_DIR/collect_logs.sh $LOGS_DIR/ovb_collect_logs.sh
fi
fi
popd