Browse Source

Merge "Remove timeout logic"

changes/83/607183/7
Zuul 1 year ago
parent
commit
3262fab03c
4 changed files with 68 additions and 114 deletions
  1. +0
    -2
      playbooks/tripleo-ci/templates/common_vars.bash.j2
  2. +61
    -79
      playbooks/tripleo-ci/templates/oooq_common_functions.sh.j2
  3. +1
    -2
      playbooks/tripleo-ci/templates/toci_gate_test.sh.j2
  4. +6
    -31
      playbooks/tripleo-ci/templates/toci_quickstart.sh.j2

+ 0
- 2
playbooks/tripleo-ci/templates/common_vars.bash.j2 View File

@@ -36,12 +36,10 @@ fi

# Environment defaults. Each ${VAR:-default} keeps a value that is already
# set and non-empty in the calling environment, and falls back to the default
# otherwise.
export TRIPLEO_ROOT=${TRIPLEO_ROOT:-"/opt/stack/new"}
# WORKSPACE defaults to a directory under TRIPLEO_ROOT, so it must be set after it.
export WORKSPACE=${WORKSPACE:-"$TRIPLEO_ROOT/workspace"}
# NOTE(review): presumably a value in minutes (consumers multiply it by 60) - confirm.
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-"180"}
# Put the sbin directories ahead of the existing search path.
export PATH=/sbin:/usr/sbin:$PATH

export UNDERCLOUD_VM_NAME=instack

# NOTE(review): presumably the job time budget in minutes - confirm against consumers.
export REMAINING_TIME=${REMAINING_TIME:-180}
# Empty by default.
export NODEPOOL_DOCKER_REGISTRY_PROXY=${NODEPOOL_DOCKER_REGISTRY_PROXY:-""}

# Post-CI chores to run at the end of the CI job

+ 61
- 79
playbooks/tripleo-ci/templates/oooq_common_functions.sh.j2 View File

@@ -72,86 +72,68 @@ function is_featureset {
[ $(shyaml get-value "${type}" "False"< "${featureset_file}") = "True" ]
}

# Run a command under /usr/bin/timeout, capped at the job time still available.
#
# Arguments:
#   $1   - job start time in epoch seconds (START_JOB_TIME)
#   $2.. - command and its arguments. They are flattened into one string and
#          word-split again before execution, so multi-word arguments are
#          re-split and empty arguments disappear; callers rely on this.
# Globals:
#   TOCI_JOBTYPE   (read)
#   REMAINING_TIME (read; set to 180 when unset or empty), in minutes
#   JOB_TIME, COMMAND, RESERVED_LOG_TIME, TIME_FOR_COMMAND (written)
# Returns:
#   143 without running anything when less than one minute is left,
#   otherwise the status reported by timeout --preserve-status.
run_with_timeout() {
  JOB_TIME=$1
  shift
  # shellcheck disable=SC2124 # deliberate flattening, see header
  COMMAND=$@

  # Minutes held back for quickstart log collection: 20 for ovb, 3 otherwise.
  case "$TOCI_JOBTYPE" in
    *ovb*) RESERVED_LOG_TIME=20 ;;
    *) RESERVED_LOG_TIME=3 ;;
  esac

  # Budget = infra-provided remaining minutes - minutes already elapsed
  #          since the job started - minutes reserved for logs.
  : "${REMAINING_TIME:=180}"
  TIME_FOR_COMMAND=$(( REMAINING_TIME - ($(date +%s) - JOB_TIME) / 60 - RESERVED_LOG_TIME ))

  if (( TIME_FOR_COMMAND < 1 )); then
    return 143
  fi

  # shellcheck disable=SC2086 # COMMAND is intentionally word-split
  /usr/bin/timeout --preserve-status "${TIME_FOR_COMMAND}m" ${COMMAND}
}

# Generate $LOGS_DIR/collect_logs.sh, a standalone bash script that exports a
# snapshot of the current job environment, activates the quickstart
# virtualenv, runs $QUICKSTART_COLLECTLOGS_CMD and copies selected
# result files into $LOGS_DIR.
#
# Takes no arguments; reads LOGS_DIR, WORKSPACE, HOME, USER, OOOQ_DIR,
# TOCI_JOBTYPE, QUICKSTART_COLLECTLOGS_CMD and the optional variables that
# appear below with a :-'' fallback.
#
# Both heredoc delimiters are unquoted, so every $VAR / ${VAR:-''} in the
# bodies is expanded NOW, while this function runs, and the resulting values
# are baked into the generated script. Only the backslash-escaped \$i and
# \$(basename ...) in the second body survive to be evaluated when the
# generated script itself runs. Single quotes are literal inside a heredoc,
# so an unset optional variable is written out as ''.
#
# The double-curly-brace placeholders are not shell syntax; presumably the
# template engine substitutes them before this file is executed - TODO confirm.
function create_collect_logs_script {
# First writer. The <<- form strips leading tab characters from the body and
# from the delimiter line.
cat <<-EOF > $LOGS_DIR/collect_logs.sh
#!/bin/bash
set -x

export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export START_JOB_TIME=${START_JOB_TIME:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export DEVSTACK_GATE_TIMEOUT=${DEVSTACK_GATE_TIMEOUT:-''}
export REMAINING_TIME=${REMAINING_TIME:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE={{ nodes_file }}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}

set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh

/usr/bin/timeout --preserve-status 40m $QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collectlogs.log for details"

cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true

if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
fi

# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true

# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true

# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'

du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
EOF
# Second writer. It targets the same path with a truncating redirect, so it
# replaces everything the first heredoc wrote. Compared with the first body,
# this one drops the START_JOB_TIME, DEVSTACK_GATE_TIMEOUT and REMAINING_TIME
# exports and the 40m timeout wrapper around the collect-logs command, takes
# NODES_FILE from the environment, and adds the DNS cache dump and the
# quickstart_files archiving steps.
# NOTE(review): the two bodies look like the old and new revisions of one
# script; confirm that only one of them is meant to remain.
cat > $LOGS_DIR/collect_logs.sh <<END
#!/bin/bash
set -x

export NODEPOOL_PROVIDER=${NODEPOOL_PROVIDER:-''}
export STATS_TESTENV=${STATS_TESTENV:-''}
export STATS_OOOQ=${STATS_OOOQ:-''}
export ZUUL_PIPELINE=${ZUUL_PIPELINE:-''}
export LOCAL_WORKING_DIR="$WORKSPACE/.quickstart"
export OPT_WORKDIR=$LOCAL_WORKING_DIR
export WORKING_DIR="$HOME"
export LOGS_DIR=$WORKSPACE/logs
export VIRTUAL_ENV_DISABLE_PROMPT=1
export ANSIBLE_CONFIG=$OOOQ_DIR/ansible.cfg
export ARA_DATABASE=sqlite:///${LOCAL_WORKING_DIR}/ara.sqlite
export ZUUL_CHANGES=${ZUUL_CHANGES:-''}
export NODES_FILE=${NODES_FILE:-''}
export TOCI_JOBTYPE=$TOCI_JOBTYPE
export STABLE_RELEASE=${STABLE_RELEASE:-''}
export QUICKSTART_RELEASE=${QUICKSTART_RELEASE:-''}

set +u
source $LOCAL_WORKING_DIR/bin/activate
set -u
source $OOOQ_DIR/ansible_ssh_env.sh

sudo unbound-control dump_cache > /tmp/dns_cache.txt
sudo chown {{ ansible_user_id }}: /tmp/dns_cache.txt
cat /tmp/dns_cache.txt | gzip - > $LOGS_DIR/dns_cache.txt.gz

mkdir -p $LOGS_DIR/quickstart_files
find $LOCAL_WORKING_DIR -maxdepth 1 -type f -not -name "*sqlite" | while read i; do gzip -cf \$i > $LOGS_DIR/quickstart_files/\$(basename \$i).txt.gz; done

$QUICKSTART_COLLECTLOGS_CMD > $LOGS_DIR/quickstart_collect_logs.log || \
echo "WARNING: quickstart collect-logs failed, check quickstart_collectlogs.log for details"

cp $LOGS_DIR/undercloud/var/log/postci.txt.gz $LOGS_DIR/ || true

if [[ -e $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/tempest/testrepository.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/pingtest.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
elif [[ -e $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ]]; then
cp $LOGS_DIR/undercloud/home/$USER/undercloud_sanity.subunit.gz ${LOGS_DIR}/testrepository.subunit.gz
fi

# Copy tempest.html to root dir
cp $LOGS_DIR/undercloud/home/$USER/tempest/tempest.html.gz ${LOGS_DIR} || true

# Copy tempest and .testrepository directory to /opt/stack/new/tempest and
# unzip
sudo mkdir -p /opt/stack/new
sudo cp -Rf $LOGS_DIR/undercloud/home/$USER/tempest /opt/stack/new || true
sudo gzip -d -r /opt/stack/new/tempest/.testrepository || true

# record the size of the logs directory
# -L, --dereference dereference all symbolic links
# Note: tail -n +1 is to prevent the error 'Broken Pipe' e.g. 'sort: write failed: standard output: Broken pipe'

du -L -ch $LOGS_DIR/* | tail -n +1 | sort -rh | head -n 200 &> $LOGS_DIR/log-size.txt || true
END

}


+ 1
- 2
playbooks/tripleo-ci/templates/toci_gate_test.sh.j2 View File

@@ -62,7 +62,6 @@ export NODECOUNT=0
export OVERCLOUD_HOSTS=
export CONTROLLER_HOSTS=
export SUBNODES_SSH_KEY=
TIMEOUT_SECS=$((DEVSTACK_GATE_TIMEOUT*60))
export EXTRA_VARS=${EXTRA_VARS:-""}
export VXLAN_VARS=${VXLAN_VARS:-""}
export EXTRANODE=""
@@ -170,7 +169,7 @@ if [ -z "${TE_DATAFILE:-}" -a "{{ environment_infra }}" = "ovb" ] ; then
NETISO_ENV="multi-nic"

# provision env in rh cloud, then start quickstart
./testenv-client -b $GEARDSERVER:4730 -t $TIMEOUT_SECS \
./testenv-client -b $GEARDSERVER:4730 -t {{ zuul.timeout }} \
--envsize $NODECOUNT --ucinstance $UCINSTANCEID \
--net-iso $NETISO_ENV $EXTRANODE -- ./toci_quickstart.sh
elif [ "{{ environment_infra }}" = "ovb" ] ; then

+ 6
- 31
playbooks/tripleo-ci/templates/toci_quickstart.sh.j2 View File

@@ -141,51 +141,26 @@ done
{% else %}
## LOGS COLLECTION PREPARE
create_collect_logs_script
for playbook in {{ " ".join(playbooks) }}; do
echo "${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}"
run_with_timeout $START_JOB_TIME $QUICKSTART_INSTALL_CMD \
"${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}" \
for playbook in {{ " ".join(playbooks) }}; do
echo ${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG}
$QUICKSTART_INSTALL_CMD \
${RELEASE_ARGS[$playbook]:=$QUICKSTART_DEFAULT_RELEASE_ARG} \
{{ nodes_args }} \
{{ featureset_conf }} \
{{ env_vars }} \
{{ extra_vars }} \
{{ vxlan_vars }} \
$DEFAULT_ARGS \
--extra-vars ci_job_end_time=$(( START_JOB_TIME + REMAINING_TIME*60 )) \
--extra-vars @{{ workspace }}/logs/zuul-variables.yaml \
$LOCAL_WORKING_DIR/playbooks/$playbook "${PLAYBOOKS_ARGS[$playbook]:-}" \
$LOCAL_WORKING_DIR/playbooks/$playbook ${PLAYBOOKS_ARGS[$playbook]:-} \
2>&1 | tee -a $LOGS_DIR/quickstart_install.log && exit_value=0 || exit_value=$?

# Print status of playbook run
[[ "$exit_value" == 0 ]] && echo "Playbook run of $playbook passed successfully"
[[ "$exit_value" != 0 ]] && echo "Playbook run of $playbook failed" && break
done
done
[[ "$exit_value" == 0 ]] && echo "Playbook run passed successfully" || echo "Playbook run failed"

## LOGS COLLECTION RUN (if applicable)
if [[ "${NODEPOOL_PROVIDER:-''}" == "rdo-cloud-tripleo" ]] || [[ "${NODEPOOL_PROVIDER:-''}" == "tripleo-test-cloud-rh1" ]]; then
if [[ "$TOCI_JOBTYPE" =~ "ovb" ]]; then
bash $LOGS_DIR/collect_logs.sh
# rename script to not to run it in multinode jobs
mv $LOGS_DIR/collect_logs.sh $LOGS_DIR/ovb_collect_logs.sh
fi
fi

{% endif %}

popd

sudo unbound-control dump_cache > /tmp/dns_cache.txt
sudo chown {{ ansible_user_id }}: /tmp/dns_cache.txt
cat /tmp/dns_cache.txt | gzip - > $LOGS_DIR/dns_cache.txt.gz

{% if periodic|bool %}
if [[ -e {{ workspace }}/hash_info.sh ]] ; then
echo export JOB_EXIT_VALUE=$exit_value >> {{ workspace }}/hash_info.sh
fi
{% endif %}

mkdir -p $LOGS_DIR/quickstart_files
find $LOCAL_WORKING_DIR -maxdepth 1 -type f -not -name "*sqlite" | while read i; do gzip -cf $i > $LOGS_DIR/quickstart_files/$(basename $i).txt.gz; done
echo 'Quickstart completed.'
exit $exit_value

Loading…
Cancel
Save