a0478d194e
Currently we prepare an available secondary disk, but do not
use it. The preparation taking place now was useful when the
backing file system for LXC was 'dir', but it is now machinectl
which uses a btrfs partition instead.
This patch implements the preparation of a btrfs sparse file
if there is no secondary disk, or the preparation of a secondary
btrfs disk if there is one.
The user_variables override for lxc_host_machine_volume_size
is removed as the default value in the role is perfectly adequate
for AIO deployment needs.
We also add some diagnostic information to help troubleshoot
issues if they arise.
Change-Id: Icadbcfd420bbe7c14a04606790cab09a1dce03ce
(cherry picked from commit 882d98c94b
)
400 lines
14 KiB
Bash
Executable File
400 lines
14 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
# Copyright 2014, Rackspace US, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
## Vars ----------------------------------------------------------------------
|
|
LINE='----------------------------------------------------------------------'
|
|
MAX_RETRIES=${MAX_RETRIES:-5}
|
|
ANSIBLE_PARAMETERS=${ANSIBLE_PARAMETERS:-""}
|
|
STARTTIME="${STARTTIME:-$(date +%s)}"
|
|
COMMAND_LOGS=${COMMAND_LOGS:-"/openstack/log/ansible_cmd_logs"}
|
|
|
|
ZUUL_PROJECT="${ZUUL_PROJECT:-}"
|
|
GATE_EXIT_LOG_COPY="${GATE_EXIT_LOG_COPY:-false}"
|
|
GATE_EXIT_LOG_GZIP="${GATE_EXIT_LOG_GZIP:-true}"
|
|
GATE_EXIT_RUN_ARA="${GATE_EXIT_RUN_ARA:-true}"
|
|
GATE_EXIT_RUN_DSTAT="${GATE_EXIT_RUN_DSTAT:-true}"
|
|
# If this is a gate node from OpenStack-Infra Store all logs into the
|
|
# execution directory after gate run.
|
|
if [[ -n "$ZUUL_PROJECT" ]]; then
|
|
GATE_EXIT_LOG_COPY=true
|
|
fi
|
|
|
|
|
|
# The default SSHD configuration has MaxSessions = 10. If a deployer changes
|
|
# their SSHD config, then the ANSIBLE_FORKS may be set to a higher number. We
|
|
# set the value to 10 or the number of CPU's, whichever is less. This is to
|
|
# balance between performance gains from the higher number, and CPU
|
|
# consumption. If ANSIBLE_FORKS is already set to a value, then we leave it
|
|
# alone.
|
|
# ref: https://bugs.launchpad.net/openstack-ansible/+bug/1479812
|
|
if [ -z "${ANSIBLE_FORKS:-}" ]; then
|
|
CPU_NUM=$(grep -c ^processor /proc/cpuinfo)
|
|
if [ ${CPU_NUM} -lt "10" ]; then
|
|
ANSIBLE_FORKS=${CPU_NUM}
|
|
else
|
|
ANSIBLE_FORKS=10
|
|
fi
|
|
fi
|
|
|
|
|
|
## Functions -----------------------------------------------------------------
|
|
# Determine the distribution we are running on, so that we can configure it
|
|
# appropriately.
|
|
function determine_distro {
|
|
source /etc/os-release 2>/dev/null
|
|
export DISTRO_ID="${ID}"
|
|
export DISTRO_NAME="${NAME}"
|
|
export DISTRO_VERSION_ID="${VERSION_ID}"
|
|
}
|
|
|
|
function ssh_key_create {
|
|
# Ensure that the ssh key exists and is an authorized_key
|
|
key_path="${HOME}/.ssh"
|
|
key_file="${key_path}/id_rsa"
|
|
|
|
# Ensure that the .ssh directory exists and has the right mode
|
|
if [ ! -d ${key_path} ]; then
|
|
mkdir -p ${key_path}
|
|
chmod 700 ${key_path}
|
|
fi
|
|
|
|
# Ensure a full keypair exists
|
|
if [ ! -f "${key_file}" -o ! -f "${key_file}.pub" ]; then
|
|
|
|
# Regenrate public key if private key exists
|
|
if [ -f "${key_file}" ]; then
|
|
ssh-keygen -f ${key_file} -y > ${key_file}.pub
|
|
fi
|
|
|
|
# Delete public key if private key missing
|
|
if [ ! -f "${key_file}" ]; then
|
|
rm -f ${key_file}.pub
|
|
fi
|
|
|
|
# Regenerate keypair if both keys missing
|
|
if [ ! -f "${key_file}" -a ! -f "${key_file}.pub" ]; then
|
|
ssh-keygen -t rsa -f ${key_file} -N ''
|
|
fi
|
|
|
|
fi
|
|
|
|
# Ensure that the public key is included in the authorized_keys
|
|
# for the default root directory and the current home directory
|
|
key_content=$(cat "${key_file}.pub")
|
|
if ! grep -q "${key_content}" ${key_path}/authorized_keys; then
|
|
echo "${key_content}" | tee -a ${key_path}/authorized_keys
|
|
fi
|
|
}
|
|
|
|
function exit_state {
|
|
set +x
|
|
TOTALSECONDS="$(( $(date +%s) - STARTTIME ))"
|
|
info_block "Run Time = ${TOTALSECONDS} seconds || $((TOTALSECONDS / 60)) minutes"
|
|
if [ "${1}" == 0 ];then
|
|
info_block "Status: Success"
|
|
else
|
|
info_block "Status: Failure"
|
|
fi
|
|
exit ${1}
|
|
}
|
|
|
|
function exit_success {
|
|
set +x
|
|
exit_state 0
|
|
}
|
|
|
|
function exit_fail {
|
|
set +x
|
|
log_instance_info
|
|
info_block "Error Info - $@"
|
|
exit_state 1
|
|
}
|
|
|
|
function gate_job_exit_tasks {
|
|
# This environment variable captures the exit code
|
|
# which was present when the trap was initiated.
|
|
# This would be the success/failure of the test.
|
|
TEST_EXIT_CODE=${TEST_EXIT_CODE:-$?}
|
|
|
|
# Specify a default location to capture logs into,
|
|
# just in case one is not provided (eg: when not run
|
|
# by zuul).
|
|
GATE_LOG_DIR=${GATE_LOG_DIR:-/opt/openstack-ansible/logs}
|
|
|
|
# If this is a gate node from OpenStack-Infra Store all logs into the
|
|
# execution directory after gate run.
|
|
if [ "$GATE_EXIT_LOG_COPY" == true ]; then
|
|
if [ "$GATE_EXIT_RUN_DSTAT" == true ]; then
|
|
generate_dstat_charts || true
|
|
fi
|
|
|
|
mkdir -p "${GATE_LOG_DIR}/host" "${GATE_LOG_DIR}/openstack"
|
|
RSYNC_OPTS="--archive --safe-links --ignore-errors --quiet --no-perms --no-owner --no-group"
|
|
rsync $RSYNC_OPTS /var/log/ "${GATE_LOG_DIR}/host" || true
|
|
rsync $RSYNC_OPTS /openstack/log/ "${GATE_LOG_DIR}/openstack" || true
|
|
# Rename all files gathered to have a .txt suffix so that the compressed
|
|
# files are viewable via a web browser in OpenStack-CI.
|
|
# except tempest results testrepository.subunit and testr_results.html
|
|
find "${GATE_LOG_DIR}/" -type f -not -name "testrepository.subunit" -not -name "testr_results.html" -not -name "dstat.html" -exec mv {} {}.txt \;
|
|
|
|
# Generate the ARA report if enabled
|
|
if [ "$GATE_EXIT_RUN_ARA" == true ]; then
|
|
|
|
# Define the ARA path for reusability
|
|
ARA_CMD="/opt/ansible-runtime/bin/ara"
|
|
|
|
# Create the ARA log directory and store the sqlite source database
|
|
mkdir ${GATE_LOG_DIR}/ara
|
|
rsync $RSYNC_OPTS "${HOME}/.ara/ansible.sqlite" "${GATE_LOG_DIR}/ara/"
|
|
|
|
# In order to reduce the quantity of unnecessary log content
|
|
# being kept in OpenStack-Infra we only generate the ARA report
|
|
# when the test result is a failure.
|
|
if [[ "${TEST_EXIT_CODE}" != "0" ]] && [[ "${TEST_EXIT_CODE}" != "true" ]]; then
|
|
echo "Generating ARA report due to non-zero exit code (${TEST_EXIT_CODE})."
|
|
${ARA_CMD} generate html "${GATE_LOG_DIR}/ara" || true
|
|
else
|
|
echo "Not generating ARA report due to test pass."
|
|
fi
|
|
# We still want the subunit report though, as that reflects
|
|
# success/failure in OpenStack Health
|
|
${ARA_CMD} generate subunit "${GATE_LOG_DIR}/ara/testrepository.subunit" || true
|
|
fi
|
|
# Compress the files gathered so that they do not take up too much space.
|
|
# We use 'command' to ensure that we're not executing with some sort of alias.
|
|
if [ "$GATE_EXIT_LOG_GZIP" == true ]; then
|
|
command gzip --best --recursive "${GATE_LOG_DIR}/"
|
|
fi
|
|
# Ensure that the files are readable by all users, including the non-root
|
|
# OpenStack-CI jenkins user.
|
|
chmod -R ugo+rX "${GATE_LOG_DIR}"
|
|
chown -R $(whoami) "${GATE_LOG_DIR}"
|
|
fi
|
|
}
|
|
|
|
function run_dstat {
|
|
if [ "$GATE_EXIT_RUN_DSTAT" == true ]; then
|
|
case ${DISTRO_ID} in
|
|
centos|rhel)
|
|
# Prefer dnf over yum for CentOS.
|
|
which dnf &>/dev/null && RHT_PKG_MGR='dnf' || RHT_PKG_MGR='yum'
|
|
$RHT_PKG_MGR -y install dstat
|
|
;;
|
|
ubuntu)
|
|
apt-get update
|
|
DEBIAN_FRONTEND=noninteractive apt-get -y install dstat
|
|
;;
|
|
opensuse)
|
|
zypper -n install -l dstat
|
|
;;
|
|
esac
|
|
|
|
# https://stackoverflow.com/a/20338327 executing in ()& decouples the dstat
|
|
# process from scripts-library to prevent hung builds if dstat fails to exit
|
|
# for any reason.
|
|
(dstat -tcmsdn --top-cpu --top-mem --top-bio --nocolor --output /openstack/log/instance-info/dstat.csv \
|
|
< /dev/null > /openstack/log/instance-info/dstat.log 2>&1 &)
|
|
fi
|
|
}
|
|
|
|
function generate_dstat_charts {
|
|
kill $(pgrep -f dstat)
|
|
if [[ ! -d /opt/dstat_graph ]]; then
|
|
git clone https://github.com/Dabz/dstat_graph /opt/dstat_graph
|
|
fi
|
|
pushd /opt/dstat_graph
|
|
/usr/bin/env bash -e ./generate_page.sh /openstack/log/instance-info/dstat.csv >> /openstack/log/instance-info/dstat.html
|
|
popd
|
|
}
|
|
|
|
function print_info {
|
|
PROC_NAME="- [ $@ ] -"
|
|
printf "\n%s%s\n" "$PROC_NAME" "${LINE:${#PROC_NAME}}"
|
|
}
|
|
|
|
function info_block {
|
|
echo "${LINE}"
|
|
print_info "$@"
|
|
echo "${LINE}"
|
|
}
|
|
|
|
function log_instance_info {
|
|
set +x
|
|
# Get host information post initial setup and reset verbosity
|
|
if [ ! -d "/openstack/log/instance-info" ];then
|
|
mkdir -p "/openstack/log/instance-info"
|
|
fi
|
|
get_instance_info
|
|
set -x
|
|
}
|
|
|
|
function get_repos_info {
|
|
for i in /etc/apt/sources.list /etc/apt/sources.list.d/* /etc/yum.conf /etc/yum.repos.d/* /etc/zypp/repos.d/*; do
|
|
if [ -f "${i}" ]; then
|
|
echo -e "\n$i"
|
|
cat $i
|
|
fi
|
|
done
|
|
}
|
|
|
|
# Get instance info
|
|
function get_instance_info {
|
|
TS="$(date +"%H-%M-%S")"
|
|
(cat /etc/resolv.conf && \
|
|
which systemd-resolve &> /dev/null && \
|
|
systemd-resolve --statistics && \
|
|
cat /etc/systemd/resolved.conf) > \
|
|
"/openstack/log/instance-info/host_dns_info_${TS}.log" || true
|
|
if [ "$(which tracepath)" ]; then
|
|
{ tracepath "8.8.8.8" -m 5 2>/dev/null || tracepath "8.8.8.8"; } > \
|
|
"/openstack/log/instance-info/host_tracepath_info_${TS}.log" || true
|
|
fi
|
|
if [ "$(which tracepath6)" ]; then
|
|
{ tracepath6 "2001:4860:4860::8888" -m 5 2>/dev/null || tracepath6 "2001:4860:4860::8888"; } >> \
|
|
"/openstack/log/instance-info/host_tracepath_info_${TS}.log" || true
|
|
fi
|
|
if [ "$(which lxc-ls)" ]; then
|
|
lxc-ls --fancy > \
|
|
"/openstack/log/instance-info/host_lxc_container_info_${TS}.log" || true
|
|
fi
|
|
if [ "$(which lxc-checkconfig)" ]; then
|
|
lxc-checkconfig > \
|
|
"/openstack/log/instance-info/host_lxc_config_info_${TS}.log" || true
|
|
fi
|
|
(iptables -vnL && iptables -t nat -vnL && iptables -t mangle -vnL) > \
|
|
"/openstack/log/instance-info/host_firewall_info_${TS}.log" || true
|
|
if [ "$(which ansible)" ]; then
|
|
ANSIBLE_HOST_KEY_CHECKING=False \
|
|
ansible -i "localhost," localhost -m setup > \
|
|
"/openstack/log/instance-info/host_system_info_${TS}.log" || true
|
|
fi
|
|
get_repos_info > \
|
|
"/openstack/log/instance-info/host_repo_info_${TS}.log" || true
|
|
|
|
determine_distro
|
|
case ${DISTRO_ID} in
|
|
centos|rhel|fedora|opensuse)
|
|
rpm -qa > \
|
|
"/openstack/log/instance-info/host_packages_info_${TS}.log" || true
|
|
;;
|
|
ubuntu|debian)
|
|
dpkg-query --list > \
|
|
"/openstack/log/instance-info/host_packages_info_${TS}.log" || true
|
|
;;
|
|
esac
|
|
|
|
# Storage reports
|
|
btrfs filesystem usage /var/lib/machines > \
|
|
"/openstack/log/instance-info/machines_usage_${TS}.log" || true
|
|
btrfs filesystem show /var/lib/machines >> \
|
|
"/openstack/log/instance-info/machines_show_${TS}.log" || true
|
|
btrfs filesystem df /var/lib/machines >> \
|
|
"/openstack/log/instance-info/machines_df_${TS}.log" || true
|
|
btrfs qgroup show --human-readable -pcre --iec /var/lib/machines >> \
|
|
"/openstack/log/instance-info/machines_quotas_${TS}.log" || true
|
|
df -h > "/openstack/log/instance-info/report_fs_df_${TS}.log" || true
|
|
}
|
|
|
|
function get_pip {
|
|
|
|
# The python executable to use when executing get-pip is passed
|
|
# as a parameter to this function.
|
|
GETPIP_PYTHON_EXEC_PATH="${1:-$(which python)}"
|
|
|
|
# Download the get-pip script using the primary or secondary URL
|
|
GETPIP_CMD="curl --silent --show-error --retry 5"
|
|
GETPIP_FILE="/opt/get-pip.py"
|
|
# If GET_PIP_URL is set, then just use it
|
|
if [ -n "${GET_PIP_URL:-}" ]; then
|
|
${GETPIP_CMD} ${GET_PIP_URL} > ${GETPIP_FILE}
|
|
else
|
|
# Otherwise, try the two standard URL's
|
|
${GETPIP_CMD} https://bootstrap.pypa.io/get-pip.py > ${GETPIP_FILE}\
|
|
|| ${GETPIP_CMD} https://raw.githubusercontent.com/pypa/get-pip/master/get-pip.py > ${GETPIP_FILE}
|
|
fi
|
|
|
|
${GETPIP_PYTHON_EXEC_PATH} ${GETPIP_FILE} \
|
|
pip setuptools wheel \
|
|
--constraint global-requirement-pins.txt \
|
|
|| ${GETPIP_PYTHON_EXEC_PATH} ${GETPIP_FILE} \
|
|
pip setuptools wheel \
|
|
--constraint global-requirement-pins.txt \
|
|
--isolated
|
|
}
|
|
|
|
function get_bowling_ball_tests {
|
|
# Retrieve the latest bowling ball test script in case we don't already have it.
|
|
if [ -f scripts/rolling_tests.py ]; then
|
|
return
|
|
fi
|
|
|
|
curl --silent https://raw.githubusercontent.com/openstack/openstack-ansible-ops/master/bowling_ball/rolling_tests.py > scripts/rolling_tests.py
|
|
}
|
|
|
|
function start_bowling_ball_tests {
|
|
# The tests will pull Keystone information from the env vars defined in openrc
|
|
source ~/openrc
|
|
# Get the list of services to test for from the script, so we're not hard coding.
|
|
for SERVICE in $(python ./scripts/rolling_tests.py list | cut -d - -f 1); do
|
|
# Start the scripts in the background and wait between each invocation.
|
|
# Without the wait, they do not all launch.
|
|
python ./scripts/rolling_tests.py $SERVICE &
|
|
sleep 1
|
|
echo "Started $SERVICE test in background"
|
|
done
|
|
}
|
|
|
|
function kill_bowling_ball_tests {
|
|
pkill -f rolling_tests
|
|
}
|
|
|
|
function print_bowling_ball_results {
|
|
grep "failure rate" /var/log/*_rolling.log
|
|
}
|
|
|
|
## Signal traps --------------------------------------------------------------
|
|
# Trap all Death Signals and Errors
|
|
trap "exit_fail ${LINENO} $? 'Received STOP Signal'" SIGHUP SIGINT SIGTERM
|
|
trap "exit_fail ${LINENO} $?" ERR
|
|
|
|
## Pre-flight check ----------------------------------------------------------
|
|
# Make sure only root can run our script
|
|
if [ "$(id -u)" != "0" ]; then
|
|
info_block "This script must be run as root"
|
|
exit_state 1
|
|
fi
|
|
|
|
# Check that we are in the root path of the cloned repo
|
|
if [ ! -d "etc" -a ! -d "scripts" -a ! -d "playbooks" ]; then
|
|
info_block "** ERROR **"
|
|
echo "Please execute this script from the root directory of the cloned source code."
|
|
echo -e "Example: /opt/openstack-ansible/\n"
|
|
exit_state 1
|
|
fi
|
|
|
|
|
|
## Exports -------------------------------------------------------------------
|
|
# Export known paths
|
|
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH}"
|
|
|
|
# Export the home directory just in case it's not set
|
|
export HOME="/root"
|
|
|
|
if [[ -f "/usr/local/bin/openstack-ansible.rc" ]];then
|
|
source "/usr/local/bin/openstack-ansible.rc"
|
|
fi
|