#! /bin/bash
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##########################################################################################

# Debug logging is off by default.
DEBUG=false

# Fail Codes
PASS=0
FAIL=1
RETRY=2

FAIL_NODETYPE=3

FAIL_TIMEOUT=10
FAIL_TIMEOUT1=11
FAIL_TIMEOUT2=12
FAIL_TIMEOUT3=13
FAIL_TIMEOUT4=14
FAIL_TIMEOUT5=15
FAIL_TIMEOUT6=16
FAIL_TIMEOUT7=17
FAIL_TIMEOUT8=18
FAIL_TIMEOUT9=19

FAIL_PASSWORD=30
FAIL_PERMISSION=31
FAIL_CLEANUP=32
FAIL_UNREACHABLE=33
FAIL_HOSTNAME=34
FAIL_INACTIVE=35
FAIL_PERMISSION_SKIP=36
FAIL_OUT_OF_SPACE=37
FAIL_INSUFFICIENT_SPACE=38
FAIL_OUT_OF_SPACE_LOCAL=39
# NOTE(review): FAIL_CREATE has the same value (39) as FAIL_OUT_OF_SPACE_LOCAL,
# so the two failures cannot be told apart by code. The value is left untouched
# because callers may depend on it - confirm before renumbering.
FAIL_CREATE=39

# Warnings are 200 and above
WARN_WARNING=200
WARN_HOSTNAME=201

# Failure Strings
FAIL_OUT_OF_SPACE_STR="No space left on device"
FAIL_TAR_OUT_OF_SPACE_STR="tar: Error is not recoverable"
FAIL_INSUFFICIENT_SPACE_STR="Not enough space on device"

# The minimum amount of % free space on /scratch to allow collect to proceed
MIN_PERCENT_SPACE_REQUIRED=75

# Log file path/names
COLLECT_LOG=/var/log/collect.log
COLLECT_ERROR_LOG=/tmp/collect_error.log

# Syslog tag. It has to be assigned before the nodetype check below, which
# logs with it; with an empty tag the message itself would be taken as the
# tag and logger would wait on stdin for a message.
COLLECT_TAG="COLLECT"

# Load up the nova openrc file if it is possible.
# ACTIVE is set to true only when a .CREDENTIAL file exists somewhere under
# the keyring directory and the openrc file is present.
KEYRING_PATH="/opt/platform/.keyring"
if [ -e "${KEYRING_PATH}" ] ; then
    CRED=$(find "${KEYRING_PATH}" -name .CREDENTIAL)
    if [ -n "${CRED}" ] ; then
        NOVAOPENRC="/etc/nova/openrc"
        if [ -e "${NOVAOPENRC}" ] ; then
            ACTIVE=true
            # Output and errors from the openrc file are deliberately discarded.
            source "${NOVAOPENRC}" >/dev/null 2>&1
        fi
    fi
fi

# get the node and subfunction types
# Both default to empty; platform.conf is sourced so that it can assign them.
nodetype=""
subfunction=""
PLATFORM_CONF=/etc/platform/platform.conf
if [ -e "${PLATFORM_CONF}" ] ; then
    source "${PLATFORM_CONF}"
fi

# Only controller, worker and storage nodes are supported. Anything else is
# logged and ends the shell with FAIL_NODETYPE.
case "${nodetype}" in
    controller|worker|storage)
        ;;
    *)
        logger -t "${COLLECT_TAG}" "could not identify nodetype ($nodetype)"
        exit $FAIL_NODETYPE
        ;;
esac

NODETYPE=$nodetype
SUBFUNCTION=$subfunction

# Setup an expect command completion file.
# This is used to force serialization of expect
# sequences and highlight command completion
collect_done="collect done"
cmd_done_sig="expect done"
cmd_done_file="/usr/local/sbin/expect_done"

# Compression Commands
TAR_ZIP_CMD="tar -cvzf"
TAR_UZIP_CMD="tar -xvzf"
TAR_CMD="tar -cvhf"
UNTAR_CMD="tar -xvf"
ZIP_CMD="gzip"
NICE_CMD="/usr/bin/nice -n19"
IONICE_CMD="/usr/bin/ionice -c2 -n7"
COLLECT_TAG="COLLECT"

STARTDATE_OPTION="--start-date"
ENDDATE_OPTION="--end-date"

PROCESS_DETAIL_CMD="ps -e -H -o ruser,tid,pid,ppid,flags,stat,policy,rtprio,nice,priority,rss:10,vsz:10,sz:10,psr,stime,tty,cputime,wchan:14,cmd"
BUILD_INFO_CMD="cat /etc/build.info"

################################################################################
# Log Debug, Info or Error log message to syslog
#
# All helpers hand their arguments to logger as "$@" so that a message is
# passed through verbatim: no word splitting and no pathname expansion of
# characters such as '*'.
################################################################################

# log: send the message to syslog only.
function log
{
    logger -t "${COLLECT_TAG}" "$@"
}

# ilog: echo the message to stdout and send it to syslog.
function ilog
{
    echo "$@"
    logger -t "${COLLECT_TAG}" "$@"
    #logger -p local3.info -t ${COLLECT_TAG} $@
}

# elog: echo the message to stdout with an "Error: " prefix and send the
# unprefixed message to syslog.
function elog
{
    echo "Error: $*"
    logger -t "${COLLECT_TAG}" "$@"
}

# wlog: echo the message to stdout with a "Warning: " prefix and send the
# unprefixed message to syslog.
function wlog
{
    echo "Warning: $*"
    logger -t "${COLLECT_TAG}" "$@"
}

# set_debug_mode: store $1 in DEBUG; dlog is active only when it is "true".
function set_debug_mode()
{
    DEBUG=${1}
}

# dlog: when DEBUG is "true", send the message to syslog and echo it to
# stdout with a "Debug: " prefix; otherwise do nothing.
function dlog()
{
    if [ "$DEBUG" == true ] ; then
        logger -t "${COLLECT_TAG}" "$@"
        echo "Debug: $*"
    fi
}

################################################################################
# Name       : delimiter
#
# Description: Append a three line section header to a file: a dashed rule,
#              a "<date> : <hostname> : <title>" line and a second rule.
#
# Globals    : myhostname        (read) - expected to be set by the caller
#              COLLECT_ERROR_LOG (read) - receives any error output
#
# Parameters : $1 - file to append to
#              $2 - title text
################################################################################
function delimiter()
{
    local logfile="${1}"
    local title="${2}"
    local rule="--------------------------------------------------------------------"

    {
        echo "${rule}"
        echo "$(date) : ${myhostname} : ${title}"
        echo "${rule}"
    } >> "${logfile}" 2>>"${COLLECT_ERROR_LOG}"
}

################################################################################
# Name       : log_slabinfo
#
# Description: Append a formatted slab usage table to a file. The 17 field
#              header line is reprinted with its '<' and '>' removed and a
#              trailing "KiB" column; every 16 field data line gets a KiB
#              value of page_KiB * num_objs / obj_per_slab * pages_per_slab
#              (0 when obj_per_slab is 0); a TOTAL row with the KiB sum ends
#              the table.
#
# Globals    : COLLECT_ERROR_LOG (read) - receives any error output,
#              including a failure to read the input file
#
# Parameters : $1 - file to append to
#              $2 - optional slabinfo formatted input file;
#                   defaults to /proc/slabinfo
################################################################################
function log_slabinfo()
{
    local logfile="${1}"
    local slabinfo="${2:-/proc/slabinfo}"
    local page_size

    page_size=$(getconf PAGE_SIZE)

    awk -v page_size_B="${page_size}" '
    BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
    (NF == 17) {
        gsub(/[<>]/, "");
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
            $2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
    }
    (NF == 16) {
        num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
        KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
        TOT_KiB += KiB;
        printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
            $1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
    }
    END {
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
            "TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
    }
    ' "${slabinfo}" >> "${logfile}" 2>>"${COLLECT_ERROR_LOG}"
}

###########################################################################
#
# Name       : collect_errors
#
# Description: Search COLLECT_ERROR_LOG for the known space related error
#              strings held in listOfOutOfSpaceErrors.
#              Return 0 if no such logs are found (or the log is absent).
#              Return 1 if such logs are found.
#
# Parameters : $1 - name of the host the collect was run against;
#                   used only in the messages
#
# Globals    : COLLECT_ERROR_LOG, listOfOutOfSpaceErrors, COLLECT_TAG,
#              FAIL_OUT_OF_SPACE, FAIL_OUT_OF_SPACE_STR, COLLECT_BASE_DIR
#              (all read only)
#
# Assumptions: Caller should assume a non-zero return as an indication of
#              a corrupt or incomplete collect log
#
#              Create logs and screen echos that record the error for the user.
#
#              May look for other errors in the future
#
###########################################################################

listOfOutOfSpaceErrors=(
"${FAIL_OUT_OF_SPACE_STR}"
"${FAIL_TAR_OUT_OF_SPACE_STR}"
"${FAIL_INSUFFICIENT_SPACE_STR}"
)

function collect_errors()
{
    local host=${1}
    local RC=0
    local pattern
    local string

    if [ -e "${COLLECT_ERROR_LOG}" ] ; then

        ## now loop through known space related error strings
        for pattern in "${listOfOutOfSpaceErrors[@]}" ; do

            # An empty entry would match every line of the log; skip it.
            [ -n "${pattern}" ] || continue

            # The entries are plain text, so match them as fixed strings.
            if grep -qF -- "${pattern}" "${COLLECT_ERROR_LOG}" ; then

                # Whichever string matched, the failure is reported as
                # out of space.
                string="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})"

                # /var/log/user.log it
                logger -t "${COLLECT_TAG}" "${string}"

                # logs that show up in the foreground
                echo "${string}"
                echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation."

                # return error code
                RC=1
                break
            fi
        done
    fi
    return ${RC}
}