integ/tools/collector/scripts/collect_utils
Eric MacDonald adda44b54e Make platform.conf variables visible to collect.d service scripts
A number of collect.d service scripts are not gathering
all the info that is needed during the collect operation.

Many collect.d service scripts use nodetype, subfunction,
vswitch_type and other platform.conf variables to determine
what to collect.

However, these variables are seen to be empty causing the
service scripts to make incorrect collection decisions.

This update modifies the scoping of loaded platform.conf
variables so that they are visible to all of collect,
including the collect.d service scripts.

Test Plan:

PASS: Verify platform.conf variables are visible to collect.d service scripts.
PASS: Verify file list before and after update.
PASS: Run collect all and verify missing content present.
PASS: Run collect host and verify missing content present.

Change-Id: Ia6f45afd3f3fc676b0d3eaca16dc8bbe68356c8e
Closes-Bug: 1839389
2019-08-13 13:47:23 -04:00

238 lines
6.1 KiB
Bash
Executable File

#! /bin/bash
#
# Copyright (c) 2013-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##########################################################################################
# Debug switch: dlog only emits output while this holds the string "true".
# It is changed at run time through set_debug_mode.
DEBUG=false
# Fail Codes
# Numeric status codes shared by the collect scripts that source this file
# (for example source_openrc_if_needed exits with FAIL_NODETYPE).
PASS=0
FAIL=1
RETRY=2
FAIL_NODETYPE=3
# Timeout codes; the numbered variants occupy 11..19.
FAIL_TIMEOUT=10
FAIL_TIMEOUT1=11
FAIL_TIMEOUT2=12
FAIL_TIMEOUT3=13
FAIL_TIMEOUT4=14
FAIL_TIMEOUT5=15
FAIL_TIMEOUT6=16
FAIL_TIMEOUT7=17
FAIL_TIMEOUT8=18
FAIL_TIMEOUT9=19
FAIL_PASSWORD=30
FAIL_PERMISSION=31
FAIL_CLEANUP=32
FAIL_UNREACHABLE=33
FAIL_HOSTNAME=34
FAIL_INACTIVE=35
FAIL_PERMISSION_SKIP=36
FAIL_OUT_OF_SPACE=37
FAIL_INSUFFICIENT_SPACE=38
FAIL_OUT_OF_SPACE_LOCAL=39
# NOTE(review): FAIL_CREATE has the same value (39) as FAIL_OUT_OF_SPACE_LOCAL,
# so the two conditions cannot be told apart from the code alone.
# Confirm whether this is intentional before relying on either value.
FAIL_CREATE=39
# Warnings are above 200
WARN_WARNING=200
WARN_HOSTNAME=201
# Failure Strings
# These are the texts collect_errors searches for in COLLECT_ERROR_LOG
# (through the listOfOutOfSpaceErrors array).
FAIL_OUT_OF_SPACE_STR="No space left on device"
FAIL_TAR_OUT_OF_SPACE_STR="tar: Error is not recoverable"
FAIL_INSUFFICIENT_SPACE_STR="Not enough space on device"
# The minimum amount of % free space on /scratch to allow collect to proceed
MIN_PERCENT_SPACE_REQUIRED=75
# Log file path/names
# COLLECT_ERROR_LOG receives the stderr of delimiter and log_slabinfo and is
# the file scanned by collect_errors.
COLLECT_LOG=/var/log/collect.log
COLLECT_ERROR_LOG=/tmp/collect_error.log
# Load the platform configuration and work out whether this host is an
# active controller.
#
# Globals written (deliberately NOT local, so that they stay visible to the
# rest of collect, including scripts that source this file):
#   PLATFORM_CONF, nodetype, subfunction and anything else platform.conf sets
#   ACTIVE      - "true" only when openrc provided a non-empty OS_USERNAME
#   OPENRC, OS_USERNAME (and whatever openrc sets) on controllers
# Globals read: COLLECT_TAG, FAIL_NODETYPE
# Exits the shell with FAIL_NODETYPE when nodetype is not one of
# controller, worker or storage.
function source_openrc_if_needed
{
    # start from a clean slate so stale values cannot leak through
    PLATFORM_CONF=/etc/platform/platform.conf
    nodetype=""
    subfunction=""
    if [[ -e "${PLATFORM_CONF}" ]] ; then
        source "${PLATFORM_CONF}"
    fi

    # only the three known node personalities are acceptable
    case "${nodetype}" in
        controller|worker|storage)
            ;;
        *)
            logger -t ${COLLECT_TAG} "could not identify nodetype ($nodetype)"
            exit $FAIL_NODETYPE
            ;;
    esac

    ACTIVE=false

    # activity state only applies to controllers
    if [[ "${nodetype}" != "controller" ]] ; then
        return 0
    fi

    # get local host activity state
    OPENRC="/etc/platform/openrc"
    if [[ ! -e "${OPENRC}" ]] ; then
        return 0
    fi

    # openrc is expected to fill in OS_USERNAME when credentials are available
    OS_USERNAME=""
    source "${OPENRC}"
    if [[ -n "${OS_USERNAME}" ]] ; then
        ACTIVE=true
    fi
}
# Setup an expect command completion file.
# This is used to force serialization of expect
# sequences and highlight command completion
# NOTE(review): the consumers of these three values are outside this file
# view; confirm how collect_done / cmd_done_sig / cmd_done_file are used.
collect_done="collect done"
cmd_done_sig="expect done"
cmd_done_file="/usr/local/sbin/expect_done"
# Compression Commands
# Each value is a command prefix ending in the "f" (archive file) option, so
# the archive name is presumably appended by the caller - confirm at call sites.
# TAR_CMD additionally carries -h, which makes tar follow symbolic links.
TAR_ZIP_CMD="tar -cvzf"
TAR_UZIP_CMD="tar -xvzf"
TAR_CMD="tar -cvhf"
UNTAR_CMD="tar -xvf"
ZIP_CMD="gzip"
# Scheduling prefixes: nice -n19 is the lowest CPU priority and
# ionice -c2 -n7 the lowest priority within the best-effort I/O class.
NICE_CMD="/usr/bin/nice -n19"
IONICE_CMD="/usr/bin/ionice -c2 -n7"
# Tag handed to "logger -t" by the logging helpers in this file.
COLLECT_TAG="COLLECT"
# Option names for a date range.
# NOTE(review): the code that parses or forwards them is not visible here.
STARTDATE_OPTION="--start-date"
ENDDATE_OPTION="--end-date"
# ps invocation listing every process (-e) as a hierarchy (-H) with the
# explicit column set given to -o.
PROCESS_DETAIL_CMD="ps -e -H -o ruser,tid,pid,ppid,flags,stat,policy,rtprio,nice,priority,rss:10,vsz:10,sz:10,psr,stime,tty,cputime,wchan:14,cmd"
# Prints the contents of /etc/build.info.
BUILD_INFO_CMD="cat /etc/build.info"
################################################################################
# Log Debug, Info or Error log message to syslog
################################################################################

# Send a message to syslog only, tagged with COLLECT_TAG.
# Arguments: $@ - the message (one or more words)
# Globals  : COLLECT_TAG (read)
function log
{
    # "$@" is quoted so the message is neither word-split nor glob-expanded
    # (an unquoted '*' would be replaced by file names from the current
    # directory). The '--' keeps logger from treating a message that starts
    # with '-' as an option.
    logger -t "${COLLECT_TAG}" -- "$@"
}
# Informational log: print the message on stdout and send it to syslog
# tagged with COLLECT_TAG.
# Arguments: $@ - the message (one or more words)
# Globals  : COLLECT_TAG (read)
function ilog
{
    echo "$@"
    # Quoted "$@" keeps the syslog copy identical to what was echoed: no word
    # splitting and no glob expansion of characters such as '*'. The '--'
    # protects messages that begin with '-'.
    logger -t "${COLLECT_TAG}" -- "$@"
    #logger -p local3.info -t ${COLLECT_TAG} $@
}
# Error log: print "Error: <message>" on stdout and send the bare message
# to syslog tagged with COLLECT_TAG.
# Arguments: $@ - the message (one or more words)
# Globals  : COLLECT_TAG (read)
function elog
{
    echo "Error: $@"
    # Quoted "$@" prevents word splitting and glob expansion of the message
    # on its way to syslog; '--' protects messages that begin with '-'.
    logger -t "${COLLECT_TAG}" -- "$@"
}
# Warning log: print "Warning: <message>" on stdout and send the bare
# message to syslog tagged with COLLECT_TAG.
# Arguments: $@ - the message (one or more words)
# Globals  : COLLECT_TAG (read)
function wlog
{
    echo "Warning: $@"
    # Quoted "$@" prevents word splitting and glob expansion of the message
    # on its way to syslog; '--' protects messages that begin with '-'.
    logger -t "${COLLECT_TAG}" -- "$@"
}
# Switch debug logging on or off.
# Arguments: $1 - new value for DEBUG; dlog is active only for "true"
# Globals  : DEBUG (written)
function set_debug_mode()
{
    local requested_mode="${1}"
    DEBUG="${requested_mode}"
}
# Debug log: only while DEBUG is "true", send the message to syslog tagged
# with COLLECT_TAG and print "Debug: <message>" on stdout. Silent otherwise.
# Arguments: $@ - the message (one or more words)
# Globals  : DEBUG, COLLECT_TAG (read)
function dlog()
{
    if [ "$DEBUG" == true ] ; then
        # Quoted "$@" prevents word splitting and glob expansion of the
        # message; '--' protects messages that begin with '-'.
        logger -t "${COLLECT_TAG}" -- "$@"
        echo "Debug: $@"
    fi
}
# Append a three line section header to a collect output file:
#   a dashed rule, "<date> : <hostname> : <title>", a dashed rule.
# Arguments: $1 - file to append to
#            $2 - section title
# Globals  : myhostname, COLLECT_ERROR_LOG (read)
# Anything written to stderr while appending goes to COLLECT_ERROR_LOG.
function delimiter()
{
    local outfile="${1}"
    local title="${2}"
    local rule="--------------------------------------------------------------------"

    # The redirect targets are quoted: an unquoted target that contains
    # whitespace makes bash fail with "ambiguous redirect" and nothing is
    # written. Grouping the echoes opens each file once instead of three times.
    {
        echo "${rule}"
        echo "$(date) : ${myhostname} : ${title}"
        echo "${rule}"
    } >> "${outfile}" 2>> "${COLLECT_ERROR_LOG}"
}
# Append a formatted copy of /proc/slabinfo to a collect output file, with an
# extra per-cache "KiB" column and a final TOTAL row.
# Arguments: $1 - file to append to
# Globals  : PAGE_SIZE (written, left global as before), COLLECT_ERROR_LOG (read)
# awk's stderr is appended to COLLECT_ERROR_LOG.
function log_slabinfo()
{
    PAGE_SIZE=$(getconf PAGE_SIZE)

    # The input is deliberately piped from cat rather than opened by awk or
    # by a '<' redirect: when /proc/slabinfo cannot be read awk still runs on
    # empty input and emits the TOTAL row.
    # The redirect targets are quoted; an unquoted target containing
    # whitespace makes bash fail with "ambiguous redirect".
    #
    # awk rules:
    #   NF == 17 : the column header line; angle brackets are stripped and
    #              the column names re-printed with a trailing "KiB" heading
    #   NF == 16 : one cache per line; KiB is derived from the page size,
    #              $3 (num_objs), $5 (obj_per_slab) and $6 (pages_per_slab)
    #   END      : the accumulated total of the KiB column
    cat /proc/slabinfo | awk -v page_size_B="${PAGE_SIZE}" '
    BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
    (NF == 17) {
        gsub(/[<>]/, "");
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
            $2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
    }
    (NF == 16) {
        num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
        KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
        TOT_KiB += KiB;
        printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
            $1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
    }
    END {
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
            "TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
    }
    ' >> "${1}" 2>> "${COLLECT_ERROR_LOG}"
}
###########################################################################
#
# Name       : collect_errors
#
# Description: Search COLLECT_ERROR_LOG for any of the known space related
#              error strings held in listOfOutOfSpaceErrors.
#              Return 0 if no such logs are found (or the log is absent).
#              Return 1 if such logs are found.
#
# Parameters : $1 - name of the host the collect was run against; only
#                   used in the messages that are produced
#
# Globals    : COLLECT_ERROR_LOG, listOfOutOfSpaceErrors, COLLECT_TAG,
#              COLLECT_BASE_DIR, FAIL_OUT_OF_SPACE, FAIL_OUT_OF_SPACE_STR
#              (all read only)
#
# Assumptions: Caller should assume a non-zero return as an indication of
#              a corrupt or incomplete collect log
#
#              Create logs and screen echos that record the error for the user.
#              The reported reason is always the generic out of space
#              code/string, whichever of the known strings matched.
#
#              May look for other errors in the future
#
###########################################################################
listOfOutOfSpaceErrors=(
    "${FAIL_OUT_OF_SPACE_STR}"
    "${FAIL_TAR_OUT_OF_SPACE_STR}"
    "${FAIL_INSUFFICIENT_SPACE_STR}"
)

function collect_errors()
{
    local host=${1}
    local RC=0
    # kept local so that a caller's own 'index'/'string' style variables are
    # not overwritten as a side effect of this check
    local pattern
    local message

    if [ -e "${COLLECT_ERROR_LOG}" ] ; then

        ## now loop through known space related error strings
        for pattern in "${listOfOutOfSpaceErrors[@]}" ; do

            # an empty pattern would match every line ; ignore it
            [ -n "${pattern}" ] || continue

            # -F : the error strings are literal text, not regular expressions
            if grep -qF -- "${pattern}" "${COLLECT_ERROR_LOG}" ; then

                message="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})"

                # /var/log/user.log it
                logger -t "${COLLECT_TAG}" "${message}"

                # logs that show up in the foreground
                echo "${message}"
                echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation."

                # return error code ; one report is enough
                RC=1
                break
            fi
        done
    fi
    return ${RC}
}