
A number of collect.d service scripts are not gathering all the info that is needed during the collect operation. Many collect.d service scripts use nodetype, subfunction, vswitch_type and other platform.conf variables to determine what to collect. However, these variables are seen to be empty, which causes the service scripts to make incorrect collection decisions. This update modifies the scoping of the loaded platform.conf variables so that they are visible to all of collect, including the collect.d service scripts. Test Plan: PASS: Verify platform.conf variables are visible to collect.d service scripts. PASS: Verify file list before and after update. PASS: Run collect all and verify missing content is present. PASS: Run collect host and verify missing content is present. Change-Id: Ia6f45afd3f3fc676b0d3eaca16dc8bbe68356c8e Closes-Bug: 1839389
238 lines
6.1 KiB
Bash
Executable File
238 lines
6.1 KiB
Bash
Executable File
#! /bin/bash
|
|
#
|
|
# Copyright (c) 2013-2019 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
##########################################################################################
|
|
|
|
# Debug logging switch; dlog only emits output when this is "true".
# Changed at run time through set_debug_mode.
DEBUG=false

# ------------------------------------------------------------------
# Return / fail codes
# ------------------------------------------------------------------

# Generic results
PASS=0
FAIL=1
RETRY=2

# The nodetype found after loading platform.conf is not
# controller, worker or storage.
FAIL_NODETYPE=3

# Timeout failures
FAIL_TIMEOUT=10
FAIL_TIMEOUT1=11
FAIL_TIMEOUT2=12
FAIL_TIMEOUT3=13
FAIL_TIMEOUT4=14
FAIL_TIMEOUT5=15
FAIL_TIMEOUT6=16
FAIL_TIMEOUT7=17
FAIL_TIMEOUT8=18
FAIL_TIMEOUT9=19

# Access, reachability and disk space failures
FAIL_PASSWORD=30
FAIL_PERMISSION=31
FAIL_CLEANUP=32
FAIL_UNREACHABLE=33
FAIL_HOSTNAME=34
FAIL_INACTIVE=35
FAIL_PERMISSION_SKIP=36
FAIL_OUT_OF_SPACE=37
FAIL_INSUFFICIENT_SPACE=38
FAIL_OUT_OF_SPACE_LOCAL=39
# NOTE(review): FAIL_CREATE carries the same value (39) as
# FAIL_OUT_OF_SPACE_LOCAL, so the two cannot be told apart by value.
# Confirm whether that is intended before changing either one.
FAIL_CREATE=39

# Warning codes start at 200
WARN_WARNING=200
WARN_HOSTNAME=201
|
|
|
|
# Failure strings.
# These are the entries of listOfOutOfSpaceErrors, which collect_errors
# searches for in COLLECT_ERROR_LOG.
FAIL_OUT_OF_SPACE_STR="No space left on device"
FAIL_TAR_OUT_OF_SPACE_STR="tar: Error is not recoverable"
FAIL_INSUFFICIENT_SPACE_STR="Not enough space on device"

# The minimum amount of % free space on /scratch to allow collect to proceed
MIN_PERCENT_SPACE_REQUIRED=75

# Log file path/names
#   COLLECT_LOG       : the collect log
#   COLLECT_ERROR_LOG : receives the stderr of the collection helpers
COLLECT_LOG=/var/log/collect.log
COLLECT_ERROR_LOG=/tmp/collect_error.log
|
|
|
|
# Load the platform configuration and work out whether this host is an
# active controller.
#
# Globals written : nodetype, subfunction, PLATFORM_CONF, ACTIVE and, on a
#                   controller, OPENRC and OS_USERNAME - plus anything the
#                   sourced platform.conf / openrc files define.
# Globals read    : COLLECT_TAG, FAIL_NODETYPE
# Exits           : with FAIL_NODETYPE when nodetype is not one of
#                   controller, worker or storage.
# Result          : ACTIVE=true only when nodetype is controller, the
#                   openrc file exists and sourcing it leaves OS_USERNAME
#                   non-empty; ACTIVE=false otherwise.
function source_openrc_if_needed
{
    # start from empty values so that only what platform.conf
    # provides is taken into account
    nodetype=""
    subfunction=""
    PLATFORM_CONF=/etc/platform/platform.conf
    [ -e ${PLATFORM_CONF} ] && source ${PLATFORM_CONF}

    # only the three known node types are accepted
    case "${nodetype}" in
        controller|worker|storage)
            ;;
        *)
            logger -t ${COLLECT_TAG} "could not identify nodetype ($nodetype)"
            exit $FAIL_NODETYPE
            ;;
    esac

    ACTIVE=false

    # activity state is only evaluated on controllers
    [ "$nodetype" == "controller" ] || return 0

    # get local host activity state
    OPENRC="/etc/platform/openrc"
    [ -e "${OPENRC}" ] || return 0

    # cleared first so that a non-empty value can only come from openrc
    OS_USERNAME=""
    source ${OPENRC}
    if [ -n "${OS_USERNAME}" ] ; then
        ACTIVE=true
    fi
}
|
|
|
|
|
|
# Expect command completion markers.
# These are used to force serialization of expect sequences
# and to highlight command completion.
collect_done="collect done"
cmd_done_sig="expect done"
cmd_done_file="/usr/local/sbin/expect_done"

# Archive / compression commands
TAR_CMD="tar -cvhf"
UNTAR_CMD="tar -xvf"
TAR_ZIP_CMD="tar -cvzf"
TAR_UZIP_CMD="tar -xvzf"
ZIP_CMD="gzip"

# Scheduling priority command prefixes
NICE_CMD="/usr/bin/nice -n19"
IONICE_CMD="/usr/bin/ionice -c2 -n7"

# Tag given to logger (-t) by the logging helpers in this file
COLLECT_TAG="COLLECT"

# Date range option names
STARTDATE_OPTION="--start-date"
ENDDATE_OPTION="--end-date"

# Information gathering commands
PROCESS_DETAIL_CMD="ps -e -H -o ruser,tid,pid,ppid,flags,stat,policy,rtprio,nice,priority,rss:10,vsz:10,sz:10,psr,stime,tty,cputime,wchan:14,cmd"
BUILD_INFO_CMD="cat /etc/build.info"
|
|
|
|
################################################################################
|
|
# Log Debug, Info or Error log message to syslog
|
|
################################################################################
|
|
# Send a message to syslog through logger, tagged with COLLECT_TAG.
#
# Arguments: $@ - the message; each argument is handed to logger as is
function log
{
    # "$@" is quoted so that the message is neither word-split nor
    # glob-expanded (a '*' in the text must not turn into file names)
    logger -t "${COLLECT_TAG}" "$@"
}
|
|
|
|
# Info log: echo the message to stdout and send it to syslog through
# logger, tagged with COLLECT_TAG.
#
# Arguments: $@ - the message
function ilog
{
    echo "$@"
    # quoted so that the message is neither word-split nor glob-expanded
    logger -t "${COLLECT_TAG}" "$@"
    #logger -p local3.info -t ${COLLECT_TAG} $@
}
|
|
|
|
# Error log: echo the message to stdout with an "Error: " prefix and send
# the unprefixed message to syslog through logger, tagged with COLLECT_TAG.
#
# Arguments: $@ - the message
function elog
{
    # $* joins the arguments into the single prefixed line
    echo "Error: $*"
    # quoted so that the message is neither word-split nor glob-expanded
    logger -t "${COLLECT_TAG}" "$@"
}
|
|
|
|
# Warning log: echo the message to stdout with a "Warning: " prefix and
# send the unprefixed message to syslog through logger, tagged with
# COLLECT_TAG.
#
# Arguments: $@ - the message
function wlog
{
    # $* joins the arguments into the single prefixed line
    echo "Warning: $*"
    # quoted so that the message is neither word-split nor glob-expanded
    logger -t "${COLLECT_TAG}" "$@"
}
|
|
|
|
# Set the debug logging switch.
#
# Arguments: $1 - value stored in DEBUG; dlog produces output only when
#                 this is "true"
function set_debug_mode()
{
    DEBUG="$1"
}
|
|
|
|
# Debug log: when DEBUG is "true", send the message to syslog through
# logger (tagged with COLLECT_TAG) and echo it to stdout with a "Debug: "
# prefix. Does nothing otherwise.
#
# Arguments: $@ - the message
function dlog()
{
    if [ "$DEBUG" == true ] ; then
        # quoted so that the message is neither word-split nor glob-expanded
        logger -t "${COLLECT_TAG}" "$@"
        echo "Debug: $*"
    fi
}
|
|
|
|
|
|
# Append a three line section header to a log file: a dashed line, a
# "<date> : <hostname> : <title>" line and a second dashed line.
#
# Arguments: $1 - file the header is appended to
#            $2 - title text placed on the middle line
# Globals  : myhostname (read), COLLECT_ERROR_LOG (stderr is appended to it)
function delimiter()
{
    local logfile="${1}"
    local title="${2}"
    local bar="--------------------------------------------------------------------"

    # one grouped, quoted redirection: the target is opened once and
    # paths containing whitespace no longer cause an ambiguous redirect
    {
        echo "${bar}"
        echo "$(date) : ${myhostname} : ${title}"
        echo "${bar}"
    } >> "${logfile}" 2>>"${COLLECT_ERROR_LOG}"
}
|
|
|
|
# Append a formatted copy of /proc/slabinfo to a file, adding a KiB
# column per entry and a closing TOTAL line.
#
# Arguments: $1 - file the table is appended to
# Globals  : PAGE_SIZE (written, from getconf PAGE_SIZE),
#            COLLECT_ERROR_LOG (awk's stderr is appended to it)
function log_slabinfo()
{
    PAGE_SIZE=$(getconf PAGE_SIZE)

    # The awk program handles input lines by field count:
    #   17 fields - '<' and '>' are stripped and the fields are printed
    #               as strings, followed by a "KiB" column title
    #   16 fields - printed numerically with a computed KiB value that
    #               is also added to the running total
    # The redirect targets are quoted so that paths containing
    # whitespace do not cause an ambiguous redirect.
    cat /proc/slabinfo | awk -v page_size_B="${PAGE_SIZE}" '
    BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
    (NF == 17) {
        gsub(/[<>]/, "");
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
            $2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
    }
    (NF == 16) {
        num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
        KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
        TOT_KiB += KiB;
        printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
            $1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
    }
    END {
        printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
            "TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
    }
    ' >> "${1}" 2>>"${COLLECT_ERROR_LOG}"
}
|
|
###########################################################################
#
# Name       : collect_errors
#
# Description: search COLLECT_ERROR_LOG for any of the known out-of-space
#              error strings listed in listOfOutOfSpaceErrors.
#              Return 0 if no such logs are found or if the error log
#              file does not exist.
#              Return 1 if such logs are found
#
# Parameters : $1 - name of the host; used only in the reported messages
#
# Assumptions: Caller should assume a non-zero return as an indication of
#              a corrupt or incomplete collect log
#
#              Creates logs and screen echos that record the error for
#              the user.
#
#              May look for other errors in the future
#
###########################################################################

# Space related error strings searched for by collect_errors
listOfOutOfSpaceErrors=(
    "${FAIL_OUT_OF_SPACE_STR}"
    "${FAIL_TAR_OUT_OF_SPACE_STR}"
    "${FAIL_INSUFFICIENT_SPACE_STR}"
)

function collect_errors()
{
    local host=${1}
    local RC=0
    # kept local so that the caller's variables are not clobbered
    local pattern
    local string

    if [ -e "${COLLECT_ERROR_LOG}" ] ; then

        ## now loop through known space related error strings
        for pattern in "${listOfOutOfSpaceErrors[@]}" ; do

            # an empty pattern would match every line of the log
            [ -n "${pattern}" ] || continue

            # -F : the strings are literal text, not regular expressions
            if grep -q -F -- "${pattern}" "${COLLECT_ERROR_LOG}" ; then

                # the out-of-space reason is reported whichever of the
                # listed strings was the one that matched
                string="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})"

                # /var/log/user.log it
                logger -t "${COLLECT_TAG}" "${string}"

                # logs that show up in the foreground
                echo "${string}"
                echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation."

                # return error code
                RC=1
                break
            fi
        done
    fi
    return ${RC}
}
|