integ/tools/engtools/parsers/common/parse-all.sh
Martin Chen dba551a518 Fix linters error and enable linters zuul gate
Fix below linters errors
E010 The "do" should be on same line as for
E010 The "do" should be on same line as while
E011 Then keyword is not on same line as if or elif keyword
E020 Function declaration not in format ^function name {$

Ignore:
E041 Arithmetic expansion using $[ is deprecated for $((
E042 local declaration hides errors
E043 Arithmetic compound has inconsistent return semantics
E044 Use [[ for non-POSIX comparisions

Story: 2003366
Task: 24423

Change-Id: I8b6b72e702d3e89d1813772d6bf16819e28e818c
Signed-off-by: Martin Chen <haochuan.z.chen@intel.com>
2018-09-07 01:50:28 +08:00

365 lines
15 KiB
Bash
Executable File

#!/bin/bash
#Copyright (c) 2016-2017 Wind River Systems, Inc.
#
#SPDX-License-Identifier: Apache-2.0
#
# This script is used to parse all stats data. It is designed to be called by either
# parse-controllers.sh or parse-computes.sh and not used as a standalone script.
# If the input node is a controller, it will parse controller specific postgres
# and rabbitmq stats first. If the input node is a compute, it will parse the
# compute specific vswitch stats first.
#
# The following parsing steps are common to all hosts and are executed in the specified order:
# - Parse occtop
# - Parse memtop
# - Parse memstats (summary)
# - Parse netstats
# - Parse schedtop (summary)
# - Parse iostats
# - Parse diskstats
# - Parse filestats (summary)
# - Parse process level schedtop (optional step, configured in lab.conf)
# - Generate tarball
# Must be called with exactly one argument: the host directory name.
if [[ $# != 1 ]]; then
    echo "ERROR: This script is meant to be called by either parse-controllers.sh or parse-computes.sh script."
    echo "To run it separately, copy the script to the host directory that contains *.bz2 files."
    echo "It takes a single argument - the name of the host directory (e.g. ./parse-all.sh controller-0)."
    exit 1
fi

# Lab-wide and host-specific settings (DATABASE_LIST, PLATFORM_CPU_LIST,
# NETSTATS_INTERFACE_LIST, ...). The script is expected to run from inside
# the host directory, one level below lab.conf.
source ../lab.conf
source ./host.conf

# Logging helpers (LOG/WARNLOG/ERRLOG) live next to this script.
# Quote "$0" so a path containing spaces does not break dirname.
PARSERDIR=$(dirname "$0")
. "${PARSERDIR}/parse-util.sh"

NODE=$1
CURDATE=$(date)
DATESTAMP=$(date +%b-%d)
function sedit {
    # Normalize a whitespace-separated stats file into CSV, in place.
    # $1 - file to edit
    # Runs of blanks become a single comma; a trailing comma is dropped.
    local FILETOSED=$1
    # Single sed pass instead of three separate in-place invocations.
    # Note: the pattern must be "s/  */ /g" (two spaces + star); a bare
    # "s/ */ /g" also matches the empty string and would insert a separator
    # at every character boundary.
    sed -i -e "s/  */ /g" -e "s/ /,/g" -e "s/,$//" "${FILETOSED}"
}
function get_filename_from_mountname {
    # Map a mount point path to the short name used in diskstats CSV file
    # names. Patterns are checked in order, first match wins; an unknown
    # mount yields an empty string.
    # $1 - mount point name (e.g. /var/lib/postgresql)
    local name=$1
    local fname=""
    case ${name} in
        *scratch*)    fname="scratch" ;;
        *log*)        fname="log" ;;
        *backup*)     fname="backup" ;;
        *ceph/mon*)   fname="cephmon" ;;
        *conversion*) fname="img-conversion" ;;
        *platform*)   fname="platform" ;;
        *postgres*)   fname="postgres" ;;
        *cgcs*)       fname="cgcs" ;;
        *rabbitmq*)   fname="rabbitmq" ;;
        *instances*)  fname="pv" ;;
        *ceph/osd*)
            # Ceph OSD partitions mount as e.g. /var/lib/ceph/osd/ceph-0;
            # use the last path component (ceph-0) as the short name.
            fname=$(basename "${name}")
            ;;
    esac
    echo "${fname}"
}
function parse_process_schedtop_data {
# Parse per-process schedtop detail for ${NODE}.
# Logic has been moved to a separate script (parse-schedtop.sh) so that parsing
# process level schedtop can be run either as part of parse-all.sh or independently.
# NOTE(review): assumes the current working directory is the host directory
# ${NODE} and that parse-schedtop.sh lives one level up -- the cd pair below
# relies on both; confirm against the callers.
LOG "Process level schedtop parsing is turned on in lab.conf. Parsing schedtop detail..."
cd ..
./parse-schedtop.sh ${NODE}
cd ${NODE}
}
function parse_controller_specific {
    # Controller-only parsing: postgres database/table stats and rabbitmq
    # stats. Reads DATABASE_LIST, TABLE_LIST and RABBITMQ_QUEUE_LIST from the
    # sourced lab.conf/host.conf.
    LOG "Parsing postgres data for ${NODE}"
    if [ -z "${DATABASE_LIST}" ]; then
        WARNLOG "DATABASE_LIST is not set in the lab.conf file. Use default setting"
        DATABASE_LIST="cinder glance keystone nova neutron ceilometer heat sysinv aodh postgres nova_api"
    fi
    # Remove stale per-database CSVs from a previous run. -f keeps a clean
    # first run from emitting "No such file or directory" errors.
    for DB in ${DATABASE_LIST}; do
        rm -f /tmp/${DB}*.csv
    done
    # parse_postgres writes the summary to stdout and per-DB/table CSVs to /tmp
    ../parse_postgres *postgres.bz2 >postgres-summary-${NODE}-${DATESTAMP}.txt
    for DB in ${DATABASE_LIST}; do
        cp /tmp/${DB}_size.csv postgres_${DB}_size.csv
    done
    for TABLE in ${TABLE_LIST}; do
        cp /tmp/${TABLE}.csv postgres_${TABLE}.csv
    done
    # Parsing RabbitMQ data
    LOG "Parsing rabbitmq data for ${NODE}"
    ../parse-rabbitmq.sh rabbitmq-${NODE}.csv
    for QUEUE in ${RABBITMQ_QUEUE_LIST}; do
        # If node is not a controller node then parse-rabbitmq-queue.sh should skip
        ../parse-rabbitmq-queue.sh rabbitmq-${QUEUE}-${NODE}.csv ${QUEUE}
    done
}
function parse_compute_specific {
# Compute-host specific parsing: delegate vswitch stats for ${NODE} to the
# parse-vswitch.sh script in the parent directory.
LOG "Parsing vswitch data for ${NODE}"
../parse-vswitch.sh ${NODE}
}
function parse_occtop_data {
    # Parse CPU occupancy (occtop) samples for ${NODE} into two CSV files:
    #   occtop-<node>-detailed.csv : Date/Time, Total, one column per core
    #   occtop-<node>.csv          : Date/Time, total platform occupancy only
    LOG "Parsing occtop data for ${NODE}"
    bzcat *occtop.bz2 >occtop-${NODE}-${DATESTAMP}.txt
    cp occtop-${NODE}-${DATESTAMP}.txt tmp.txt
    sedit tmp.txt
    # Get the highest column count (samples may differ in width)
    column_count=$(awk -F "," '{print NF}' tmp.txt | sort -nu | tail -n 1)
    # Data rows start with a digit. Re-join the date and time fields with a
    # space, then re-attach the per-core occupancy columns.
    grep '^[0-9]' tmp.txt |cut -d, -f1,2 | awk -F "," '{print $1" "$2}' > tmpdate.txt
    grep '^[0-9]' tmp.txt |cut -d, -f3-$column_count > tmpcore.txt
    paste -d, tmpdate.txt tmpcore.txt > tmp2.txt
    # Generate header based on the number of columns. The Date/Time column
    # consists of date and time fields; core columns are numbered 0..count-1.
    header="Date/Time,Total"
    count=$(($column_count-3))
    for i in $(seq 0 $(($count-1))); do
        header="$header,$i"
    done
    # Detailed CSV with Date/Time, Total CPU occupancy and individual core
    # occupancies e.g.
    # Date/Time,Total,0,1,2,3,...
    # 2016-11-22 00:29:16.523,759.5,21.4,18.9,43.8,...
    echo "${header}" > occtop-${NODE}-detailed.csv
    cat tmp2.txt >> occtop-${NODE}-detailed.csv
    # Simple CSV used to generate the host CPU occupancy chart. Platform cores
    # are defined in host.conf; the simple CSV contains only the Date/Time and
    # total platform CPU occupancy, e.g.
    # Date/Time,Total
    # 2016-11-22 00:29:16.523,94.9
    if [ -z "${PLATFORM_CPU_LIST}" ]; then
        # A controller node in a standard system: all cores are dedicated to
        # platform use, so simply extract Date/Time and Total.
        cut -d, -f1,2 occtop-${NODE}-detailed.csv > occtop-${NODE}.csv
    else
        # CPE, compute or storage node: sum only the configured platform cores.
        echo "Date/Time,Total" > occtop-${NODE}.csv
        while read -r line || [[ -n "$line" ]]; do
            IFS="," read -r -a arr <<< "${line}"
            total=0
            for CORE in ${PLATFORM_CPU_LIST}; do
                # Offset by 2: core columns start after Date/Time and Total
                idx=$(($CORE+2))
                # bc keeps the fractional part ($(( )) is integer-only)
                total=$(echo "$total + ${arr[$idx]}" | bc)
            done
            echo "${arr[0]},${total}" >> occtop-${NODE}.csv
        done < tmp2.txt
    fi
    # Remove temporary files
    rm tmp.txt tmp2.txt tmpdate.txt tmpcore.txt
}
function parse_memtop_data {
    # Parse memory occupancy (memtop) samples for ${NODE} into a detailed CSV
    # (all columns) and a simple CSV (Date/Time, Total, Anon) used for charts.
    LOG "Parsing memtop data for ${NODE}"
    # Concatenate every memtop sample once: keep a permanent text copy and a
    # scratch copy that sedit will normalize into comma-separated fields.
    bzcat *memtop.bz2 | tee memtop-${NODE}-${DATESTAMP}.txt > tmp.txt
    sedit tmp.txt
    # Keep only data rows (those starting with a digit) and re-join the date
    # and time fields with a space, leaving the remaining columns comma
    # separated.
    grep '^[0-9]' tmp.txt | awk -F "," '{print $1" "$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14","$15","$16","$17","$18}' > tmp2.txt
    echo "Date/Time,Total,Used,Free,Cached,Buf,Slab,CAS,CLim,Dirty,WBack,Anon,Avail,0:Avail,0:HFree,1:Avail,1:HFree" > memtop-${NODE}-detailed.csv
    cat tmp2.txt >> memtop-${NODE}-detailed.csv
    # Simple CSV: Date/Time plus the Total and Anon columns only
    echo "Date/Time,Total,Anon" > memtop-${NODE}.csv
    cut -d, -f1,2,12 tmp2.txt >> memtop-${NODE}.csv
    # Clean up scratch files
    rm tmp.txt tmp2.txt
}
function parse_netstats_data {
# Parse netstats samples: first a summary report, then one detail CSV per
# interface listed in NETSTATS_INTERFACE_LIST (set in host.conf).
LOG "Parsing netstats data for ${NODE}"
# First generate the summary data then detail data for specified interfaces
../parse_netstats *netstats.bz2 > netstats-summary-${NODE}-${DATESTAMP}.txt
if [ -z "${NETSTATS_INTERFACE_LIST}" ]; then
ERRLOG "NETSTATS_INTERFACE_LIST is not set in host.conf. Skipping detail netstats..."
else
for INTERFACE in ${NETSTATS_INTERFACE_LIST}; do
echo "Date/Time,Interface,Rx PPS,Rx Mbps,Rx Packet Size,Tx PPS,Tx Mbps,Tx Packet Size" > netstats-${NODE}-${INTERFACE}.csv
../parse_netstats *netstats.bz2 | grep " ${INTERFACE} " > tmp.txt
# Strip the table formatting: drop the '|' separators, squeeze whitespace,
# then turn the remaining single spaces into commas.
# NOTE(review): the two expressions in the second sed look identical here;
# the original likely collapsed distinct whitespace patterns (e.g. tabs vs
# runs of spaces) -- confirm against the upstream source before editing.
sed -i -e "s/|/ /g" tmp.txt
sed -i -e "s/ */ /g;s/ */ /g" tmp.txt
sed -i -e "s/ /,/g" tmp.txt
# Remove the leading comma
sed -i 's/,//' tmp.txt
# Re-order fields so each CSV row starts with "date time".
# NOTE(review): assumes parse_netstats emits the date in field 8 and the
# time in field 9 -- verify against parse_netstats output format.
while read -r line || [[ -n "$line" ]]; do
IFS="," read -r -a arr <<< "${line}"
echo "${arr[8]} ${arr[9]},${arr[0]},${arr[2]},${arr[3]},${arr[4]},${arr[5]},${arr[6]},${arr[7]}" >> netstats-${NODE}-${INTERFACE}.csv
done < tmp.txt
done
rm tmp.txt
fi
}
function parse_iostats_data {
# Parse iostat samples into one CSV per device listed in IOSTATS_DEVICE_LIST
# (set in host.conf). Each output row combines a timestamp line with the
# stats line for the device that follows it.
LOG "Parsing iostat data for ${NODE}"
if [ -z "${IOSTATS_DEVICE_LIST}" ]; then
# NOTE(review): the message says "IOSTAT_DEVICE_LIST" but the variable
# checked is IOSTATS_DEVICE_LIST -- one of the two names is likely a typo.
ERRLOG "IOSTAT_DEVICE_LIST is not set in host.conf. Skipping iostats..."
else
for DEVICE in ${IOSTATS_DEVICE_LIST}; do
# Add header to output csv file
echo "Date/Time,${DEVICE},rqm/s,wrqm/s,r/s,w/s,rkB/s,wkB/s,avgrq-sz,avgqu-sz,await,r_await,w_await,svctm,%util" > iostat-${NODE}-${DEVICE}.csv
# Keep only timestamp lines and lines for this device, pre-split into
# comma separated fields.
# NOTE(review): the year filter is hard-coded to 2015-2017; samples from
# other years are silently dropped -- consider deriving it from the data.
bzcat *iostat.bz2 | grep -E "/2015|/2016|/2017|${DEVICE}" | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14}' > tmp.txt
# Walk the file pairing each date line with the device stats line that
# follows it; the iostat banner line (contains "Linux") is skipped.
while IFS= read -r current; do
if test "${current#*Linux}" != "$current"
then
# Skip the line that contains the word "Linux"
continue
else
if test "${current#*$DEVICE}" == "$current"
then
# It's a date entry, look ahead
read -r next
if test "${next#*$DEVICE}" != "${next}"
then
# This next line contains the device stats
# Combine date and time fields
# NOTE(review): only 2016/2017 dates are re-joined here even though the
# grep above also admits 2015 -- confirm this is intended.
current="${current//2016,/2016 }"
current="${current//2017,/2017 }"
# Combine time and AM/PM fields
current="${current//,AM/ AM}"
current="${current//,PM/ PM}"
# Write both lines to intermediate file
echo "${current}" >> tmp2.txt
echo "${next}" >> tmp2.txt
fi
fi
fi
done < tmp.txt
mv tmp2.txt tmp.txt
# Combine the time and stats data into one line, e.g.
# 11/22/2016 06:34:00 AM,,,,,,,,,,,
# dm-0,0.00,0.00,0.00,1.07,0.00,38.00,71.25,0.00,0.19,0.00,0.19,0.03,0.00
paste -d "" - - < tmp.txt > tmp2.txt
# Remove empty fields, use "complement" option for contiguous fields/range
cut -d, -f2-11 --complement tmp2.txt > tmp.txt
# Write final content to output csv
cat tmp.txt >> iostat-${NODE}-${DEVICE}.csv
rm tmp.txt tmp2.txt
done
fi
}
function parse_diskstats_data {
    # Parse df-style diskstats samples into one CSV per filesystem listed in
    # DISKSTATS_FILESYSTEM_LIST (host.conf), where each entry has the form
    # "<filesystem>|<mountpoint>".
    LOG "Parsing diskstats data for ${NODE}"
    if [ -z "${DISKSTATS_FILESYSTEM_LIST}" ]; then
        ERRLOG "DISKSTATS_FILESYSTEM_LIST is not set in host.conf. Skipping diskstats..."
    else
        local fspair fsname mountname fname
        for FS in ${DISKSTATS_FILESYSTEM_LIST}; do
            # Split "fsname|mountpoint" on the '|' separator
            fspair=(${FS//|/ })
            fsname=${fspair[0]}
            mountname=${fspair[1]}
            if [ "${mountname}" == "/" ]; then
                # Root filesystem: name the CSV "root" and grep with a leading
                # space so "/" does not match every other mount point. The
                # grep argument must be quoted or the space is lost to word
                # splitting.
                fname="root"
                mountname=" /"
            else
                fname=$(get_filename_from_mountname ${mountname})
            fi
            echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-${fname}.csv
            # "grep G" keeps only rows with GiB-scale sizes (drops headers)
            bzcat *diskstats.bz2 | grep "${fsname}" | grep "${mountname}" | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-${fname}.csv
        done
    fi
}
# Parsing starts here ...
LOG "Parsing ${NODE} files - ${CURDATE}"

# Let's get the host specific parsing out of the way
if test "${NODE#*"controller"}" != "${NODE}"; then
    parse_controller_specific
elif test "${NODE#*"compute"}" != "${NODE}"; then
    parse_compute_specific
fi

# Parsing CPU occtop data
parse_occtop_data

# Parsing memtop data
parse_memtop_data

# Parsing memstats data to generate the high level report. The most important piece of info is the list of
# hi-runners at the end of the file. If there is a leak, run parse-daily.sh script to generate the time
# series data for the offending processes only. Use process name, not PID as most Titanium Cloud processes have
# workers.
LOG "Parsing memstats summary for ${NODE}"
../parse_memstats --report *memstats.bz2 > memstats-summary-${NODE}-${DATESTAMP}.txt
#tar czf pidstats.tgz pid-*.csv
# -f: there may be no pid-*.csv files if per-PID output was not produced
rm -f pid-*.csv

# Parsing netstats data
parse_netstats_data

# Parsing schedtop data to generate the high level report. Leave the process level schedtop parsing till
# the end as it is a long running task. Let the glob expand directly instead
# of parsing ls output.
LOG "Parsing schedtop summary for ${NODE}"
../parse_schedtop *schedtop.bz2 > schedtop-summary-${NODE}-${DATESTAMP}.txt

# Parsing iostats data
parse_iostats_data

# Parsing diskstats data
parse_diskstats_data

# Parsing filestats data to generate the high level report. If there is a file descriptor leak, run parse-daily.sh
# script to generate the time series data for the offending processes only. Use process name, not PID as most
# Titanium Cloud processes have workers.
LOG "Parsing filestats summary for ${NODE}"
../parse_filestats --all *filestats.bz2 > filestats-summary-${NODE}-${DATESTAMP}.txt

# Parsing process level schedtop data is a long running task. To skip this step or generate data for
# only specific processes, update the lab.conf and host.conf files.
# Use a real if/else: with "A && B || C", C would also run when B failed.
if [[ ${GENERATE_PROCESS_SCHEDTOP} == Y ]]; then
    parse_process_schedtop_data
else
    WARNLOG "Parsing process level schedtop is skipped."
fi

# Done parsing for this host. If it's a controller host, check if the parsing of postgres connection stats
# which is run in parallel is done before creating a tar file.
if test "${NODE#*"controller"}" != "${NODE}"; then
    # If postgres-conns.csv file has not been created which is highly unlikely, wait a couple of minutes
    [ ! -e postgres-conns.csv ] && sleep 120
    # Poll the file size every 5 seconds until it stops changing. (inotify
    # would avoid polling but requires the inotify-tools package.)
    oldsize=0
    newsize=0
    while true; do
        newsize=$(stat -c %s postgres-conns.csv)
        if [ "$oldsize" == "$newsize" ]; then
            break
        fi
        oldsize=$newsize
        sleep 5
    done
fi

tar czf syseng-data-${NODE}-${DATESTAMP}.tgz *.csv *.txt
LOG "Parsing stats data for ${NODE} completed!"