integ/tools/engtools/parsers/common/parse-all.sh

#!/bin/bash

#Copyright (c) 2016-2017 Wind River Systems, Inc.
#
#SPDX-License-Identifier: Apache-2.0
#

# This script is used to parse all stats data. It is designed to be called by either
# parse-controllers.sh or parse-computes.sh and not used as a standalone script.
# If the input node is a controller, it will parse controller specific postgres &
# and rabbitmq stats first. If the input node is a compute, it will pars the compute
# specific vswitch stats first.
#
# The following parsing steps are common to all hosts and are executed in the specified order:
#     - Parse occtop
#     - Parse memtop
#     - Parse memstats (summary)
#     - Parse netstats
#     - Parse schedtop (summary)
#     - Parse iostats
#     - Parse diskstats
#     - Parse filestats (summary)
#     - Parse process level schedtop (optional step, configured in lab.conf)
#     - Generate tarball

if [[ $# != 1 ]]; then
    echo "ERROR: This script is meant to be called by either parse-controllers.sh or parse-computes.sh script."
    echo "To run it separately, copy the script to the host directory that contains *.bz2 files."
    echo "It takes a single argument - the name of the host directory (e.g. ./parse-all.sh controller-0)."
    exit 1
fi

source ../lab.conf
source ./host.conf

PARSERDIR=$(dirname $0)
. ${PARSERDIR}/parse-util.sh

NODE=$1

CURDATE=$(date)
DATESTAMP=$(date +%b-%d)

function sedit {
    local FILETOSED=$1
    sed -i -e "s/  */ /g" ${FILETOSED}
    sed -i -e "s/ /,/g" ${FILETOSED}
    # Remove any trailing comma
    sed -i "s/,$//" ${FILETOSED}
}

function get_filename_from_mountname {
    local name=$1
    local fname
    if test "${name#*"scratch"}" != "${name}"; then
        fname="scratch"
    elif test "${name#*"log"}" != "${name}"; then
        fname="log"
    elif test "${name#*"backup"}" != "${name}"; then
        fname="backup"
    elif test "${name#*"ceph/mon"}" != "${name}"; then
        fname="cephmon"
    elif test "${name#*"conversion"}" != "${name}"; then
        fname="img-conversion"
    elif test "${name#*"platform"}" != "${name}"; then
        fname="platform"
    elif test "${name#*"postgres"}" != "${name}"; then
        fname="postgres"
    elif test "${name#*"cgcs"}" != "${name}"; then
        fname="cgcs"
    elif test "${name#*"rabbitmq"}" != "${name}"; then
        fname="rabbitmq"
    elif test "${name#*"instances"}" != "${name}"; then
        fname="pv"
    elif test "${name#*"ceph/osd"}" != "${name}"; then
        # The ceph disk partition has the following mount name convention
        # /var/lib/ceph/osd/ceph-0
        fname=`basename ${name}`
    fi
    echo $fname
}

function parse_process_schedtop_data {
    # Logic has been moved to a separate script so that parsing process level schedtop
    # can be run either as part of parse-all.sh script or independently.
    LOG "Process level schedtop parsing is turned on in lab.conf. Parsing schedtop detail..."
    cd ..
    ./parse-schedtop.sh ${NODE}
    cd ${NODE}
}

function parse_controller_specific {
    # Parsing Postgres data, removing data from previous run if there are any. Generate summary
    # data for each database and detail data for specified tables
    LOG "Parsing postgres data for ${NODE}"
    if [ -z "${DATABASE_LIST}" ]; then
        WARNLOG "DATABASE_LIST is not set in the lab.conf file. Use default setting"
        DATABASE_LIST="cinder glance keystone nova neutron ceilometer heat sysinv aodh barbican postgres nova_api"
    fi

    for DB in ${DATABASE_LIST}; do
        rm /tmp/${DB}*.csv
    done
    ../parse_postgres *postgres.bz2 >postgres-summary-${NODE}-${DATESTAMP}.txt
    for DB in ${DATABASE_LIST}; do
        cp /tmp/${DB}_size.csv postgres_${DB}_size.csv
    done
    for TABLE in ${TABLE_LIST}; do
        cp /tmp/${TABLE}.csv postgres_${TABLE}.csv
    done

    # Parsing RabbitMQ data
    LOG "Parsing rabbitmq data for ${NODE}"
    ../parse-rabbitmq.sh rabbitmq-${NODE}.csv

    for QUEUE in ${RABBITMQ_QUEUE_LIST}; do
        # If node is not a controller node then parse-rabbitmq-queue.sh should skip
        ../parse-rabbitmq-queue.sh rabbitmq-${QUEUE}-${NODE}.csv ${QUEUE}
    done
}

function parse_compute_specific {
    LOG "Parsing vswitch data for ${NODE}"
    ../parse-vswitch.sh ${NODE}
}

function parse_occtop_data {
    LOG "Parsing occtop data for ${NODE}"
    bzcat *occtop.bz2 >occtop-${NODE}-${DATESTAMP}.txt
    cp occtop-${NODE}-${DATESTAMP}.txt tmp.txt
    sedit tmp.txt
    # Get the highest column count
    column_count=$(awk -F "," '{print NF}' tmp.txt | sort -nu | tail -n 1)
    grep '^[0-9]' tmp.txt |cut -d, -f1,2 | awk -F "," '{print $1" "$2}' > tmpdate.txt
    grep '^[0-9]' tmp.txt |cut -d, -f3-$column_count > tmpcore.txt
    paste -d, tmpdate.txt tmpcore.txt > tmp2.txt
    # Generate header based on the number of columns. The Date/Time column consists of date and time fields
    header="Date/Time,Total"
    count=$(($column_count-3))
    for i in $(seq 0 $(($count-1))); do
        header="$header,$i"
    done

    # Generate detailed CSV with Date/Time, Total CPU occupancy and individual core occupancies e.g.
    # Date/Time,Total,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35
    # 2016-11-22 00:29:16.523,759.5,21.4,18.9,43.8,24.5,23.1,25.3,28.1,25.5,20.5,27.8,26.8,32.7,27.3,25.1,21.1,23.2,21.7,36.4,23.3,16.6,15.3,13.9,14.4,15.0,14.7,14.4,16.4,13.8,17.0,17.8,19.0,15.1,14.0,13.2,14.5,17.8
    echo "${header}" > occtop-${NODE}-detailed.csv
    cat tmp2.txt >> occtop-${NODE}-detailed.csv

    # Generate simple CSV file which is used to generate host CPU occupancy chart. Platform cores are
    # defined in the host.conf. The simple CSV contains only the Date/Time and Total platform CPU occupancy e.g.
    # Date/Time,Total
    # 2016-11-22 00:29:16.523,94.9
    # 2016-11-22 00:30:16.526,71.3

    if [ -z "${PLATFORM_CPU_LIST}" ]; then
        # A controller node in standard system. In this case, all cores are dedicated to platform use.
        # Simply extract the Date/Time and Total CPU occupancy
        cut -d, -f1,2 occtop-${NODE}-detailed.csv > occtop-${NODE}.csv
    else
        # A CPE, compute or storage node. The cores dedicated to platform use are specified in the config.
        echo "Date/Time,Total" > occtop-${NODE}.csv
        while read -r line || [[ -n "$line" ]]; do
            IFS="," read -r -a arr <<< "${line}"
            total=0
            for CORE in ${PLATFORM_CPU_LIST}; do
                # Add 2 to the index as occupancy of each individual core starts after Date/Time and Total
                idx=$(($CORE+2))
                total=`echo $total + ${arr[$idx]} | bc`
            done
            echo "${arr[0]},${total}" >> occtop-${NODE}.csv
        done < tmp2.txt
    fi
    # Remove temporary files
    rm tmp.txt tmp2.txt tmpdate.txt tmpcore.txt
}

function parse_memtop_data {
    LOG "Parsing memtop data for ${NODE}"
    bzcat *memtop.bz2 > memtop-${NODE}-${DATESTAMP}.txt
    cp memtop-${NODE}-${DATESTAMP}.txt tmp.txt
    sedit tmp.txt

    # After dumping all memtop bz2 output into one text file and in-place sed, grab only relevant data
    # for CSV output. Generate both detailed and simple CSV files. Simple output will be used to generate
    # chart.
    grep '^[0-9]' tmp.txt | awk -F "," '{print $1" "$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14","$15","$16","$17","$18}' > tmp2.txt
    echo "Date/Time,Total,Used,Free,Cached,Buf,Slab,CAS,CLim,Dirty,WBack,Anon,Avail,0:Avail,0:HFree,1:Avail,1:HFree" > memtop-${NODE}-detailed.csv
    cat tmp2.txt >> memtop-${NODE}-detailed.csv
    echo "Date/Time,Total,Anon" > memtop-${NODE}.csv
    cut -d, -f1-2,12 tmp2.txt >> memtop-${NODE}.csv
    # Remove temporary files
    rm tmp.txt tmp2.txt
}

function parse_netstats_data {
    LOG "Parsing netstats data for ${NODE}"
    # First generate the summary data then detail data for specified interfaces
    ../parse_netstats *netstats.bz2 > netstats-summary-${NODE}-${DATESTAMP}.txt
    if [ -z "${NETSTATS_INTERFACE_LIST}" ]; then
        ERRLOG "NETSTATS_INTERFACE_LIST is not set in host.conf. Skipping detail netstats..."
    else
        for INTERFACE in ${NETSTATS_INTERFACE_LIST}; do
            echo "Date/Time,Interface,Rx PPS,Rx Mbps,Rx Packet Size,Tx PPS,Tx Mbps,Tx Packet Size" > netstats-${NODE}-${INTERFACE}.csv
            ../parse_netstats *netstats.bz2 | grep " ${INTERFACE} " > tmp.txt
            sed -i -e "s/|/ /g" tmp.txt
            sed -i -e "s/    */ /g;s/  */ /g" tmp.txt
            sed -i -e "s/ /,/g" tmp.txt
            # Remove the leading comma
            sed -i 's/,//' tmp.txt
            while read -r line || [[ -n "$line" ]]; do
                IFS="," read -r -a arr <<< "${line}"
                echo "${arr[8]} ${arr[9]},${arr[0]},${arr[2]},${arr[3]},${arr[4]},${arr[5]},${arr[6]},${arr[7]}" >> netstats-${NODE}-${INTERFACE}.csv
            done < tmp.txt
        done
        rm tmp.txt
    fi
}

function parse_iostats_data {
    LOG "Parsing iostat data for ${NODE}"
    if [ -z "${IOSTATS_DEVICE_LIST}" ]; then
        ERRLOG "IOSTAT_DEVICE_LIST is not set in host.conf. Skipping iostats..."
    else
        for DEVICE in ${IOSTATS_DEVICE_LIST}; do
            # Add header to output csv file
            echo "Date/Time,${DEVICE},rqm/s,wrqm/s,r/s,w/s,rkB/s,wkB/s,avgrq-sz,avgqu-sz,await,r_await,w_await,svctm,%util" > iostat-${NODE}-${DEVICE}.csv
            # Dumping iostat content to tmp file
            bzcat *iostat.bz2 | grep -E "/2015|/2016|/2017|${DEVICE}"   | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14}' > tmp.txt
            while IFS= read -r current; do
                if test "${current#*Linux}" != "$current"
                then
                    # Skip the line that contains the word "Linux"
                    continue
                else
                    if test "${current#*$DEVICE}" == "$current"
                    then
                        # It's a date entry, look ahead
                        read -r next
                        if test "${next#*$DEVICE}" != "${next}"
                        then
                            # This next line contains the device stats
                            # Combine date and time fields
                            current="${current//2016,/2016 }"
                            current="${current//2017,/2017 }"
                            # Combine time and AM/PM fields
                            current="${current//,AM/ AM}"
                            current="${current//,PM/ PM}"
                            # Write both lines to intermediate file
                            echo "${current}" >> tmp2.txt
                            echo "${next}" >> tmp2.txt
                        fi
                    fi
                fi
            done < tmp.txt
            mv tmp2.txt tmp.txt
            # Combine the time and stats data into one line
            # 11/22/2016 06:34:00 AM,,,,,,,,,,,
            # dm-0,0.00,0.00,0.00,1.07,0.00,38.00,71.25,0.00,0.19,0.00,0.19,0.03,0.00
            paste -d "" - - < tmp.txt > tmp2.txt
            # Remove empty fields, use "complement" option for contiguous fields/range
            cut -d, -f2-11 --complement tmp2.txt > tmp.txt
            # Write final content to output csv
            cat tmp.txt >> iostat-${NODE}-${DEVICE}.csv
        rm tmp.txt tmp2.txt
        done
    fi
}

function parse_diskstats_data {
    LOG "Parsing diskstats data for ${NODE}"

    if [ -z "${DISKSTATS_FILESYSTEM_LIST}" ]; then
        ERRLOG "DISKSTATS_FILESYSTEM_LIST is not set in host.conf. Skipping diskstats..."
    else
        for FS in ${DISKSTATS_FILESYSTEM_LIST}; do
            fspair=(${FS//|/ })
            fsname=${fspair[0]}
            mountname=${fspair[1]}
            if [ ${mountname} == "/" ]; then
                mountname=" /"
                echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-root.csv
                bzcat *diskstats.bz2 | grep $fsname | grep $mountname | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-root.csv
            else
                fname=$(get_filename_from_mountname $mountname)
                echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-$fname.csv
                bzcat *diskstats.bz2 | grep $fsname | grep $mountname | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-$fname.csv
            fi
        done
    fi
}

# Parsing starts here ...
LOG "Parsing ${NODE} files - ${CURDATE}"

# Let's get the host specific parsing out of the way
if test "${NODE#*"controller"}" != "${NODE}"; then
    parse_controller_specific
elif test "${NODE#*"compute"}" != "${NODE}"; then
    parse_compute_specific
fi

# Parsing CPU occtop data
parse_occtop_data

# Parsing memtop data
parse_memtop_data

# Parsing memstats data to generate the high level report. The most important piece of info is the list of
# hi-runners at the end of the file. If there is a leak, run parse-daily.sh script to generate the time
# series data for the offending processes only. Use process name, not PID as most Titanium Cloud processes have
# workers.
LOG "Parsing memstats summary for ${NODE}"
../parse_memstats --report *memstats.bz2 > memstats-summary-${NODE}-${DATESTAMP}.txt
#tar czf pidstats.tgz pid-*.csv
rm pid-*.csv


# Parsing netstats data
parse_netstats_data

# Parsing schedtop data to generate the high level report. Leave the process level schedtop parsing till
# the end as it is a long running task.
LOG "Parsing schedtop summary for ${NODE}"
FILES=$(ls *schedtop.bz2)
../parse_schedtop ${FILES} > schedtop-summary-${NODE}-${DATESTAMP}.txt

# Parsing iostats data
parse_iostats_data

# Parsing diskstats data
parse_diskstats_data

# Parsing filestats data to generate the high level report. If there is a file descriptor leak, run parse-daily.sh
# script to generate the time series data for the offending processes only. Use process name, not PID as most
# Titanium Cloud processes have workers.
LOG "Parsing filestats summary for ${NODE}"
../parse_filestats --all *filestats.bz2 > filestats-summary-${NODE}-${DATESTAMP}.txt

# Parsing process level schedtop data. This is a long running task. To skip this step or generate data for
# only specific processes, update the lab.conf and host.conf files.
[[ ${GENERATE_PROCESS_SCHEDTOP} == Y ]] && parse_process_schedtop_data || WARNLOG "Parsing process level schedtop is skipped."

# Done parsing for this host. If it's a controller host, check if the parsing of postgres connection stats which is run in
# parallel is done before creating a tar file.
if test "${NODE#*"controller"}" != "${NODE}"; then
    # If postgres-conns.csv file has not been created which is highly unlikely, wait a couple of minutes
    [ ! -e postgres-conns.csv ] && sleep 120

    # Now check the stats of this file every 5 seconds to see if it's still being updated. Another option
    # is to use inotify which requires another inotify-tools package.
    oldsize=0
    newsize=0
    while true; do
        newsize=$(stat -c %s postgres-conns.csv)
        if [ "$oldsize" == "$newsize" ]; then
            break
        fi
        oldsize=$newsize
        sleep 5
    done
fi
tar czf syseng-data-${NODE}-${DATESTAMP}.tgz *.csv *.txt
LOG "Parsing stats data for ${NODE} completed!"