integ/tools/engtools/hostdata-collectors/scripts/live_stream.py

#!/usr/bin/python

"""
Copyright (c) 2017 Wind River Systems, Inc.

SPDX-License-Identifier: Apache-2.0
"""

import os
import sys
import time
import datetime
import psutil
import fcntl
import logging
import ConfigParser
import itertools
import six
from multiprocessing import Process, cpu_count
from subprocess import Popen, PIPE
from collections import OrderedDict


# generates the required string for the areas where fields are not static
def generateString(meas, tag_n, tag_v, field_n, field_v):
    base = "{},".format(meas)
    try:
        for i in range(len(tag_n)):
            if i == len(tag_n) - 1:
                # have space between tags and fields
                base += "'{}'='{}' ".format(tag_n[i], str(tag_v[i]))
            else:
                # separate with commas
                base += "'{}'='{}',".format(tag_n[i], str(tag_v[i]))
        for i in range(len(field_v)):
            if str(field_v[i]).replace(".", "").isdigit():
                if i == len(field_v) - 1:
                    base += "'{}'='{}'".format(field_n[i], str(field_v[i]))
                else:
                    base += "'{}'='{}',".format(field_n[i], str(field_v[i]))
        return base
    except IndexError:
        return None


# collects system memory information
def collectMemtop(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("memtop data starting collection with a collection interval of {}s".format(ci["memtop"]))
    measurement = "memtop"
    tags = {"node": node}
    MiB = 1024.0
    while True:
        try:
            fields = OrderedDict([("total", 0), ("used", 0), ("free", 0), ("cached", 0), ("buf", 0), ("slab", 0), ("cas", 0), ("clim", 0), ("dirty", 0), ("wback", 0), ("anon", 0), ("avail", 0)])
            with open("/proc/meminfo", "r") as f:
                hps = 0
                # for each line in /proc/meminfo, match with element in fields
                for line in f:
                    line = line.strip("\n").split()
                    if line[0].strip(":").startswith("MemTotal"):
                        # convert to from kibibytes to mibibytes
                        fields["total"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("MemFree"):
                        fields["free"] = int(line[1]) / MiB
                    elif line[0].strip(":").startswith("MemAvailable"):
                        fields["avail"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Buffers"):
                        fields["buf"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Cached"):
                        fields["cached"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Slab"):
                        fields["slab"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("CommitLimit"):
                        fields["clim"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Committed_AS"):
                        fields["cas"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Dirty"):
                        fields["dirty"] = float(line[1]) / MiB
                    elif line[0].strip(":").startswith("Writeback"):
                        fields["wback"] = float(line[1]) / MiB
                    elif line[0].strip(":").endswith("(anon)"):
                        fields["anon"] += float(line[1]) / MiB
                    elif line[0].strip(":").endswith("Hugepagesize"):
                        hps = float(line[1]) / MiB
                fields["used"] = fields["total"] - fields["avail"]
                f.close()
            # get platform specific memory info
            fields["platform_avail"] = 0
            fields["platform_hfree"] = 0
            for file in os.listdir("/sys/devices/system/node"):
                if file.startswith("node"):
                    node_num = file.replace("node", "").strip("\n")
                    avail = hfree = 0
                    with open("/sys/devices/system/node/{}/meminfo".format(file)) as f1:
                        for line in f1:
                            line = line.strip("\n").split()
                            if line[2].strip(":").startswith("MemFree") or line[2].strip(":").startswith("FilePages") or line[2].strip(":").startswith("SReclaimable"):
                                avail += float(line[3])
                            elif line[2].strip(":").startswith("HugePages_Free"):
                                hfree = float(line[3]) * hps
                        fields["{}:avail".format(node_num)] = avail / MiB
                        fields["{}:hfree".format(node_num)] = hfree
                        # get platform sum
                        fields["platform_avail"] += avail / MiB
                        fields["platform_hfree"] += hfree
                        f1.close()
            s = generateString(measurement, tags.keys(), tags.values(), fields.keys(), fields.values())
            if s is None:
                good_string = False
            else:
                good_string = True
            if good_string:
                # send data to InfluxDB
                p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], s), shell=True)
                p.communicate()
            time.sleep(ci["memtop"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("memtop collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects rss and vsz information
def collectMemstats(influx_info, node, ci, services, syseng_services, openstack_services, exclude_list, skip_list, collect_all):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("memstats data starting collection with a collection interval of {}s".format(ci["memstats"]))
    measurement = "memstats"
    tags = {"node": node}
    ps_output = None
    influx_string = ""
    while True:
        try:
            fields = {}
            ps_output = Popen("exec ps -e -o rss,vsz,cmd", shell=True, stdout=PIPE)
            # create dictionary of dictionaries
            if collect_all is False:
                for svc in services:
                    fields[svc] = {"rss": 0, "vsz": 0}
                fields["static_syseng"] = {"rss": 0, "vsz": 0}
                fields["live_syseng"] = {"rss": 0, "vsz": 0}
            fields["total"] = {"rss": 0, "vsz": 0}
            ps_output.stdout.readline()
            while True:
                # for each line in ps output, get rss and vsz info
                line = ps_output.stdout.readline().strip("\n").split()
                # if at end of output, send data
                if not line:
                    break
                else:
                    rss = float(line[0])
                    vsz = float(line[1])
                    # go through all command outputs
                    for i in range(2, len(line)):
                        # remove unwanted characters and borders from cmd name. Ex: /usr/bin/example.py -> example.py
                        svc = line[i].replace("(", "").replace(")", "").strip(":").split("/")[-1].strip("\n")
                        if svc == "gunicorn":
                            gsvc = line[-1].replace("[", "").replace("]", "").strip("\n")
                            if gsvc == "public:application":
                                gsvc = "keystone-public"
                            elif gsvc == "admin:application":
                                gsvc = "keystone-admin"
                            gsvc = "gunicorn_{}".format(gsvc)
                            if gsvc not in fields:
                                fields[gsvc] = {"rss": rss, "vsz": vsz}
                            else:
                                fields[gsvc]["rss"] += rss
                                fields[gsvc]["vsz"] += vsz

                        elif svc == "postgres":
                            if (len(line) <= i+2):
                                # Command line could be "sudo su postgres", skip it
                                break

                            if line[i + 1].startswith("-") is False and line[i + 1].startswith("_") is False and line[i + 1] != "psql":
                                psvc = ""
                                if line[i + 2] in openstack_services:
                                    psvc = line[i + 2].strip("\n")
                                else:
                                    for j in range(i + 1, len(line)):
                                        psvc += "{}_".format(line[j].strip("\n"))
                                psvc = "postgres_{}".format(psvc).strip("_")
                                if psvc not in fields:
                                    fields[psvc] = {"rss": rss, "vsz": vsz}
                                else:
                                    fields[psvc]["rss"] += rss
                                    fields[psvc]["vsz"] += vsz

                        if collect_all is False:
                            if svc in services:
                                fields[svc]["rss"] += rss
                                fields[svc]["vsz"] += vsz
                                fields["total"]["rss"] += rss
                                fields["total"]["vsz"] += vsz
                                break
                            elif svc in syseng_services:
                                if svc == "live_stream.py":
                                    fields["live_syseng"]["rss"] += rss
                                    fields["live_syseng"]["vsz"] += vsz
                                else:
                                    fields["static_syseng"]["rss"] += rss
                                    fields["static_syseng"]["vsz"] += vsz
                                fields["total"]["rss"] += rss
                                fields["total"]["vsz"] += vsz
                                break
                        # Collect all services
                        else:
                            if svc in exclude_list or svc.startswith("-") or svc[0].isdigit() or svc.startswith("[") or svc.endswith("]"):
                                continue
                            elif svc in skip_list or svc.startswith("IPaddr"):
                                break
                            else:
                                if svc not in fields:
                                    fields[svc] = {"rss": rss, "vsz": vsz}
                                else:
                                    fields[svc]["rss"] += rss
                                    fields[svc]["vsz"] += vsz
                                fields["total"]["rss"] += rss
                                fields["total"]["vsz"] += vsz
                                break
            # send data to InfluxDB
            for key in fields.keys():
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", key, "rss", fields[key]["rss"], "vsz", fields[key]["vsz"]) + "\n"
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
            ps_output.kill()
            time.sleep(ci["memstats"])
        except KeyboardInterrupt:
            if ps_output is not None:
                ps_output.kill()
            break
        except Exception:
            logging.error("memstats collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects task cpu information
def collectSchedtop(influx_info, node, ci, services, syseng_services, openstack_services, exclude_list, skip_list, collect_all):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("schedtop data starting collection with a collection interval of {}s".format(ci["schedtop"]))
    measurement = "schedtop"
    tags = {"node": node}
    influx_string = ""
    top_output = Popen("exec top -b -c -w 512 -d{}".format(ci["schedtop"]), shell=True, stdout=PIPE)
    while True:
        try:
            fields = {}
            pro = psutil.Process(top_output.pid)
            # if process dies, restart it
            if pro.status() == "zombie":
                top_output.kill()
                top_output = Popen("exec top -b -c -w 512 -d{}".format(ci["schedtop"]), shell=True, stdout=PIPE)
            if collect_all is False:
                for svc in services:
                    fields[svc] = 0
                fields["static_syseng"] = 0
                fields["live_syseng"] = 0
            fields["total"] = 0
            # check first line
            line = top_output.stdout.readline()
            if not line:
                pass
            else:
                # skip header completely
                for _ in range(6):
                    top_output.stdout.readline()
                while True:
                    line = top_output.stdout.readline().strip("\n").split()
                    # if end of top output, leave this while loop
                    if not line:
                        break
                    else:
                        occ = float(line[8])
                        # for each command listed, check if it matches one from the list
                        for i in range(11, len(line)):
                            # remove unwanted characters and borders from cmd name. Ex: /usr/bin/example.py -> example.py
                            svc = line[i].replace("(", "").replace(")", "").strip(":").split("/")[-1]
                            if svc == "gunicorn":
                                gsvc = line[-1].replace("[", "").replace("]", "").strip("\n")
                                if gsvc == "public:application":
                                    gsvc = "keystone-public"
                                elif gsvc == "admin:application":
                                    gsvc = "keystone-admin"
                                gsvc = "gunicorn_{}".format(gsvc)
                                if gsvc not in fields:
                                    fields[gsvc] = occ
                                else:
                                    fields[gsvc] += occ

                            elif svc == "postgres":
                                if (len(line) <= i+2):
                                    # Command line could be "sudo su postgres", skip it
                                    break

                                if line[i + 1].startswith("-") is False and line[i + 1].startswith("_") is False and line[i + 1] != "psql":
                                    psvc = ""
                                    if line[i + 2] in openstack_services:
                                        psvc = line[i + 2].strip("\n")
                                    else:
                                        for j in range(i + 1, len(line)):
                                            psvc += "{}_".format(line[j].strip("\n"))
                                    psvc = "postgres_{}".format(psvc).strip("_")
                                    if psvc not in fields:
                                        fields[psvc] = occ
                                    else:
                                        fields[psvc] += occ

                            if collect_all is False:
                                if svc in services:
                                    fields[svc] += occ
                                    fields["total"] += occ
                                    break
                                elif svc in syseng_services:
                                    if svc == "live_stream.py":
                                        fields["live_syseng"] += occ
                                    else:
                                        fields["static_syseng"] += occ
                                    fields["total"] += occ
                                    break
                            # Collect all services
                            else:
                                if svc in exclude_list or svc.startswith("-") or svc[0].isdigit() or svc.startswith("[") or svc.endswith("]"):
                                    continue
                                elif svc in skip_list or svc.startswith("IPaddr"):
                                    break
                                else:
                                    if svc not in fields:
                                        fields[svc] = occ
                                    else:
                                        fields[svc] += occ
                                    fields["total"] += occ
                                    break
                for key in fields.keys():
                    influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}'".format(measurement, "node", tags["node"], "service", key, "occ", fields[key]) + "\n"
                # send data to InfluxDB
                p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
                p.communicate()
                influx_string = ""
                time.sleep(ci["schedtop"])
        except KeyboardInterrupt:
            if top_output is not None:
                top_output.kill()
            break
        except Exception:
            logging.error("schedtop collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects disk utilization information
def collectDiskstats(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("diskstats data starting collection with a collection interval of {}s".format(ci["diskstats"]))
    measurement = "diskstats"
    tags = {"node": node, "file_system": None, "type": None, "mount": None}
    fields = {"size": 0, "used": 0, "avail": 0, "usage": 0}
    influx_string = ""
    while True:
        try:
            parts = psutil.disk_partitions()
            for i in parts:
                # gather all partitions
                tags["mount"] = str(i[1]).split("/")[-1]
                # if mount == '', call it root
                if tags["mount"] == "":
                    tags["mount"] = "root"
                # skip boot
                elif tags["mount"] == "boot":
                    continue
                tags["file_system"] = str(i[0]).split("/")[-1]
                tags["type"] = i[2]
                u = psutil.disk_usage(i[1])
                fields["size"] = u[0]
                fields["used"] = u[1]
                fields["avail"] = u[2]
                fields["usage"] = u[3]
                influx_string += "{},'{}'='{}','{}'='{}','{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "file_system", tags["file_system"], "type", tags["type"], "mount", tags["mount"], "size", fields["size"], "used", fields["used"], "avail", fields["avail"], "usage", fields["usage"]) + "\n"
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
            time.sleep(ci["diskstats"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("diskstats collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collect device I/O information
def collectIostat(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("iostat data starting collection with a collection interval of {}s".format(ci["iostat"]))
    measurement = "iostat"
    tags = {"node": node}
    sector_size = 512.0
    influx_string = ""
    while True:
        try:
            fields = {}
            tmp = {}
            tmp1 = {}
            start = time.time()
            # get initial values
            for dev in os.listdir("/sys/block/"):
                if dev.startswith("sr"):
                    continue
                else:
                    fields[dev] = {"r/s": 0, "w/s": 0, "io/s": 0, "rkB/s": 0, "wkB/s": 0, "rrqms/s": 0, "wrqms/s": 0, "util": 0}
                    tmp[dev] = {"init_reads": 0, "init_reads_merged": 0, "init_read_sectors": 0, "init_read_wait": 0, "init_writes": 0, "init_writes_merged": 0, "init_write_sectors": 0, "init_write_wait": 0, "init_io_progress": 0, "init_io_time": 0, "init_wait_time": 0}
                    with open("/sys/block/{}/stat".format(dev), "r") as f:
                        # get initial readings
                        line = f.readline().strip("\n").split()
                        tmp[dev]["init_reads"] = int(line[0])
                        tmp[dev]["init_reads_merged"] = int(line[1])
                        tmp[dev]["init_read_sectors"] = int(line[2])
                        tmp[dev]["init_read_wait"] = int(line[3])
                        tmp[dev]["init_writes"] = int(line[4])
                        tmp[dev]["init_writes_merged"] = int(line[5])
                        tmp[dev]["init_write_sectors"] = int(line[6])
                        tmp[dev]["init_write_wait"] = int(line[7])
                        tmp[dev]["init_io_progress"] = int(line[8])
                        tmp[dev]["init_io_time"] = int(line[9])
                        tmp[dev]["init_wait_time"] = int(line[10])
            time.sleep(ci["iostat"])
            dt = time.time() - start
            # get values again
            for dev in os.listdir("/sys/block/"):
                if dev.startswith("sr"):
                    continue
                else:
                    # during a swact, some devices may not have been read in the initial reading. If found now, add them to dict
                    if dev not in fields:
                        fields[dev] = {"r/s": 0, "w/s": 0, "io/s": 0, "rkB/s": 0, "wkB/s": 0, "rrqms/s": 0, "wrqms/s": 0, "util": 0}
                    tmp1[dev] = {"reads": 0, "reads_merged": 0, "read_sectors": 0, "read_wait": 0, "writes": 0, "writes_merged": 0, "write_sectors": 0, "write_wait": 0, "io_progress": 0, "io_time": 0, "wait_time": 0}
                    with open("/sys/block/{}/stat".format(dev), "r") as f:
                        line = f.readline().strip("\n").split()
                        tmp1[dev]["reads"] = int(line[0])
                        tmp1[dev]["reads_merged"] = int(line[1])
                        tmp1[dev]["read_sectors"] = int(line[2])
                        tmp1[dev]["read_wait"] = int(line[3])
                        tmp1[dev]["writes"] = int(line[4])
                        tmp1[dev]["writes_merged"] = int(line[5])
                        tmp1[dev]["write_sectors"] = int(line[6])
                        tmp1[dev]["write_wait"] = int(line[7])
                        tmp1[dev]["io_progress"] = int(line[8])
                        tmp1[dev]["io_time"] = int(line[9])
                        tmp1[dev]["wait_time"] = int(line[10])
            # take difference and divide by delta t
            for key in fields:
                # if device was found in initial and second reading, do calculation
                if key in tmp and key in tmp1:
                    fields[key]["r/s"] = abs(tmp1[key]["reads"] - tmp[key]["init_reads"]) / dt
                    fields[key]["w/s"] = abs(tmp1[key]["writes"] - tmp[key]["init_writes"]) / dt
                    fields[key]["rkB/s"] = abs(tmp1[key]["read_sectors"] - tmp[key]["init_read_sectors"]) * sector_size / dt / 1000
                    fields[key]["wkB/s"] = abs(tmp1[key]["write_sectors"] - tmp[key]["init_write_sectors"]) * sector_size / dt / 1000
                    fields[key]["rrqms/s"] = abs(tmp1[key]["reads_merged"] - tmp[key]["init_reads_merged"]) / dt
                    fields[key]["wrqms/s"] = abs(tmp1[key]["writes_merged"] - tmp[key]["init_writes_merged"]) / dt
                    fields[key]["io/s"] = fields[key]["r/s"] + fields[key]["w/s"] + fields[key]["rrqms/s"] + fields[key]["wrqms/s"]
                    fields[key]["util"] = abs(tmp1[key]["io_time"] - tmp[key]["init_io_time"]) / dt / 10
                    influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "device", key, "r/s", fields[key]["r/s"], "w/s", fields[key]["w/s"], "rkB/s", fields[key]["rkB/s"], "wkB/s", fields[key]["wkB/s"], "rrqms/s", fields[key]["rrqms/s"], "wrqms/s", fields[key]["wrqms/s"], "io/s", fields[key]["io/s"], "util", fields[key]["util"]) + "\n"
            # send data to InfluxDB
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("iostat collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects cpu load average information
def collectLoadavg(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("load_avg data starting collection with a collection interval of {}s".format(ci["load_avg"]))
    measurement = "load_avg"
    tags = {"node": node}
    fields = {"load_avg": 0}
    while True:
        try:
            fields["load_avg"] = os.getloadavg()[0]
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{},'{}'='{}' '{}'='{}''".format(influx_info[0], influx_info[1], influx_info[2], measurement, "node", tags["node"], "load_avg", fields["load_avg"]), shell=True)
            p.communicate()
            time.sleep(ci["load_avg"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("load_avg collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects cpu utilization information
def collectOcctop(influx_info, node, ci, pc):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("occtop data starting collection with a collection interval of {}s".format(ci["occtop"]))
    measurement = "occtop"
    tags = {"node": node}
    platform_cores = pc
    influx_string = ""
    while True:
        try:
            cpu = psutil.cpu_percent(percpu=True)
            cpu_times = psutil.cpu_times_percent(percpu=True)
            fields = {}
            # sum all cpu percents
            total = float(sum(cpu))
            sys_total = 0
            fields["platform_total"] = {"usage": 0, "system": 0}
            cores = 0
            # for each core, get values and assign a tag
            for el in cpu:
                fields["usage"] = float(el)
                fields["system"] = float(cpu_times[cores][2])
                sys_total += float(cpu_times[cores][2])
                tags["core"] = "core_{}".format(cores)
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "core", tags["core"], "usage", fields["usage"], "system", fields["system"]) + "\n"
                if len(platform_cores) > 0:
                    if cores in platform_cores:
                        fields["platform_total"]["usage"] += float(el)
                        fields["platform_total"]["system"] += float(cpu_times[cores][2])
                cores += 1
            # add usage and system total to influx string
            if len(platform_cores) > 0:
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "core", "platform_total", "usage", fields["platform_total"]["usage"], "system", fields["platform_total"]["system"]) + "\n"
            influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "core", "total", "usage", total, "system", sys_total) + "\n"
            # send data to Influx
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
            time.sleep(ci["occtop"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("occtop collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects network interface information
def collectNetstats(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("netstats data starting collection with a collection interval of {}s".format(ci["netstats"]))
    measurement = "netstats"
    tags = {"node": node}
    fields = {}
    prev_fields = {}
    Mbps = float(1000000 / 8)
    influx_string = ""
    while True:
        try:
            net = psutil.net_io_counters(pernic=True)
            # get initial data for difference calculation
            for key in net:
                prev_fields[key] = {"tx_B": net[key][0], "rx_B": net[key][1], "tx_p": net[key][2], "rx_p": net[key][3]}
            start = time.time()
            time.sleep(ci["netstats"])
            net = psutil.net_io_counters(pernic=True)
            # get new data for difference calculation
            dt = time.time() - start
            for key in net:
                tx_B = (float(net[key][0]) - float(prev_fields[key]["tx_B"]))
                tx_Mbps = tx_B / Mbps / dt
                rx_B = (float(net[key][1]) - float(prev_fields[key]["rx_B"]))
                rx_Mbps = rx_B / Mbps / dt
                tx_pps = (float(net[key][2]) - float(prev_fields[key]["tx_p"])) / dt
                rx_pps = (float(net[key][3]) - float(prev_fields[key]["rx_p"])) / dt
                # ensure no division by zero
                if rx_B > 0 and rx_pps > 0:
                    rx_packet_size = rx_B / rx_pps
                else:
                    rx_packet_size = 0
                if tx_B > 0 and tx_pps > 0:
                    tx_packet_size = tx_B / tx_pps
                else:
                    tx_packet_size = 0
                fields[key] = {"tx_mbps": tx_Mbps, "rx_mbps": rx_Mbps, "tx_pps": tx_pps, "rx_pps": rx_pps, "tx_packet_size": tx_packet_size, "rx_packet_size": rx_packet_size}
            for key in fields:
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "interface", key, "rx_mbps", fields[key]["rx_mbps"], "tx_mbps", fields[key]["tx_mbps"], "rx_pps", fields[key]["rx_pps"], "tx_pps", fields[key]["tx_pps"], "rx_packet_size", fields[key]["rx_packet_size"], "tx_packet_size", fields[key]["tx_packet_size"]) + "\n"
            # send data to InfluxDB
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("netstats collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects postgres db size and postgres service size information
def collectPostgres(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("postgres data starting collection with a collection interval of {}s".format(ci["postgres"]))
    measurement = "postgres_db_size"
    measurement1 = "postgres_svc_stats"
    tags = {"node": node, "service": None, "table_schema": 0, "table": None}
    fields = {"db_size": 0, "connections": 0}
    fields1 = {"table_size": 0, "total_size": 0, "index_size": 0, "live_tuples": 0, "dead_tuples": 0}
    postgres_output = postgres_output1 = None
    influx_string = influx_string1 = ""
    good_string = False
    dbcount = 0
    BATCH_SIZE = 10

    while True:
        try:
            # make sure this is active controller, otherwise postgres queries wont work
            if isActiveController():
                while True:
                    postgres_output = Popen("sudo -u postgres psql --pset pager=off -q -t -c'SELECT datname, pg_database_size(datname) FROM pg_database WHERE datistemplate = false;'", shell=True, stdout=PIPE)
                    db_lines = postgres_output.stdout.read().replace(" ", "").strip().split("\n")
                    if db_lines == "" or db_lines is None:
                        postgres_output.kill()
                        break
                    else:
                        # for each database from the previous output
                        for line in db_lines:
                            if not line:
                                break
                            line = line.replace(" ", "").split("|")
                            tags["service"] = line[0]
                            fields["db_size"] = line[1]
                            # send DB size to InfluxDB
                            influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}'".format(measurement, "node", tags["node"], "service", tags["service"], "db_size", fields["db_size"]) + "\n"
                            # get tables for each database
                            sql = "SELECT table_schema,table_name,pg_size_pretty(table_size) AS table_size,pg_size_pretty(indexes_size) AS indexes_size,pg_size_pretty(total_size) AS total_size,live_tuples,dead_tuples FROM (SELECT table_schema,table_name,pg_table_size(table_name) AS table_size,pg_indexes_size(table_name) AS indexes_size,pg_total_relation_size(table_name) AS total_size,pg_stat_get_live_tuples(table_name::regclass) AS live_tuples,pg_stat_get_dead_tuples(table_name::regclass) AS dead_tuples FROM (SELECT table_schema,table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE') AS all_tables ORDER BY total_size DESC) AS pretty_sizes;"
                            postgres_output1 = Popen('sudo -u postgres psql --pset pager=off -q -t -d{} -c"{}"'.format(line[0], sql), shell=True, stdout=PIPE)
                            tbl_lines = postgres_output1.stdout.read().replace(" ", "").strip().split("\n")
                            for line in tbl_lines:
                                if line == "":
                                    continue
                                else:
                                    line = line.replace(" ", "").split("|")
                                    elements = list()
                                    # ensures all data is present
                                    if len(line) != 7:
                                        good_string = False
                                        break
                                    else:
                                        # do some conversions
                                        for el in line:
                                            if el.endswith("bytes"):
                                                el = int(el.replace("bytes", ""))
                                            elif el.endswith("kB"):
                                                el = el.replace("kB", "")
                                                el = int(el) * 1000
                                            elif el.endswith("MB"):
                                                el = el.replace("MB", "")
                                                el = int(el) * 1000000
                                            elif el.endswith("GB"):
                                                el = el.replace("GB", "")
                                                el = int(el) * 1000000000
                                            elements.append(el)
                                        tags["table_schema"] = elements[0]
                                        tags["table"] = elements[1]
                                        fields1["table_size"] = int(elements[2])
                                        fields1["index_size"] = int(elements[3])
                                        fields1["total_size"] = int(elements[4])
                                        fields1["live_tuples"] = int(elements[5])
                                        fields1["dead_tuples"] = int(elements[6])
                                        influx_string1 += "{},'{}'='{}','{}'='{}','{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement1, "node", tags["node"], "service", tags["service"], "table_schema", tags["table_schema"], "table", tags["table"], "table_size", fields1["table_size"], "index_size", fields1["index_size"], "total_size", fields1["total_size"], "live_tuples", fields1["live_tuples"], "dead_tuples", fields1["dead_tuples"]) + "\n"
                                        good_string = True
			    dbcount += 1
			    if dbcount == BATCH_SIZE and good_string:
				# Curl will barf if the batch is too large
				p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string1), shell=True)
				p.communicate()
			       	influx_string1 = ""
				dbcount = 0
                        if good_string:
                            # send table data to InfluxDB
                            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
                            p.communicate()
                            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string1), shell=True)
                            p.communicate()
                            influx_string = influx_string1 = ""
                            dbcount = 0
                            time.sleep(ci["postgres"])
                        postgres_output1.kill()
                        postgres_output.kill()
            else:
                time.sleep(20)
        except KeyboardInterrupt:
            if postgres_output is not None:
                postgres_output.kill()
            if postgres_output1 is not None:
                postgres_output1.kill()
            break
        except Exception:
            logging.error("postgres collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collect postgres connections information
def collectPostgresConnections(influx_info, node, ci, fast):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    if fast:
        logging.info("postgres_connections data starting collection with a constant collection interval")
    else:
        logging.info("postgres_connections data starting collection with a collection interval of {}s".format(ci["postgres"]))
    measurement = "postgres_connections"
    tags = {"node": node, "service": None, "state": None}
    connections_output = None
    influx_string = ""
    while True:
        try:
            # make sure this is active controller, otherwise postgres queries wont work
            if isActiveController():
                while True:
                    fields = {}
                    # outputs a list of postgres dbs and their connections
                    connections_output = Popen("sudo -u postgres psql --pset pager=off -q -c 'SELECT datname,state,count(*) from pg_stat_activity group by datname,state;'", shell=True, stdout=PIPE)
                    line = connections_output.stdout.readline()
                    if line == "" or line is None:
                        break
                    # skip header
                    connections_output.stdout.readline()
                    while True:
                        line = connections_output.stdout.readline().strip("\n")
                        if not line:
                            break
                        else:
                            line = line.replace(" ", "").split("|")
                            if len(line) != 3:
                                continue
                            else:
                                svc = line[0]
                                connections = int(line[2])
                                tags["service"] = svc
                                if svc not in fields:
                                    fields[svc] = {"active": 0, "idle": 0, "other": 0}
                                if line[1] == "active":
                                    fields[svc]["active"] = connections
                                elif line[1] == "idle":
                                    fields[svc]["idle"] = connections
                                else:
                                    fields[svc]["other"] = connections
                                influx_string += "{},'{}'='{}','{}'='{}','{}'='{}' '{}'='{}'".format(measurement, "node", tags["node"], "service", tags["service"], "state", "active", "connections", fields[svc]["active"]) + "\n"
                                influx_string += "{},'{}'='{}','{}'='{}','{}'='{}' '{}'='{}'".format(measurement, "node", tags["node"], "service", tags["service"], "state", "idle", "connections", fields[svc]["idle"]) + "\n"
                                influx_string += "{},'{}'='{}','{}'='{}','{}'='{}' '{}'='{}'".format(measurement, "node", tags["node"], "service", tags["service"], "state", "other", "connections", fields[svc]["other"]) + "\n"

                    # send data to InfluxDB
                    p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
                    p.communicate()
                    influx_string = ""
                    connections_output.kill()
                    if fast:
                        pass
                    else:
                        time.sleep(ci["postgres"])
            else:
                time.sleep(20)
        except KeyboardInterrupt:
            if connections_output is not None:
                connections_output.kill()
            break
        except Exception:
            logging.error("postgres_connections collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects rabbitmq information
def collectRabbitMq(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("rabbitmq data starting collection with a collection interval of {}s".format(ci["rabbitmq"]))
    measurement = "rabbitmq"
    tags = OrderedDict([("node", node)])
    rabbitmq_output = None
    while True:
        try:
            # make sure this is active controller, otherwise rabbit queries wont work
            if isActiveController():
                while True:
                    fields = OrderedDict([])
                    rabbitmq_output = Popen("sudo rabbitmqctl -n rabbit@localhost status", shell=True, stdout=PIPE)
                    # needed data starts where output = '{memory,['
                    line = rabbitmq_output.stdout.readline()
                    # if no data is returned, exit
                    if line == "" or line is None:
                        rabbitmq_output.kill()
                        break
                    else:
                        line = rabbitmq_output.stdout.read().strip("\n").split("{memory,[")
                        if len(line) != 2:
                            rabbitmq_output.kill()
                            break
                        else:
                            # remove brackets from data
                            info = line[1].replace(" ", "").replace("{", "").replace("}", "").replace("\n", "").replace("[", "").replace("]", "").split(",")
                            for i in range(len(info) - 3):
                                if info[i].endswith("total"):
                                    info[i] = info[i].replace("total", "memory_total")
                                # some data needs string manipulation
                                if info[i].startswith("clustering") or info[i].startswith("amqp"):
                                    info[i] = "listeners_" + info[i]
                                if info[i].startswith("total_"):
                                    info[i] = "descriptors_" + info[i]
                                if info[i].startswith("limit") or info[i].startswith("used"):
                                    info[i] = "processes_" + info[i]
                                if info[i].replace("_", "").isalpha() and info[i + 1].isdigit():
                                    fields[info[i]] = info[i + 1]
                            s = generateString(measurement, tags.keys(), tags.values(), fields.keys(), fields.values())
                            if s is None:
                                rabbitmq_output.kill()
                            else:
                                # send data to InfluxDB
                                p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], s), shell=True)
                                p.communicate()
                                time.sleep(ci["rabbitmq"])
                                rabbitmq_output.kill()
            else:
                time.sleep(20)
        except KeyboardInterrupt:
            if rabbitmq_output is not None:
                rabbitmq_output.kill()
            break
        except Exception:
            logging.error("rabbitmq collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects rabbitmq messaging information
def collectRabbitMqSvc(influx_info, node, ci, services):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("rabbitmq_svc data starting collection with a collection interval of {}s".format(ci["rabbitmq"]))
    measurement = "rabbitmq_svc"
    tags = {"node": node, "service": None}
    fields = {"messages": 0, "messages_ready": 0, "messages_unacknowledged": 0, "memory": 0, "consumers": 0}
    rabbitmq_svc_output = None
    good_string = False
    influx_string = ""
    while True:
        try:
            # make sure this is active controller, otherwise rabbit queries wont work
            if isActiveController():
                while True:
                    rabbitmq_svc_output = Popen("sudo rabbitmqctl -n rabbit@localhost list_queues name messages messages_ready messages_unacknowledged memory consumers", shell=True, stdout=PIPE)
                    # # if no data is returned, exit
                    if rabbitmq_svc_output.stdout.readline() == "" or rabbitmq_svc_output.stdout.readline() is None:
                        rabbitmq_svc_output.kill()
                        break
                    else:
                        for line in rabbitmq_svc_output.stdout:
                            line = line.split()
                            if not line:
                                break
                            else:
                                if len(line) != 6:
                                    good_string = False
                                    break
                                else:
                                    # read line and fill fields
                                    if line[0] in services:
                                        tags["service"] = line[0]
                                        fields["messages"] = line[1]
                                        fields["messages_ready"] = line[2]
                                        fields["messages_unacknowledged"] = line[3]
                                        fields["memory"] = line[4]
                                        fields["consumers"] = line[5]
                                        influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", tags["service"], "messages", fields["messages"], "messages_ready", fields["messages_ready"], "messages_unacknowledged", fields["messages_unacknowledged"], "memory", fields["memory"], "consumers", fields["consumers"]) + "\n"
                                        good_string = True
                        if good_string:
                            # send data to InfluxDB
                            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
                            p.communicate()
                            influx_string = ""
                            time.sleep(ci["rabbitmq"])
                        rabbitmq_svc_output.kill()
            else:
                time.sleep(20)
        except KeyboardInterrupt:
            if rabbitmq_svc_output is not None:
                rabbitmq_svc_output.kill()
            break
        except Exception:
            logging.error("rabbitmq_svc collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects open file information
def collectFilestats(influx_info, node, ci, services, syseng_services, exclude_list, skip_list, collect_all):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("filestats data starting collection with a collection interval of {}s".format(ci["filestats"]))
    measurement = "filestats"
    tags = {"node": node}
    influx_string = ""
    while True:
        try:
            fields = {}
            # fill dict with services from engtools.conf
            if collect_all is False:
                for svc in services:
                    fields[svc] = {"read/write": 0, "write": 0, "read": 0}
                fields["static_syseng"] = {"read/write": 0, "write": 0, "read": 0}
                fields["live_syseng"] = {"read/write": 0, "write": 0, "read": 0}
            fields["total"] = {"read/write": 0, "write": 0, "read": 0}
            for process in os.listdir("/proc/"):
                if process.isdigit():
                    # sometimes the process dies before reading its info
                    try:
                        svc = psutil.Process(int(process)).name()
                        svc = svc.split()[0].replace("(", "").replace(")", "").strip(":").split("/")[-1]
                    except Exception:
                        continue
                    if collect_all is False:
                        if svc in services:
                            try:
                                p = Popen("ls -l /proc/{}/fd".format(process), shell=True, stdout=PIPE)
                                p.stdout.readline()
                                while True:
                                    line = p.stdout.readline().strip("\n").split()
                                    if not line:
                                        break
                                    else:
                                        priv = line[0]
                                        if priv[1] == "r" and priv[2] == "w":
                                            fields[svc]["read/write"] += 1
                                            fields["total"]["read/write"] += 1
                                        elif priv[1] == "r" and priv[2] != "w":
                                            fields[svc]["read"] += 1
                                            fields["total"]["read"] += 1
                                        elif priv[1] != "r" and priv[2] == "w":
                                            fields[svc]["write"] += 1
                                            fields["total"]["write"] += 1
                            except Exception:
                                p.kill()
                                continue
                            p.kill()

                        elif svc in syseng_services:
                            try:
                                p = Popen("ls -l /proc/{}/fd".format(process), shell=True, stdout=PIPE)
                                p.stdout.readline()
                                while True:
                                    line = p.stdout.readline().strip("\n").split()
                                    if not line:
                                        break
                                    else:
                                        priv = line[0]
                                        if svc == "live_stream.py":
                                            if priv[1] == "r" and priv[2] == "w":
                                                fields["live_syseng"]["read/write"] += 1
                                                fields["total"]["read/write"] += 1
                                            elif priv[1] == "r" and priv[2] != "w":
                                                fields["live_syseng"]["read"] += 1
                                                fields["total"]["read"] += 1
                                            elif priv[1] != "r" and priv[2] == "w":
                                                fields["live_syseng"]["write"] += 1
                                                fields["total"]["write"] += 1
                                        else:
                                            if priv[1] == "r" and priv[2] == "w":
                                                fields["static_syseng"]["read/write"] += 1
                                                fields["total"]["read/write"] += 1
                                            elif priv[1] == "r" and priv[2] != "w":
                                                fields["static_syseng"]["read"] += 1
                                                fields["total"]["read"] += 1
                                            elif priv[1] != "r" and priv[2] == "w":
                                                fields["static_syseng"]["write"] += 1
                                                fields["total"]["write"] += 1
                            except Exception:
                                p.kill()
                                continue
                            p.kill()

                    else:
                        # remove garbage processes
                        if svc in exclude_list or svc in skip_list or svc.startswith("-") or svc.endswith("-") or svc[0].isdigit() or svc[-1].isdigit() or svc[0].isupper():
                            continue
                        elif svc not in fields:
                            fields[svc] = {"read/write": 0, "write": 0, "read": 0}
                        try:
                            p = Popen("ls -l /proc/{}/fd".format(process), shell=True, stdout=PIPE)
                            p.stdout.readline()
                            while True:
                                line = p.stdout.readline().strip("\n").split()
                                if not line:
                                    break
                                else:
                                    priv = line[0]
                                    if priv[1] == "r" and priv[2] == "w":
                                        fields[svc]["read/write"] += 1
                                        fields["total"]["read/write"] += 1
                                    elif priv[1] == "r" and priv[2] != "w":
                                        fields[svc]["read"] += 1
                                        fields["total"]["read"] += 1
                                    elif priv[1] != "r" and priv[2] == "w":
                                        fields[svc]["write"] += 1
                                        fields["total"]["write"] += 1
                            if fields[svc]["read/write"] == 0 and fields[svc]["read"] == 0 and fields[svc]["write"] == 0:
                                del fields[svc]
                        except Exception:
                            p.kill()
                            continue
                        p.kill()
            for key in fields.keys():
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", key, "read/write", fields[key]["read/write"], "write", fields[key]["write"], "read", fields[key]["read"]) + "\n"
                # send data to InfluxDB
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
            time.sleep(ci["filestats"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("filestats collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects vshell information
def collectVswitch(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("vswitch data starting collection with a collection interval of {}s".format(ci["vswitch"]))
    measurement = "vswitch"
    tags = OrderedDict([("node", node), ("engine", 0)])
    tags1 = OrderedDict([("node", node), ("port", 0)])
    tags2 = OrderedDict([("node", node), ("interface", 0)])
    fields = OrderedDict([("cpuid", 0), ("rx_packets", 0), ("tx_packets", 0), ("rx_discard", 0), ("tx_discard", 0), ("tx_disabled", 0), ("tx_overflow", 0), ("tx_timeout", 0), ("usage", 0)])
    fields1 = OrderedDict([("rx_packets", 0), ("tx_packets", 0), ("rx_bytes", 0), ("tx_bytes", 0), ("tx_errors", 0), ("rx_errors", 0), ("rx_nombuf", 0)])
    fields2 = OrderedDict([("rx_packets", 0), ("tx_packets", 0), ("rx_bytes", 0), ("tx_bytes", 0), ("tx_errors", 0), ("rx_errors", 0), ("tx_discards", 0), ("rx_discards", 0), ("rx_floods", 0), ("rx_no_vlan", 0)])
    vshell_engine_stats_output = vshell_port_stats_output = vshell_interface_stats_output = None
    influx_string = ""
    while True:
        try:
            vshell_engine_stats_output = Popen("vshell engine-stats-list", shell=True, stdout=PIPE)
            # skip first few lines
            vshell_engine_stats_output.stdout.readline()
            vshell_engine_stats_output.stdout.readline()
            vshell_engine_stats_output.stdout.readline()
            while True:
                line = vshell_engine_stats_output.stdout.readline().replace("|", "").split()
                if not line:
                    break
                # skip lines like +++++++++++++++++++++++++++++
                elif line[0].startswith("+"):
                    continue
                else:
                    # get info from output
                    i = 2
                    tags["engine"] = line[1]
                    for key in fields:
                        fields[key] = line[i].strip("%")
                        i += 1
                    influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, tags.keys()[0], tags.values()[0], tags.keys()[1], tags.values()[1], fields.keys()[0], fields.values()[0], fields.keys()[1], fields.values()[1], fields.keys()[2], fields.values()[2], fields.keys()[3], fields.values()[3], fields.keys()[4], fields.values()[4], fields.keys()[5], fields.values()[5], fields.keys()[6], fields.values()[6], fields.keys()[7], fields.values()[7], fields.keys()[8], fields.values()[8]) + "\n"
            vshell_engine_stats_output.kill()
            vshell_port_stats_output = Popen("vshell port-stats-list", shell=True, stdout=PIPE)
            vshell_port_stats_output.stdout.readline()
            vshell_port_stats_output.stdout.readline()
            vshell_port_stats_output.stdout.readline()
            while True:
                line = vshell_port_stats_output.stdout.readline().replace("|", "").split()
                if not line:
                    break
                elif line[0].startswith("+"):
                    continue
                else:
                    i = 3
                    tags1["port"] = line[1]
                    for key in fields1:
                        fields1[key] = line[i].strip("%")
                        i += 1
                    influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, tags1.keys()[0], tags1.values()[0], tags1.keys()[1], tags1.values()[1], fields1.keys()[0], fields1.values()[0], fields1.keys()[1], fields1.values()[1], fields1.keys()[2], fields1.values()[2], fields1.keys()[3], fields1.values()[3], fields1.keys()[4], fields1.values()[4], fields1.keys()[5], fields1.values()[5], fields1.keys()[6], fields1.values()[6]) + "\n"
            vshell_port_stats_output.kill()
            vshell_interface_stats_output = Popen("vshell interface-stats-list", shell=True, stdout=PIPE)
            vshell_interface_stats_output.stdout.readline()
            vshell_interface_stats_output.stdout.readline()
            vshell_interface_stats_output.stdout.readline()
            while True:
                line = vshell_interface_stats_output.stdout.readline().replace("|", "").split()
                if not line:
                    break
                elif line[0].startswith("+"):
                    continue
                else:
                    if line[2] == "ethernet" and line[3].startswith("eth"):
                        i = 4
                        tags2["interface"] = line[3]
                        for key in fields2:
                            fields2[key] = line[i].strip("%")
                            i += 1
                        influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}','{}'='{}'".format(measurement, tags2.keys()[0], tags2.values()[0], tags2.keys()[1], tags2.values()[1], fields2.keys()[0], fields2.values()[0], fields2.keys()[1], fields2.values()[1], fields2.keys()[2], fields2.values()[2], fields2.keys()[3], fields2.values()[3], fields2.keys()[4], fields2.values()[4], fields2.keys()[5], fields2.values()[5], fields2.keys()[6], fields2.values()[6], fields2.keys()[7], fields2.values()[7], fields2.keys()[8], fields2.values()[8], fields2.keys()[9], fields2.values()[9]) + "\n"
                    else:
                        continue
            vshell_interface_stats_output.kill()
            # send data to InfluxDB
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
            time.sleep(ci["vswitch"])
        except KeyboardInterrupt:
            if vshell_engine_stats_output is not None:
                vshell_engine_stats_output.kill()
            if vshell_port_stats_output is not None:
                vshell_port_stats_output.kill()
            if vshell_interface_stats_output is not None:
                vshell_interface_stats_output.kill()
            break
        except Exception:
            logging.error("vswitch collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)


# collects the number of cores
def collectCpuCount(influx_info, node, ci):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("cpu_count data starting collection with a collection interval of {}s".format(ci["cpu_count"]))
    measurement = "cpu_count"
    tags = {"node": node}
    while True:
        try:
            fields = {"cpu_count": cpu_count()}
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{},'{}'='{}' '{}'='{}''".format(influx_info[0], influx_info[1], influx_info[2], measurement, "node", tags["node"], "cpu_count", fields["cpu_count"]), shell=True)
            p.communicate()
            time.sleep(ci["cpu_count"])
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("cpu_count collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))

def collectApiStats(influx_info, node, ci, services, db_port, rabbit_port):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info("api_request data starting collection with a collection interval of {}s".format(ci["cpu_count"]))
    measurement = "api_requests"
    tags = {"node": node}
    influx_string = ""
    lsof_args = ['lsof', '-Pn', '-i', 'tcp']
    while True:
        try:
            fields = {}
            lsof_result = Popen(lsof_args, shell=False, stdout=PIPE)
            lsof_lines = list()
            while True:
                line = lsof_result.stdout.readline().strip("\n")
                if not line:
                    break
                lsof_lines.append(line)
            lsof_result.kill()
            for name, service in services.iteritems():
                pid_list = list()
                check_pid = False
                if name == "keystone-public":
                    check_pid = True
                    ps_result = Popen("pgrep -f --delimiter=' ' keystone-public", shell=True, stdout=PIPE)
                    pid_list = ps_result.stdout.readline().strip().split(' ')
                    ps_result.kill()
                elif name == "gnocchi-api":
                    check_pid = True
                    ps_result = Popen("pgrep -f --delimiter=' ' gnocchi-api", shell=True, stdout=PIPE)
                    pid_list = ps_result.stdout.readline().strip().split(' ')
                    ps_result.kill()
                api_count = 0
                db_count = 0
                rabbit_count = 0
                for line in lsof_lines:
                    if service['name'] is not None and service['name'] in line and (not check_pid or any(pid in line for pid in pid_list)):
                        if service['api-port'] is not None and service['api-port'] in line:
                            api_count += 1
                        elif db_port is not None and db_port in line:
                            db_count += 1
                        elif rabbit_port is not None and rabbit_port in line:
                            rabbit_count += 1
                fields[name] = {"api": api_count, "db": db_count, "rabbit": rabbit_count}
                influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", name, "api", fields[name]["api"], "db", fields[name]["db"], "rabbit", fields[name]["rabbit"]) + "\n"
            p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
            p.communicate()
            influx_string = ""
        except KeyboardInterrupt:
            break
        except Exception:
            logging.error("api_request collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
            time.sleep(3)

# returns the cores dedicated to platform use
def getPlatformCores(node, cpe):
    if cpe is True or node.startswith("compute"):
        logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
        core_list = list()
        try:
            with open("/etc/nova/compute_reserved.conf", "r") as f:
                for line in f:
                    if line.startswith("PLATFORM_CPU_LIST"):
                        core_list = line.split("=")[1].replace("\"", "").strip("\n").split(",")
                        core_list = [int(x) for x in core_list]
                        return core_list
        except Exception:
            logging.warning("skipping platform specific collection for {} due to error: {}".format(node, sys.exc_info()))
            return core_list
    else:
        return []


# determine if controller is active/standby
def isActiveController():
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    try:
        o = Popen("sm-dump", shell=True, stdout=PIPE)
        o.stdout.readline()
        o.stdout.readline()
        # read line for active/standby
        l = o.stdout.readline().strip("\n").split()
        per = l[1]
        o.kill()
        if per == "active":
            return True
        else:
            return False
    except Exception:
        if o is not None:
            o.kill()
        logging.error("sm-dump command could not be called properly. This is usually caused by a swact. Trying again on next call: {}".format(sys.exc_info()))
        return False


# checks whether the duration param has been set. If set, sleep; then kill processes upon waking up
def checkDuration(duration):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    if duration is None:
        return None
    else:
        time.sleep(duration)
        print "Duration interval has ended. Killing processes now"
        logging.warning("Duration interval has ended. Killing processes now")
        raise KeyboardInterrupt


# kill all processes and log each death
def killProcesses(tasks):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    for t in tasks:
        try:
            logging.info("{} data stopped collection".format(str(t.name)))
            t.terminate()
        except Exception:
            continue


# create database in InfluxDB and add it to Grafana
def createDB(influx_info, grafana_port, grafana_api_key):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    p = None
    try:
        logging.info("Adding database to InfluxDB and Grafana")
        # create database in InfluxDB if not already created. Will NOT overwrite previous db
        p = Popen("curl -s -XPOST 'http://'{}':'{}'/query' --data-urlencode 'q=CREATE DATABASE {}'".format(influx_info[0], influx_info[1], influx_info[2]), shell=True, stdout=PIPE)
        response = p.stdout.read().strip("\n")
        if response == "":
            raise Exception("An error occurred while creating the database: Please make sure the Grafana and InfluxDB services are running")
        else:
            logging.info("InfluxDB response: {}".format(response))
        p.kill()

        # add database to Grafana
        grafana_db = '{"name":"%s", "type":"influxdb", "url":"http://%s:%s", "access":"proxy", "isDefault":false, "database":"%s"}' % (influx_info[2], influx_info[0], influx_info[1], influx_info[2])
        p = Popen("curl -s 'http://{}:{}/api/datasources' -H 'Accept: application/json' -H 'Content-Type: application/json' -H 'Authorization: Bearer {}' --data-binary '{}'".format(influx_info[0], grafana_port, grafana_api_key, grafana_db), shell=True, stdout=PIPE)
        response = p.stdout.read().strip("\n")
        if response == "":
            raise Exception("An error occurred while creating the database: Please make sure the Grafana and InfluxDB services are running")
        else:
            logging.info("Grafana response: {}".format(response))
        p.kill()
    except KeyboardInterrupt:
        if p is not None:
            p.kill()
    except Exception as e:
        print e.message
        sys.exit(0)


# delete database from InfluxDB and remove it from Grafana
def deleteDB(influx_info, grafana_port, grafana_api_key):
    logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    p = None
    try:
        answer = str(raw_input("\nAre you sure you would like to delete {}? (Y/N): ".format(influx_info[2]))).lower()
    except Exception:
        answer = None
    if answer is None or answer == "" or answer == "y" or answer == "yes":
        try:
            logging.info("Removing database from InfluxDB and Grafana")
            print "Removing database from InfluxDB and Grafana. Please wait..."
            # delete database from InfluxDB
            p = Popen("curl -s -XPOST 'http://'{}':'{}'/query' --data-urlencode 'q=DROP DATABASE {}'".format(influx_info[0], influx_info[1], influx_info[2]), shell=True, stdout=PIPE)
            response = p.stdout.read().strip("\n")
            if response == "":
                raise Exception("An error occurred while removing the database: Please make sure the Grafana and InfluxDB services are running")
            else:
                logging.info("InfluxDB response: {}".format(response))
            p.kill()

            # get database ID for db removal
            p = Popen("curl -s -G 'http://{}:{}/api/datasources/id/{}' -H 'Accept: application/json' -H 'Content-Type: application/json' -H 'Authorization: Bearer {}'".format(influx_info[0], grafana_port, influx_info[2], grafana_api_key), shell=True, stdout=PIPE)
            id = p.stdout.read().split(":")[1].strip("}")
            if id == "":
                raise Exception("An error occurred while removing the database: Could not determine the database ID")
            p.kill()

            # remove database from Grafana
            p = Popen("curl -s -XDELETE 'http://{}:{}/api/datasources/{}' -H 'Accept: application/json' -H 'Content-Type: application/json' -H 'Authorization: Bearer {}'".format(influx_info[0], grafana_port, id, grafana_api_key), shell=True, stdout=PIPE)
            response = p.stdout.read().strip("\n")
            if response == "":
                raise Exception("An error occurred while removing the database: Please make sure the Grafana and InfluxDB services are running")
            else:
                logging.info("Grafana response: {}".format(response))
            p.kill()
        except KeyboardInterrupt:
            if p is not None:
                p.kill()
        except Exception as e:
            print e.message
        sys.exit(0)


# used for output log
def appendToFile(file, content):
    with open(file, "a") as f:
        fcntl.flock(f, fcntl.LOCK_EX)
        f.write(content + '\n')
        fcntl.flock(f, fcntl.LOCK_UN)


# main program
if __name__ == "__main__":
    # make sure user is root
    if os.geteuid() != 0:
        print "Must be run as root!\n"
        sys.exit(0)

    # initialize variables
    cpe_lab = False
    influx_ip = influx_port = influx_db = ""
    external_if = ""
    influx_info = list()
    grafana_port = ""
    grafana_api_key = ""
    controller_services = list()
    compute_services = list()
    storage_services = list()
    rabbit_services = list()
    common_services = list()
    services = {}
    live_svc = ("live_stream.py",)
    collection_intervals = {"memtop": None, "memstats": None, "occtop": None, "schedtop": None, "load_avg": None, "cpu_count": None, "diskstats": None, "iostat": None, "filestats": None, "netstats": None, "postgres": None, "rabbitmq": None, "vswitch": None}
    duration = None
    unconverted_duration = ""
    collect_api_requests = False
    api_requests = ""
    auto_delete_db = False
    delete_db = ""
    collect_all_services = False
    all_services = ""
    fast_postgres_connections = False
    fast_postgres = ""
    config = ConfigParser.ConfigParser()

    node = os.popen("hostname").read().strip("\n")

    # get info from engtools.conf
    try:
        conf_file = ""
        if "engtools.conf" in tuple(os.listdir(os.getcwd())):
            conf_file = os.getcwd() + "/engtools.conf"
        elif "engtools.conf" in tuple(os.listdir("/etc/engtools/")):
            conf_file = "/etc/engtools/engtools.conf"
        config.read(conf_file)
        if config.get("LabConfiguration", "CPE_LAB").lower() == "y" or config.get("LabConfiguration", "CPE_LAB").lower() == "yes":
            cpe_lab = True
        if node.startswith("controller"):
            external_if = config.get("CollectInternal", "{}_EXTERNAL_INTERFACE".format(node.upper().replace("-", "")))
        influx_ip = config.get("RemoteServer", "INFLUX_IP")
        influx_port = config.get("RemoteServer", "INFLUX_PORT")
        influx_db = config.get("RemoteServer", "INFLUX_DB")
        grafana_port = config.get("RemoteServer", "GRAFANA_PORT")
        grafana_api_key = config.get("RemoteServer", "GRAFANA_API_KEY")
        duration = config.get("LiveStream", "DURATION")
        unconverted_duration = config.get("LiveStream", "DURATION")
        api_requests = config.get("AdditionalOptions", "API_REQUESTS")
        delete_db = config.get("AdditionalOptions", "AUTO_DELETE_DB")
        all_services = config.get("AdditionalOptions", "ALL_SERVICES")
        fast_postgres = config.get("AdditionalOptions", "FAST_POSTGRES_CONNECTIONS")
        # additional options
        if api_requests.lower() == "y" or api_requests.lower() == "yes":
            collect_api_requests = True
        if delete_db.lower() == "y" or delete_db.lower() == "yes":
            auto_delete_db = True
        if all_services.lower() == "y" or all_services.lower() == "yes":
            collect_all_services = True
        if fast_postgres.lower() == "y" or fast_postgres.lower() == "yes":
            fast_postgres_connections = True
        # convert duration into seconds
        if duration == "":
            duration = None
        elif duration.endswith("s") or duration.endswith("S"):
            duration = duration.strip("s")
            duration = duration.strip("S")
            duration = int(duration)
        elif duration.endswith("m") or duration.endswith("M"):
            duration = duration.strip("m")
            duration = duration.strip("M")
            duration = int(duration) * 60
        elif duration.endswith("h") or duration.endswith("H"):
            duration = duration.strip("h")
            duration = duration.strip("H")
            duration = int(duration) * 3600
        elif duration.endswith("d") or duration.endswith("D"):
            duration = duration.strip("d")
            duration = duration.strip("D")
            duration = int(duration) * 3600 * 24
        controller_services = tuple(config.get("ControllerServices", "CONTROLLER_SERVICE_LIST").split())
        compute_services = tuple(config.get("ComputeServices", "COMPUTE_SERVICE_LIST").split())
        storage_services = tuple(config.get("StorageServices", "STORAGE_SERVICE_LIST").split())
        rabbit_services = tuple(config.get("RabbitmqServices", "RABBITMQ_QUEUE_LIST").split())
        common_services = tuple(config.get("CommonServices", "COMMON_SERVICE_LIST").split())
        static_svcs = tuple(config.get("StaticServices", "STATIC_SERVICE_LIST").split())
        openstack_services = tuple(config.get("OpenStackServices", "OPEN_STACK_SERVICE_LIST").split())
        skip_list = tuple(config.get("SkipList", "SKIP_LIST").split())
        exclude_list = tuple(config.get("ExcludeList", "EXCLUDE_LIST").split())
        # get collection intervals
        for i in config.options("Intervals"):
            if config.get("Intervals", i) == "" or config.get("Intervals", i) is None:
                collection_intervals[i] = None
            else:
                collection_intervals[i] = int(config.get("Intervals", i))
        # get api-stats services
        DB_PORT_NUMBER = config.get("ApiStatsConstantPorts", "DB_PORT_NUMBER")
        RABBIT_PORT_NUMBER = config.get("ApiStatsConstantPorts", "RABBIT_PORT_NUMBER")
        SERVICES = OrderedDict()
        SERVICES_INFO = tuple(config.get("ApiStatsServices", "API_STATS_STRUCTURE").split('|'))
        for service_string in SERVICES_INFO:
            service_tuple = tuple(service_string.split(';'))
            if service_tuple[2] != "" and service_tuple[2] != None:
                SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': service_tuple[2]}
            else:
                SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': None}
    except Exception:
        print "An error has occurred when parsing the engtools.conf configuration file: {}".format(sys.exc_info())
        sys.exit(0)

    syseng_services = live_svc + static_svcs
    if cpe_lab is True:
        services["controller_services"] = controller_services + compute_services + storage_services + common_services
    else:
        controller_services += common_services
        compute_services += common_services
        storage_services += common_services
        services["controller_services"] = controller_services
        services["compute_services"] = compute_services
        services["storage_services"] = storage_services
        services["common_services"] = common_services
    services["syseng_services"] = syseng_services
    services["rabbit_services"] = rabbit_services

    influx_info.append(influx_ip)
    influx_info.append(influx_port)
    influx_info.append(influx_db)

    # add config options to log
    with open("/tmp/livestream.log", "w") as e:
        e.write("Configuration for {}:\n".format(node))
        e.write("-InfluxDB address: {}:{}\n".format(influx_ip, influx_port))
        e.write("-InfluxDB name: {}\n".format(influx_db))
        e.write("-CPE lab: {}\n".format(str(cpe_lab)))
        e.write(("-Collect API requests: {}\n".format(str(collect_api_requests))))
        e.write(("-Collect all services: {}\n".format(str(collect_all_services))))
        e.write(("-Fast postgres connections: {}\n".format(str(fast_postgres_connections))))
        e.write(("-Automatic database removal: {}\n".format(str(auto_delete_db))))
        if duration is not None:
            e.write("-Live stream duration: {}\n".format(unconverted_duration))
        e.close()

    # add POSTROUTING entry to NAT table
    if cpe_lab is False:
        # check controller-0 for NAT entry. If not there, add it
        if node.startswith("controller"):
            # use first interface if not specified in engtools.conf
            if external_if == "" or external_if is None:
                p = Popen("ifconfig", shell=True, stdout=PIPE)
                external_if = p.stdout.readline().split(":")[0]
                p.kill()
            appendToFile("/tmp/livestream.log", "-External interface for {}: {}".format(node, external_if))
            # enable IP forwarding
            p = Popen("sysctl -w net.ipv4.ip_forward=1 > /dev/null", shell=True)
            p.communicate()
            p = Popen("iptables -t nat -L --line-numbers", shell=True, stdout=PIPE)
            tmp = []
            # entries need to be removed in reverse order
            for line in p.stdout:
                tmp.append(line.strip("\n"))
            for line in reversed(tmp):
                l = " ".join(line.strip("\n").split()[1:])
                # if an entry already exists, remove it
                if l.startswith("MASQUERADE tcp -- anywhere"):
                    line_number = line.strip("\n").split()[0]
                    p1 = Popen("iptables -t nat -D POSTROUTING {}".format(line_number), shell=True)
                    p1.communicate()
            p.kill()
            appendToFile("/tmp/livestream.log", "-Adding NAT information to allow compute/storage nodes to communicate with remote server\n")
            # add new entry for both InfluxDB and Grafana
            p = Popen("iptables -t nat -A POSTROUTING -p tcp -o {} -d {} --dport {} -j MASQUERADE".format(external_if, influx_ip, influx_port), shell=True)
            p.communicate()
            p = Popen("iptables -t nat -A POSTROUTING -p tcp -o {} -d {} --dport {} -j MASQUERADE".format(external_if, influx_ip, grafana_port), shell=True)
            p.communicate()

    appendToFile("/tmp/livestream.log", "\nStarting collection at {}\n".format(datetime.datetime.utcnow()))
    tasks = []

    createDB(influx_info, grafana_port, grafana_api_key)

    try:
        node_type = str(node.split("-")[0])
        # if not a standard node, run the common functions with collect_all enabled
        if node_type != "controller" and node_type != "compute" and node_type != "storage":
            node_type = "common"
            collect_all_services = True

        if collection_intervals["memstats"] is not None:
            p = Process(target=collectMemstats, args=(influx_info, node, collection_intervals, services["{}_services".format(node_type)], services["syseng_services"], openstack_services, exclude_list, skip_list, collect_all_services), name="memstats")
            tasks.append(p)
            p.start()
        if collection_intervals["schedtop"] is not None:
            p = Process(target=collectSchedtop, args=(influx_info, node, collection_intervals, services["{}_services".format(node_type)], services["syseng_services"], openstack_services, exclude_list, skip_list, collect_all_services), name="schedtop")
            tasks.append(p)
            p.start()
        if collection_intervals["filestats"] is not None:
            p = Process(target=collectFilestats, args=(influx_info, node, collection_intervals, services["{}_services".format(node_type)], services["syseng_services"], exclude_list, skip_list, collect_all_services), name="filestats")
            tasks.append(p)
            p.start()
        if collection_intervals["occtop"] is not None:
            p = Process(target=collectOcctop, args=(influx_info, node, collection_intervals, getPlatformCores(node, cpe_lab)), name="occtop")
            tasks.append(p)
            p.start()
        if collection_intervals["load_avg"] is not None:
            p = Process(target=collectLoadavg, args=(influx_info, node, collection_intervals), name="load_avg")
            tasks.append(p)
            p.start()
        if collection_intervals["cpu_count"] is not None:
            p = Process(target=collectCpuCount, args=(influx_info, node, collection_intervals), name="cpu_count")
            tasks.append(p)
            p.start()
        if collection_intervals["memtop"] is not None:
            p = Process(target=collectMemtop, args=(influx_info, node, collection_intervals), name="memtop")
            tasks.append(p)
            p.start()
        if collection_intervals["diskstats"] is not None:
            p = Process(target=collectDiskstats, args=(influx_info, node, collection_intervals), name="diskstats")
            tasks.append(p)
            p.start()
        if collection_intervals["iostat"] is not None:
            p = Process(target=collectIostat, args=(influx_info, node, collection_intervals), name="iostat")
            tasks.append(p)
            p.start()
        if collection_intervals["netstats"] is not None:
            p = Process(target=collectNetstats, args=(influx_info, node, collection_intervals), name="netstats")
            tasks.append(p)
            p.start()
        if collect_api_requests is True and node_type == "controller":
            p = Process(target=collectApiStats, args=(influx_info, node, collection_intervals, SERVICES, DB_PORT_NUMBER, RABBIT_PORT_NUMBER), name="api_requests")
            tasks.append(p)
            p.start()

        if node_type == "controller":
            if collection_intervals["postgres"] is not None:
                p = Process(target=collectPostgres, args=(influx_info, node, collection_intervals), name="postgres")
                tasks.append(p)
                p.start()
                p = Process(target=collectPostgresConnections, args=(influx_info, node, collection_intervals, fast_postgres_connections), name="postgres_connections")
                tasks.append(p)
                p.start()
            if collection_intervals["rabbitmq"] is not None:
                p = Process(target=collectRabbitMq, args=(influx_info, node, collection_intervals), name="rabbitmq")
                tasks.append(p)
                p.start()
                p = Process(target=collectRabbitMqSvc, args=(influx_info, node, collection_intervals, services["rabbit_services"]), name="rabbitmq_svc")
                tasks.append(p)
                p.start()

        if node_type == "compute" or cpe_lab is True:
            if collection_intervals["vswitch"] is not None:
                p = Process(target=collectVswitch, args=(influx_info, node, collection_intervals), name="vswitch")
                tasks.append(p)
                p.start()

        print "Sending data to InfluxDB. Please tail /tmp/livestream.log"

        checkDuration(duration)
        # give a small delay to ensure services have started
        time.sleep(3)
        for t in tasks:
            os.wait()
    except KeyboardInterrupt:
        pass
    finally:
        # end here once duration param has ended or ctrl-c is pressed
        appendToFile("/tmp/livestream.log", "\nEnding collection at {}\n".format(datetime.datetime.utcnow()))
        if tasks is not None and len(tasks) > 0:
            killProcesses(tasks)
        if auto_delete_db is True:
            deleteDB(influx_info, grafana_port, grafana_api_key)
        sys.exit(0)