It turned out that the first portion of the results was collected in an environment with a sporadic connectivity issue between Keystone and Memcache. Valid results were collected and processed.

Change-Id: I6c8f388ef380a19016c4bc9cf49582c8a8f433d1
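
"""Summarize Keystone DB and cache activity from OSprofiler JSON traces.

Builds per-request tables: overall DB query stats, outlier queries,
multi-JOIN queries, and cached-method usage.
"""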

import argparse
import collections
import copy
import itertools
import json
import os

import numpy as np
import prettytable


NODES_TEMPLATE = {
    "DB queries": {
        "total": 0,
        "total_time_spent": 0,
        "select1": 0,
        "select1_time_spent": 0,
        "real": 0,
        "real_time_spent": 0,
        "SELECT": {
            "total": 0,
            "INNER JOIN": 0,
            "LEFT JOIN": 0,
            "RIGHT JOIN": 0,
            "FULL JOIN": 0,
            "time_spent": collections.OrderedDict(),
        },
        "INSERT": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "UPDATE": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "DELETE": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "red_flag": {
            "2joins": {
                "total": 0,
                "queries": [],
                "time_spent": collections.OrderedDict(),
            },
            "3+joins": {
                "total": 0,
                "queries": [],
                "time_spent": collections.OrderedDict(),
            }
        }
    },
    "Memcache cached operations": {},
    "Context cached operations": {},
    "Cached time spent": collections.OrderedDict(),
}

NODES = copy.deepcopy(NODES_TEMPLATE)
OUTLIER_QUERIES = {}
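
# NODES accumulates the statistics for a single trace. Every "time_spent"
# OrderedDict maps a duration in ms to the list of operations that took
# exactly that long, e.g. {12: ["SELECT ...", "SELECT ..."]} records two
# 12 ms queries; sort_dicts() later collapses these maps into totals.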


def define_node(node):
    """Dispatch a Keystone trace node to the matching handler."""
    if node["info"]["project"] != "keystone":
        return

    if node["info"]["name"] not in ["db", "cache", "memoize"]:
        return

    time_spent = node["info"]["finished"] - node["info"]["started"]

    if node["info"]["name"] == "db":
        process_db_calls(node, time_spent)
    elif node["info"]["name"] == "cache":
        if not node["children"]:
            process_cache_calls(node, time_spent, "cache")
        else:
            for child in node["children"]:
                define_node(child)
    elif node["info"]["name"] == "memoize":
        if not node["children"] or len(node["children"]) == 1:
            process_cache_calls(node, time_spent)
        else:
            for child in node["children"]:
                define_node(child)


def process_cache_calls(node, time_spent, prefix="memoize"):
    """Record a cached operation under the Memcache or context cache."""
    cache_info = node["info"]["meta.raw_payload.%s-start" % prefix][
        "info"]["fn_info"]

    def add_info(pref):
        if not NODES["%s cached operations" % pref].get(cache_info):
            NODES["%s cached operations" % pref][cache_info] = 0
        NODES["%s cached operations" % pref][cache_info] += 1
        if not NODES["Cached time spent"].get(time_spent):
            NODES["Cached time spent"][time_spent] = []
        NODES["Cached time spent"][time_spent].append(cache_info)

    # Liberty env: all cache operations hit Memcache
    if not node["children"]:
        add_info("Memcache")
        return

    # Mitaka env with nested 2-layer cache
    ctxt_cache_info = node["children"][0]
    if ctxt_cache_info["children"]:
        memcache_cache_info = ctxt_cache_info["children"][0]
        if not memcache_cache_info["children"]:
            add_info("Memcache")
    else:
        add_info("Context")


def process_db_calls(node, time_spent):
    """Classify a DB call as infrastructure (SELECT 1) or real work."""
    NODES["DB queries"]["total"] += 1
    NODES["DB queries"]["total_time_spent"] += time_spent
    statement = node[
        "info"]["meta.raw_payload.db-start"]["info"]["db"]["statement"]
    if statement.startswith("SELECT 1"):
        NODES["DB queries"]["select1"] += 1
        NODES["DB queries"]["select1_time_spent"] += time_spent
    else:
        NODES["DB queries"]["real"] += 1
        NODES["DB queries"]["real_time_spent"] += time_spent

        if statement.startswith("SELECT"):
            process_selects(statement, time_spent)
        elif statement.startswith("UPDATE"):
            process_base_db_calls("UPDATE", statement, time_spent)
        elif statement.startswith("INSERT"):
            process_base_db_calls("INSERT", statement, time_spent)
        elif statement.startswith("DELETE"):
            process_base_db_calls("DELETE", statement, time_spent)


def process_base_db_calls(command, statement, time_spent):
    NODES["DB queries"][command]["total"] += 1
    if not NODES["DB queries"][command]["time_spent"].get(time_spent):
        NODES["DB queries"][command]["time_spent"][time_spent] = []
    NODES["DB queries"][command]["time_spent"][time_spent].append(
        statement)


def process_selects(statement, time_spent):
    process_base_db_calls("SELECT", statement, time_spent)

    ij = statement.count("INNER JOIN")
    lj = statement.count("LEFT JOIN")
    rj = statement.count("RIGHT JOIN")
    fj = statement.count("FULL JOIN")
    NODES["DB queries"]["SELECT"]["INNER JOIN"] += ij
    NODES["DB queries"]["SELECT"]["LEFT JOIN"] += lj
    NODES["DB queries"]["SELECT"]["RIGHT JOIN"] += rj
    NODES["DB queries"]["SELECT"]["FULL JOIN"] += fj

    # raise red flags if too many JOINs are met
    if ij + lj + rj + fj == 2:
        NODES["DB queries"]["red_flag"]["2joins"]["total"] += 1
        NODES["DB queries"]["red_flag"]["2joins"][
            "queries"].append(statement)
        if not NODES["DB queries"]["red_flag"]["2joins"][
                "time_spent"].get(time_spent):
            NODES["DB queries"]["red_flag"]["2joins"]["time_spent"][
                time_spent] = []
        NODES["DB queries"]["red_flag"]["2joins"]["time_spent"][
            time_spent].append(statement)
    elif ij + lj + rj + fj >= 3:
        NODES["DB queries"]["red_flag"]["3+joins"]["total"] += 1
        NODES["DB queries"]["red_flag"]["3+joins"][
            "queries"].append(statement)
        if not NODES["DB queries"]["red_flag"]["3+joins"][
                "time_spent"].get(time_spent):
            NODES["DB queries"]["red_flag"]["3+joins"]["time_spent"][
                time_spent] = []
        NODES["DB queries"]["red_flag"]["3+joins"]["time_spent"][
            time_spent].append(statement)
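
# For example, a SELECT with one INNER JOIN and one LEFT JOIN counts as
# "2joins", while any statement with three or more JOINs of whatever kind
# is tracked under "3+joins".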


def define_nodes(data):
    """Walk the trace tree, handing leaf and memoize nodes to define_node."""
    for child in data["children"]:
        if not child["children"] or child["info"]["name"] == "memoize":
            define_node(child)
        else:
            define_nodes(child)


def sort_dicts(dictionary):
    """Collapse the per-duration maps into total time spent, in ms."""
    new_nodes = copy.deepcopy(dictionary)
    for key in ["SELECT", "INSERT", "DELETE", "UPDATE"]:
        new_nodes["DB queries"][key]["time_spent"] = \
            sum([k * len(v) for k, v
                 in dictionary["DB queries"][key]["time_spent"].items()])
    for key in ["2joins", "3+joins"]:
        new_nodes["DB queries"]["red_flag"][key]["time_spent"] = \
            sum([k * len(v) for k, v
                 in dictionary["DB queries"]["red_flag"][key][
                     "time_spent"].items()])
    new_nodes["Cached time spent"] = \
        sum([k * len(v) for k, v
             in dictionary["Cached time spent"].items()])
    return new_nodes
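
# E.g. a "time_spent" map of {12: [q1, q2], 40: [q3]} collapses into
# 12 * 2 + 40 * 1 = 64 ms of total time for that query class.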


def detect_outliers(data, m=2.):
    """Flag durations deviating from the median by >= m MADs.

    MAD is the median absolute deviation; queries at flagged durations
    are collected into OUTLIER_QUERIES.
    """
    full_time_set = list(itertools.chain(*[[k] * len(v) for k, v
                                           in data.items()]))
    dat = np.abs(full_time_set - np.median(full_time_set))
    mdev = np.median(dat)
    sss = dat / mdev if mdev else 0.
    if mdev:
        for idx, val in enumerate((sss < m).tolist()):
            if not val:
                for query in data[full_time_set[idx]]:
                    OUTLIER_QUERIES[query] = full_time_set[idx]
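
# Worked example for the rule above: for durations [10, 11, 12, 95] the
# median is 11.5 and the absolute deviations are [1.5, 0.5, 0.5, 83.5],
# whose median (mdev) is 1.0; 95 scores 83.5 >= m == 2 and is reported,
# while the other durations stay below the threshold.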


def prepare_tables(nodes):
    # prepare table with common information
    common_info_table = prettytable.PrettyTable(["**Metric**", "**Value**"])
    common_info_table.align["**Metric**"] = "l"
    common_info_table.align["**Value**"] = "l"
    common_info_table.padding_width = 1
    common_info_table.max_width = 100
    common_info_table.header = True
    common_info_table.hrules = prettytable.ALL

    common_info_table.add_row(["Total (*) Keystone DB queries count",
                               nodes["DB queries"]["total"]])
    common_info_table.add_row(["Total (*) Keystone DB queries time spent, ms",
                               nodes["DB queries"]["total_time_spent"]])
    common_info_table.add_row([
        "Infrastructure (SELECT 1) Keystone DB queries count",
        nodes["DB queries"]["select1"]])
    common_info_table.add_row([
        "Infrastructure (SELECT 1) Keystone DB queries time spent, ms",
        nodes["DB queries"]["select1_time_spent"]])
    common_info_table.add_row(["Real Keystone DB queries count",
                               nodes["DB queries"]["real"]])
    common_info_table.add_row(["Real Keystone DB queries time spent, ms",
                               nodes["DB queries"]["real_time_spent"]])

    db_query_tmpl = "%s\n\n|"

    for key in ["SELECT", "INSERT", "DELETE", "UPDATE"]:
        if nodes["DB queries"][key]["total"]:
            common_info_table.add_row([
                "%s Keystone DB queries count" % key,
                nodes["DB queries"][key]["total"]])
            common_info_table.add_row([
                "%s Keystone DB queries time spent, ms" % key,
                nodes["DB queries"][key]["time_spent"]])
        # outlier detection needs the per-duration map, so it reads the
        # global NODES rather than the summed copy in `nodes`
        detect_outliers(NODES["DB queries"][key]["time_spent"])

    # prepare table with outliers information
    outliers_table = prettytable.PrettyTable(["**DB query**",
                                              "**Time spent, ms**"])
    outliers_table.align["**DB query**"] = "l"
    outliers_table.align["**Time spent, ms**"] = "l"
    outliers_table.max_width = 100
    outliers_table.header = True
    outliers_table.hrules = prettytable.ALL

    for query in OUTLIER_QUERIES:
        outliers_table.add_row([db_query_tmpl % query, OUTLIER_QUERIES[query]])

    # prepare table with information about DB requests containing multiple
    # JOIN statements inside
    multi_join_queries = prettytable.PrettyTable(["**DB query**",
                                                  "**Time spent, ms**"])
    multi_join_queries.align["**DB query**"] = "l"
    multi_join_queries.align["**Time spent, ms**"] = "l"
    multi_join_queries.max_width = 100
    multi_join_queries.header = True
    multi_join_queries.hrules = prettytable.ALL

    for key in ["2joins", "3+joins"]:
        for ts in NODES["DB queries"]["red_flag"][key]["time_spent"]:
            for query in NODES["DB queries"]["red_flag"][key][
                    "time_spent"][ts]:
                multi_join_queries.add_row([db_query_tmpl % query, ts])

    # prepare table(s) with cache info
    cache_table = prettytable.PrettyTable(["**Cache**",
                                           "**Cached operations**",
                                           "**args**",
                                           "**kwargs**",
                                           "**Times used**"])

    name_map = {
        "Memcache cached operations": "Memcache",
        "Context cached operations": "Local context",
    }
    cache_table.align["**Cache**"] = "l"
    cache_table.align["**Cached operations**"] = "l"
    cache_table.align["**args**"] = "l"
    cache_table.align["**kwargs**"] = "l"
    cache_table.align["**Times used**"] = "l"
    cache_table.max_width = 100
    cache_table.header = True
    cache_table.hrules = prettytable.ALL

    for cache in ["Memcache cached operations", "Context cached operations"]:
        for operation, times in nodes[cache].items():
            # fn_info is a stringified tuple "(name, (args), {kwargs})";
            # peel the outer parens and split into name / args / kwargs
            operation = operation[1:-1].split(", ")
            cache_table.add_row([name_map[cache],
                                 operation[0][1:-1],
                                 ", ".join(operation[1:-1])[1:-1],
                                 operation[-1][1:-1], times])

    return common_info_table, multi_join_queries, outliers_table, cache_table
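
# Note: the "**…**" cell markers and the "\n\n|" tail in db_query_tmpl
# suggest the tables are pasted into reStructuredText documents, where
# PrettyTable's grid output doubles as a reST grid table.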


def main():
    parser = argparse.ArgumentParser(description='Process JSON file with '
                                                 'OSprofiler output.')
    parser.add_argument('path', type=str,
                        help='Path to the JSON file / directory with list of '
                             'JSON files with OSprofiler output')

    args = parser.parse_args()
    global NODES
    if os.path.isfile(args.path):
        with open(args.path) as data_file:
            data = json.load(data_file)
            define_nodes(data)
            nodes = sort_dicts(NODES)
            common_info_table, multi_join_queries, outliers_table,\
                cache_table = prepare_tables(nodes)
            print(common_info_table)
            print(outliers_table)
            print(multi_join_queries)
            print(cache_table)
    elif os.path.isdir(args.path):
        # traces are expected to be JSON saved under a .txt extension
        for item in os.listdir(args.path):
            if item.endswith(".txt"):
                with open(os.path.join(args.path, item)) as data_file:
                    data = json.load(data_file)
                    # reset per-file aggregation state
                    NODES = copy.deepcopy(NODES_TEMPLATE)
                    OUTLIER_QUERIES.clear()
                    define_nodes(data)
                    nodes = sort_dicts(NODES)
                    common_info_table, multi_join_queries, outliers_table,\
                        cache_table = prepare_tables(nodes)
                    item_name = \
                        item.split(".")[0].replace("_", " ").capitalize() + \
                        " request stats"
                    print(item_name)
                    print(len(item_name) * "~" + "\n")
                    print("**%s**\n" % "Control plane request overlook")
                    print(common_info_table)
                    print("\n**%s**\n" % "Keystone DB queries outliers")
                    print(outliers_table)
                    print("\n**%s**\n" % "Keystone DB queries with multi "
                                         "JOINs inside")
                    print(multi_join_queries)
                    print("\n")
                    print("**Keystone cached methods stats**\n")
                    print(cache_table)
                    print("\n")


if __name__ == "__main__":
    main()
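
# Usage sketch (file names are illustrative, not from the source):
#   python parse_profiler_output.py trace.json
#   python parse_profiler_output.py results_dir/
# Directory mode expects JSON traces saved with a .txt extension.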