devstack_parser and log_node to parser folder

This commit is contained in:
Austin Clark 2015-07-22 08:23:28 -06:00
parent 56b8e829c8
commit 2c7e6d6d90
3 changed files with 464 additions and 45 deletions

View File

@ -0,0 +1,233 @@
#
# A parser for logs as generated by DevStack's `stack.sh` script.
# Logs can be generated by running:
# LOGFILE="output.log" ./stack.sh
#
# Two files will be generated:
# - test.log.(datestamp): referred to as "log"
# - test.log.(datestamp).summary.(datestamp): referred to as "summary"
#
# The full log can be parsed using `parse_log(path)` and the summary can be
# parsed using `parse_summary(path)`. Both functions will return a list of
# `LogNode` instances which can be combined and categorized using `merge()`.
#
# For testing purposes, it's recommended to use an IPython shell. Import this
# module, and call `bootstrap()` against the path to the primary logfile to get
# the fully merged list of `LogNode` instances. Nodes can be analyzed directly
# using the IPython `_repr_pretty_` functionality.
#
import os
from datetime import datetime, timedelta
from log_node import LogNode
#: The format of the timestamp prefixing each log entry
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
def extract_date(line):
"""
Extracts a date from the given line, returning the parsed date and
remaining contents of the line.
:param line: the line to extract a date from
:return: a tuple of the parsed date and remaining line contents
"""
date_str, message = line.split(' | ', 1)
date = datetime.strptime(date_str, TIMESTAMP_FORMAT)
return date, message.strip()
def parse_summary(summary_path):
"""
Parses a summary logfile. Summary entries are prefixed with identical
datestamps to those in the main log, but have only explicit log messages
denoting the overall execution progress.
While summary entries are also printed into the main log, the explicit
summary file is used to simplify parsing.
:param summary_path: the path to the summary file to parse
:return: a list of ordered `LogNode` instances
"""
ret = []
last_node = None
with open(summary_path, 'r') as f:
for line in f:
date, message = extract_date(line)
node = LogNode(date, message)
if last_node:
last_node.next_sibling = node
ret.append(node)
last_node = node
return ret
def parse_log(log_path):
"""
Parses a general `stack.sh` logfile, forming a full log tree based on the
hierarchy of nested commands as presented in the log.
Note that command output (that is, lines not prefixed with one or more '+'
symbols) is ignored and will not be included it the returned list of log
nodes.
:param log_path: the path to the logfile to parse
:return: a list of parsed `LogNode` instances
"""
last_depth = 1
last_node = None
# fake node to act as root stack entry
ret = LogNode(None, None)
node_stack = [ret]
with open(log_path, 'r') as f:
for line in f:
date, message = extract_date(line)
if not message.startswith('+'):
# ignore command output - we only want actual commands
continue
depth = 0
for char in message:
if char == '+':
depth += 1
else:
break
if depth - last_depth > 1:
# skip discontinuous lines (???)
continue
# adjust the stack if the depth has changed since the last node
if depth < last_depth:
node_stack.pop()
elif depth > last_depth:
node_stack.append(last_node)
node = LogNode(date, message.lstrip('+ '))
parent = node_stack[-1]
parent.children.append(node)
if last_node:
last_node.next_sibling = node
last_depth = depth
last_node = node
return ret.children
def merge(summary, log):
"""
Merges general log entries into parent categories based on their timestamp
relative to the summary output timestamp.
Note that this function is destructive and will directly modify the nodes
within the `summary` list.
:type summary: list[LogNode]
:param summary: the list of summary nodes
:type log: list[LogNode]
:param log: the list of general log nodes
:return: the original summary nodes with children set to the log nodes
"""
if not summary:
return []
current_node = summary[0]
next_node = current_node.next_sibling
for i, entry in enumerate(log):
if entry.date < current_node.date:
# skip forward until a possible summary node is reached
continue
if entry.date < next_node.date:
current_node.children.append(entry)
else:
next_node.children.append(entry)
current_node = next_node
next_node = current_node.next_sibling
return summary
def bootstrap(log_path, summary_path=None):
"""
Loads, parses, and merges the given log and summary files. The path to the
summary file will be determined automatically based on the path to the
general logfile, but it must exist within the same directory.
If the log file names are changed from their default values, a summary path
can be explicitly provided using the optional `summary_path` parameter.
:param log_path: the path to the logfile to parse
:param summary_path: optional; bypasses autodetection of path names
:return: a list of merged `LogNode` instances, or `None` if no matching
summary file can be located automatically
"""
if summary_path:
return merge(parse_summary(summary_path), parse_log(log_path))
name = os.path.basename(log_path)
directory = os.path.dirname(os.path.abspath(log_path))
for candidate in os.listdir(directory):
if candidate.startswith('{0}.summary.'.format(name)):
return merge(parse_summary(os.path.join(directory, candidate)),
parse_log(os.path.join(directory, name)))
return None
def get_command_totals(node, totals=None):
if totals is None:
totals = {}
for entry in node.traverse():
# attempt to resolve a nice looking command, ignoring any obvious
# arguments to make subcommands float to left of array
tokens = filter(lambda tok: not tok.startswith('-'),
entry.message.split())
if not tokens:
continue
# strip sudo and any variable declarations
if tokens[0] == 'sudo':
tokens.pop(0)
while '=' in tokens[0]:
tokens.pop(0)
if '/' in tokens[0]:
tokens[0] = tokens[0].split('/')[-1]
# select # of tokens to include based on the first token
token_count = {
'openstack': 2,
'read': 2,
'python': 2
}.get(tokens[0], 1)
# combine token_count tokens to generate the command to group by
combined = ' '.join(tokens[0:token_count])
if combined not in totals:
totals[combined] = timedelta()
totals[combined] += entry.duration_self
return totals

185
stackviz/parser/log_node.py Normal file
View File

@ -0,0 +1,185 @@
from inspect import getmembers
from numbers import Number
from datetime import datetime, timedelta
#: The default cutoff for log entries when pruning takes place, in seconds
DEFAULT_PRUNE_CUTOFF = 0.05
class LogNode:
"""
Represents an entry in an ordered event log, consisting of a date, message,
and an arbitrary set of child nodes.
Note that entries are assumed to be strictly sequential and linear, and all
nodes must have a correctly-set `next_sibling` pointing to the next
chronological log entry, regardless of child depth.
This class implements a custom IPython repr useful for interactive use.
"""
def __init__(self, date, message):
self.date = date
self.message = message
self.next_sibling = None
self.children = []
@property
def duration(self):
"""
Determines the overall duration for this node, beginning at this parent
node's start time through the final child's ending time.
"""
if self.children:
last_sibling = self.children[-1].next_sibling
if not last_sibling:
# if no last sibling exists, use the last child itself to
# keep as close as possible to the true value
last_sibling = self.children[-1]
diff = last_sibling.date - self.date
return diff + last_sibling.duration
else:
# attempt get self execution time
if self.next_sibling:
return self.next_sibling.date - self.date
else:
return timedelta()
@property
def duration_self(self):
if not self.next_sibling:
return timedelta()
return self.next_sibling.date - self.date
def traverse(self):
"""
A generator that will traverse all child nodes of this log tree
sequentially.
"""
for child in self.children:
yield child
for subchild in child.traverse():
yield subchild
def __repr__(self):
return "%s(%s, '%s', duration=%d)" % (
self.__class__.__name__,
self.date.strftime('%H:%M:%S.%f'),
self.message,
self.duration.total_seconds()
)
def _repr_pretty_(self, p, cycle):
tc = __import__('IPython').utils.coloransi.TermColors()
c = self.__class__.__name__
if cycle:
p.text('%s(...)' % c)
return
with p.group(4, '%s%s%s(' % (tc.Green, c, tc.Normal), ')'):
i = 0
p.breakable()
for field_name, value in reversed(getmembers(self)):
if field_name.startswith('_'):
continue
if field_name in ('next_sibling', 'traverse'):
continue
if i:
p.text(',')
p.breakable()
p.text('%s%s %s=%s ' % (
tc.Brown, field_name,
tc.DarkGray, tc.Normal))
if isinstance(value, list):
lp = (tc.Cyan, tc.Normal)
with p.group(4, '%s[%s' % lp, '%s]%s' % lp):
l_first = True
count = 0
for x in value:
if not l_first:
p.text(', ')
p.breakable()
p.pretty(x)
l_first = False
count += 1
if count > 10:
p.breakable()
p.text('%s... %d more ...%s' % (
tc.LightGreen,
len(value) - count,
tc.Normal
))
break
else:
if value is None:
p.text(tc.Blue)
elif isinstance(value, basestring):
p.text(tc.Red)
elif isinstance(value, Number):
p.text(tc.DarkGray)
p.pretty(value)
p.text(tc.Normal)
i += 1
def prune(nodes, cutoff=DEFAULT_PRUNE_CUTOFF, fill=None):
"""
Prunes the given list of `LogNode` instances, removing nodes whose duration
is less than the given cutoff value. If a `fill` value is provided, removed
nodes will be replaced with a single filler value accounting for the lost
duration. This filler value will be inserted at the end of the list and will
not be properly linked to other values.
Note that returned values will not necessarily be a continuous list of
nodes. The original list will remain unchanged; sibling and child references
will not be modified to point to account any modified, removed, or added
nodes.
:param nodes: the list of log nodes to prune
:type nodes: list[LogNode]
:param fill: if set, replace removed nodes with a single larger node
:type fill: str | None
:param cutoff: the minimum duration for nodes that will be retained in the
tree
:type cutoff: float
:return: a (potentially) reduced list of nodes
"""
ret = []
fill_amount = 0.0
for entry in nodes:
d = entry.duration.total_seconds()
if d >= cutoff:
ret.append(entry)
else:
fill_amount += d
if fill:
# create a dummy filler node with an arbitrary time to account for the
# change in total duration
time = datetime.now()
node = LogNode(time, fill)
node.next_sibling = LogNode(time + timedelta(seconds=fill_amount), '')
ret.append(node)
return ret

View File

@ -1,9 +1,9 @@
"use strict";
function calculateChildrenTime(i) {
var dur = 0;
if (typeof i["duration"] !== "undefined") {
if (typeof i.duration !== "undefined") {
dur = i.duration;
}
else {
@ -17,12 +17,12 @@
function displayFailingTests(d) {
document.getElementById("failure-table-div").innerHTML="";
tbl = document.createElement('table');
var tbl = document.createElement('table');
tbl.setAttribute("id","failure-table");
tbl.setAttribute("class","table table-bordered table-hover table-striped");
function findFailingTests(i,result) {
if (i["status"] == "fail") {
if (i.status == "fail") {
result.push(i);
}
else {
@ -73,58 +73,59 @@
.value(function(d) { return d.duration; });
var arc = d3.svg.arc()
.startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); })
.startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); })
.endAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x + d.dx))); })
.innerRadius(function(d) { return Math.max(0, y(d.y)); })
.outerRadius(function(d) { return Math.max(0, y(d.y + d.dy)); });
d3.json("/tempest/api/tree/" + run_id + "/", function(error, root) {
if (error) throw error;
if (error) throw error;
displayFailingTests(root);
var path = svg.selectAll("path")
.data(partition.nodes(root))
.enter().append("path")
.attr("d", arc)
.style("fill", function(d) { return color(d.name); })
.on("click", click);
displayFailingTests(root);
function click(d) {
path.transition()
.duration(750)
.attrTween("d", arcTween(d));
var path = svg.selectAll("path")
.data(partition.nodes(root))
.enter().append("path")
.attr("d", arc)
.style("fill", function(d) { return color(d.name); })
.on("click", click);
oldtbl = document.getElementById("result-table-div");
oldtbl.innerHTML = "";
tbl = document.createElement('table');
tbl.setAttribute("id","test-table");
tbl.setAttribute("class","table table-bordered table-hover table-striped");
if (typeof d.children == "undefined") {
for (var key in d) {
function click(d) {
path.transition()
.duration(750)
.attrTween("d", arcTween(d));
var oldtbl = document.getElementById("result-table-div");
oldtbl.innerHTML = "";
var tbl = document.createElement('table');
tbl.setAttribute("id","test-table");
tbl.setAttribute("class","table table-bordered table-hover table-striped");
if (typeof d.children == "undefined") {
for (var key in d) {
var row = tbl.insertRow();
var td1 = row.insertCell();
var td2 = row.insertCell();
td1.innerHTML = key;
td2.innerHTML = d[key];
}
document.getElementById("result-table-div").appendChild(tbl);
document.getElementById("table-heading").innerHTML=d.name;
}
else {
for (var j in d.children) {
var row = tbl.insertRow();
var td1 = row.insertCell();
var td2 = row.insertCell();
td1.innerHTML = key;
td2.innerHTML = d[key];
td1.innerHTML = d.children[j].name;
td2.innerHTML = calculateChildrenTime(d.children[j]).toFixed(2);
td1.style.color = color(d.children[j].name);
document.getElementById("result-table-div").appendChild(tbl);
document.getElementById("table-heading").innerHTML=d.name +
": " + calculateChildrenTime(d).toFixed(2) + " seconds"
$( "table-test" ).DataTable();
}
}
document.getElementById("result-table-div").appendChild(tbl);
document.getElementById("table-heading").innerHTML=d.name;
}
else {
for (var j in d.children) {
var row = tbl.insertRow();
var td1 = row.insertCell();
var td2 = row.insertCell();
td1.innerHTML = d.children[j].name;
td2.innerHTML = calculateChildrenTime(d.children[j]).toFixed(2);
td1.style.color = color(d.children[j].name);
document.getElementById("result-table-div").appendChild(tbl);
document.getElementById("table-heading").innerHTML=d.name +
": " + calculateChildrenTime(d).toFixed(2) + " seconds"
$( "table-test" ).DataTable();
}
}
}
}
});
@ -136,7 +137,7 @@
yd = d3.interpolate(y.domain(), [d.y, 1]),
yr = d3.interpolate(y.range(), [d.y ? 20 : 0, radius]);
return function(d, i) {
return i
return i
? function(t) { return arc(d); }
: function(t) { x.domain(xd(t)); y.domain(yd(t)).range(yr(t)); return arc(d); };
};