devstack_parser and log_node to parser folder

2015-07-22 08:23:28 -06:00 · 2015-07-22 08:23:28 -06:00 · 2c7e6d6d90
parent 56b8e829c8
commit 2c7e6d6d90
3 changed files with 464 additions and 45 deletions
--- a/stackviz/parser/devstack_parser.py
+++ b/stackviz/parser/devstack_parser.py
@ -0,0 +1,233 @@
+#
+# A parser for logs as generated by DevStack's `stack.sh` script.
+# Logs can be generated by running:
+#     LOGFILE="output.log" ./stack.sh
+#
+# Two files will be generated:
+#  - test.log.(datestamp): referred to as "log"
+#  - test.log.(datestamp).summary.(datestamp): referred to as "summary"
+#
+# The full log can be parsed using `parse_log(path)` and the summary can be
+# parsed using `parse_summary(path)`. Both functions will return a list of
+# `LogNode` instances which can be combined and categorized using `merge()`.
+#
+# For testing purposes, it's recommended to use an IPython shell. Import this
+# module, and call `bootstrap()` against the path to the primary logfile to get
+# the fully merged list of `LogNode` instances. Nodes can be analyzed directly
+# using the IPython `_repr_pretty_` functionality.
+#
+
+import os
+
+from datetime import datetime, timedelta
+from log_node import LogNode
+
+#: The format of the timestamp prefixing each log entry
+TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
+
+
+def extract_date(line):
+    """
+    Extracts a date from the given line, returning the parsed date and
+    remaining contents of the line.
+
+    :param line: the line to extract a date from
+    :return: a tuple of the parsed date and remaining line contents
+    """
+    date_str, message = line.split(' | ', 1)
+    date = datetime.strptime(date_str, TIMESTAMP_FORMAT)
+
+    return date, message.strip()
+
+
+def parse_summary(summary_path):
+    """
+    Parses a summary logfile. Summary entries are prefixed with identical
+    datestamps to those in the main log, but have only explicit log messages
+    denoting the overall execution progress.
+
+    While summary entries are also printed into the main log, the explicit
+    summary file is used to simplify parsing.
+
+    :param summary_path: the path to the summary file to parse
+    :return: a list of ordered `LogNode` instances
+    """
+    ret = []
+
+    last_node = None
+
+    with open(summary_path, 'r') as f:
+        for line in f:
+            date, message = extract_date(line)
+
+            node = LogNode(date, message)
+            if last_node:
+                last_node.next_sibling = node
+
+            ret.append(node)
+            last_node = node
+
+    return ret
+
+
+def parse_log(log_path):
+    """
+    Parses a general `stack.sh` logfile, forming a full log tree based on the
+    hierarchy of nested commands as presented in the log.
+
+    Note that command output (that is, lines not prefixed with one or more '+'
+    symbols) is ignored and will not be included it the returned list of log
+    nodes.
+
+    :param log_path: the path to the logfile to parse
+    :return: a list of parsed `LogNode` instances
+    """
+    last_depth = 1
+    last_node = None
+
+    # fake node to act as root stack entry
+    ret = LogNode(None, None)
+
+    node_stack = [ret]
+
+    with open(log_path, 'r') as f:
+        for line in f:
+            date, message = extract_date(line)
+
+            if not message.startswith('+'):
+                # ignore command output - we only want actual commands
+                continue
+
+            depth = 0
+            for char in message:
+                if char == '+':
+                    depth += 1
+                else:
+                    break
+
+            if depth - last_depth > 1:
+                # skip discontinuous lines (???)
+                continue
+
+            # adjust the stack if the depth has changed since the last node
+            if depth < last_depth:
+                node_stack.pop()
+            elif depth > last_depth:
+                node_stack.append(last_node)
+
+            node = LogNode(date, message.lstrip('+ '))
+
+            parent = node_stack[-1]
+            parent.children.append(node)
+
+            if last_node:
+                last_node.next_sibling = node
+
+            last_depth = depth
+            last_node = node
+
+    return ret.children
+
+
+def merge(summary, log):
+    """
+    Merges general log entries into parent categories based on their timestamp
+    relative to the summary output timestamp.
+
+    Note that this function is destructive and will directly modify the nodes
+    within the `summary` list.
+
+    :type summary: list[LogNode]
+    :param summary: the list of summary nodes
+    :type log: list[LogNode]
+    :param log: the list of general log nodes
+    :return: the original summary nodes with children set to the log nodes
+    """
+    if not summary:
+        return []
+
+    current_node = summary[0]
+    next_node = current_node.next_sibling
+
+    for i, entry in enumerate(log):
+        if entry.date < current_node.date:
+            # skip forward until a possible summary node is reached
+            continue
+
+        if entry.date < next_node.date:
+            current_node.children.append(entry)
+        else:
+            next_node.children.append(entry)
+
+            current_node = next_node
+            next_node = current_node.next_sibling
+
+    return summary
+
+
+def bootstrap(log_path, summary_path=None):
+    """
+    Loads, parses, and merges the given log and summary files. The path to the
+    summary file will be determined automatically based on the path to the
+    general logfile, but it must exist within the same directory.
+
+    If the log file names are changed from their default values, a summary path
+    can be explicitly provided using the optional `summary_path` parameter.
+
+    :param log_path: the path to the logfile to parse
+    :param summary_path: optional; bypasses autodetection of path names
+    :return: a list of merged `LogNode` instances, or `None` if no matching
+             summary file can be located automatically
+    """
+    if summary_path:
+        return merge(parse_summary(summary_path), parse_log(log_path))
+
+    name = os.path.basename(log_path)
+    directory = os.path.dirname(os.path.abspath(log_path))
+
+    for candidate in os.listdir(directory):
+        if candidate.startswith('{0}.summary.'.format(name)):
+            return merge(parse_summary(os.path.join(directory, candidate)),
+                         parse_log(os.path.join(directory, name)))
+
+    return None
+
+
+def get_command_totals(node, totals=None):
+    if totals is None:
+        totals = {}
+
+    for entry in node.traverse():
+        # attempt to resolve a nice looking command, ignoring any obvious
+        # arguments to make subcommands float to left of array
+        tokens = filter(lambda tok: not tok.startswith('-'),
+                        entry.message.split())
+        if not tokens:
+            continue
+
+        # strip sudo and any variable declarations
+        if tokens[0] == 'sudo':
+            tokens.pop(0)
+
+            while '=' in tokens[0]:
+                tokens.pop(0)
+
+        if '/' in tokens[0]:
+            tokens[0] = tokens[0].split('/')[-1]
+
+        # select # of tokens to include based on the first token
+        token_count = {
+            'openstack': 2,
+            'read': 2,
+            'python': 2
+        }.get(tokens[0], 1)
+
+        # combine token_count tokens to generate the command to group by
+        combined = ' '.join(tokens[0:token_count])
+        if combined not in totals:
+            totals[combined] = timedelta()
+
+        totals[combined] += entry.duration_self
+
+    return totals
+
--- a/stackviz/parser/log_node.py
+++ b/stackviz/parser/log_node.py
@ -0,0 +1,185 @@
+from inspect import getmembers
+from numbers import Number
+from datetime import datetime, timedelta
+
+#: The default cutoff for log entries when pruning takes place, in seconds
+DEFAULT_PRUNE_CUTOFF = 0.05
+
+
+class LogNode:
+    """
+    Represents an entry in an ordered event log, consisting of a date, message,
+    and an arbitrary set of child nodes.
+
+    Note that entries are assumed to be strictly sequential and linear, and all
+    nodes must have a correctly-set `next_sibling` pointing to the next
+    chronological log entry, regardless of child depth.
+
+    This class implements a custom IPython repr useful for interactive use.
+    """
+
+    def __init__(self, date, message):
+        self.date = date
+        self.message = message
+        self.next_sibling = None
+
+        self.children = []
+
+    @property
+    def duration(self):
+        """
+        Determines the overall duration for this node, beginning at this parent
+        node's start time through the final child's ending time.
+        """
+        if self.children:
+            last_sibling = self.children[-1].next_sibling
+            if not last_sibling:
+                # if no last sibling exists, use the last child itself to
+                # keep as close as possible to the true value
+                last_sibling = self.children[-1]
+
+            diff = last_sibling.date - self.date
+            return diff + last_sibling.duration
+        else:
+            # attempt get self execution time
+            if self.next_sibling:
+                return self.next_sibling.date - self.date
+            else:
+                return timedelta()
+
+    @property
+    def duration_self(self):
+        if not self.next_sibling:
+            return timedelta()
+
+        return self.next_sibling.date - self.date
+
+    def traverse(self):
+        """
+        A generator that will traverse all child nodes of this log tree
+        sequentially.
+        """
+
+        for child in self.children:
+            yield child
+
+            for subchild in child.traverse():
+                yield subchild
+
+    def __repr__(self):
+        return "%s(%s, '%s', duration=%d)" % (
+            self.__class__.__name__,
+            self.date.strftime('%H:%M:%S.%f'),
+            self.message,
+            self.duration.total_seconds()
+        )
+
+    def _repr_pretty_(self, p, cycle):
+        tc = __import__('IPython').utils.coloransi.TermColors()
+
+        c = self.__class__.__name__
+        if cycle:
+            p.text('%s(...)' % c)
+            return
+
+        with p.group(4, '%s%s%s(' % (tc.Green, c, tc.Normal), ')'):
+            i = 0
+            p.breakable()
+            for field_name, value in reversed(getmembers(self)):
+                if field_name.startswith('_'):
+                    continue
+
+                if field_name in ('next_sibling', 'traverse'):
+                    continue
+
+                if i:
+                    p.text(',')
+                    p.breakable()
+
+                p.text('%s%s %s=%s ' % (
+                    tc.Brown, field_name,
+                    tc.DarkGray, tc.Normal))
+
+                if isinstance(value, list):
+                    lp = (tc.Cyan, tc.Normal)
+                    with p.group(4, '%s[%s' % lp, '%s]%s' % lp):
+                        l_first = True
+
+                        count = 0
+                        for x in value:
+                            if not l_first:
+                                p.text(', ')
+
+                            p.breakable()
+                            p.pretty(x)
+
+                            l_first = False
+
+                            count += 1
+
+                            if count > 10:
+                                p.breakable()
+                                p.text('%s... %d more ...%s' % (
+                                    tc.LightGreen,
+                                    len(value) - count,
+                                    tc.Normal
+                                ))
+
+                                break
+                else:
+                    if value is None:
+                        p.text(tc.Blue)
+                    elif isinstance(value, basestring):
+                        p.text(tc.Red)
+                    elif isinstance(value, Number):
+                        p.text(tc.DarkGray)
+
+                    p.pretty(value)
+                    p.text(tc.Normal)
+
+                i += 1
+
+
+def prune(nodes, cutoff=DEFAULT_PRUNE_CUTOFF, fill=None):
+    """
+    Prunes the given list of `LogNode` instances, removing nodes whose duration
+    is less than the given cutoff value. If a `fill` value is provided, removed
+    nodes will be replaced with a single filler value accounting for the lost
+    duration. This filler value will be inserted at the end of the list and will
+    not be properly linked to other values.
+
+    Note that returned values will not necessarily be a continuous list of
+    nodes. The original list will remain unchanged; sibling and child references
+    will not be modified to point to account any modified, removed, or added
+    nodes.
+
+    :param nodes: the list of log nodes to prune
+    :type nodes: list[LogNode]
+    :param fill: if set, replace removed nodes with a single larger node
+    :type fill: str | None
+    :param cutoff: the minimum duration for nodes that will be retained in the
+                   tree
+    :type cutoff: float
+    :return: a (potentially) reduced list of nodes
+    """
+    ret = []
+
+    fill_amount = 0.0
+
+    for entry in nodes:
+        d = entry.duration.total_seconds()
+        if d >= cutoff:
+            ret.append(entry)
+        else:
+            fill_amount += d
+
+    if fill:
+        # create a dummy filler node with an arbitrary time to account for the
+        # change in total duration
+        time = datetime.now()
+
+        node = LogNode(time, fill)
+        node.next_sibling = LogNode(time + timedelta(seconds=fill_amount), '')
+        ret.append(node)
+
+    return ret
--- a/stackviz/static/js/visuals.js
+++ b/stackviz/static/js/visuals.js
@ -1,9 +1,9 @@

-
+"use strict";

        function calculateChildrenTime(i) {
             var dur = 0;
-             if (typeof i["duration"] !== "undefined") {
+             if (typeof i.duration !== "undefined") {
                 dur = i.duration;
             }
             else {
@ -17,12 +17,12 @@
        function displayFailingTests(d) {

            document.getElementById("failure-table-div").innerHTML="";
-            tbl = document.createElement('table');
+            var tbl = document.createElement('table');
            tbl.setAttribute("id","failure-table");
            tbl.setAttribute("class","table table-bordered table-hover table-striped");

            function findFailingTests(i,result) {
-                    if (i["status"] == "fail") {
+                    if (i.status == "fail") {
                        result.push(i);
                    }
                    else {
@ -73,58 +73,59 @@
                .value(function(d) { return d.duration; });

            var arc = d3.svg.arc()
-                .startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); })
+               .startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); })
                .endAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x + d.dx))); })
                .innerRadius(function(d) { return Math.max(0, y(d.y)); })
                .outerRadius(function(d) { return Math.max(0, y(d.y + d.dy)); });

            d3.json("/tempest/api/tree/" + run_id + "/", function(error, root) {
-              if (error) throw error;
+                if (error) throw error;

-              displayFailingTests(root);
-              var path = svg.selectAll("path")
-                  .data(partition.nodes(root))
-                .enter().append("path")
-                  .attr("d", arc)
-                  .style("fill", function(d) { return color(d.name); })
-                  .on("click", click);
+                displayFailingTests(root);

-              function click(d) {
-                path.transition()
-                  .duration(750)
-                  .attrTween("d", arcTween(d));
+                var path = svg.selectAll("path")
+                    .data(partition.nodes(root))
+                  .enter().append("path")
+                    .attr("d", arc)
+                    .style("fill", function(d) { return color(d.name); })
+                    .on("click", click);

-                oldtbl = document.getElementById("result-table-div");
-                oldtbl.innerHTML = "";
-                tbl = document.createElement('table');
-                tbl.setAttribute("id","test-table");
-                tbl.setAttribute("class","table table-bordered table-hover table-striped");
-                if (typeof d.children == "undefined") {
-                    for (var key in d) {
+                function click(d) {
+                    path.transition()
+                      .duration(750)
+                      .attrTween("d", arcTween(d));
+
+                    var oldtbl = document.getElementById("result-table-div");
+                    oldtbl.innerHTML = "";
+                    var tbl = document.createElement('table');
+                    tbl.setAttribute("id","test-table");
+                    tbl.setAttribute("class","table table-bordered table-hover table-striped");
+                    if (typeof d.children == "undefined") {
+                        for (var key in d) {
+                                var row = tbl.insertRow();
+                                var td1 = row.insertCell();
+                                var td2 = row.insertCell();
+                                td1.innerHTML = key;
+                                td2.innerHTML = d[key];
+                        }
+                        document.getElementById("result-table-div").appendChild(tbl);
+                        document.getElementById("table-heading").innerHTML=d.name;
+                    }
+                    else {
+                        for (var j in d.children) {
                            var row = tbl.insertRow();
                            var td1 = row.insertCell();
                            var td2 = row.insertCell();
-                            td1.innerHTML = key;
-                            td2.innerHTML = d[key];
+                            td1.innerHTML = d.children[j].name;
+                            td2.innerHTML = calculateChildrenTime(d.children[j]).toFixed(2);
+                            td1.style.color = color(d.children[j].name);
+                            document.getElementById("result-table-div").appendChild(tbl);
+                            document.getElementById("table-heading").innerHTML=d.name +
+                                ": " + calculateChildrenTime(d).toFixed(2) + " seconds"
+                            $( "table-test" ).DataTable();
+                        }
                    }
-                    document.getElementById("result-table-div").appendChild(tbl);
-                    document.getElementById("table-heading").innerHTML=d.name;
-                }
-                else {
-                    for (var j in d.children) {
-                        var row = tbl.insertRow();
-                        var td1 = row.insertCell();
-                        var td2 = row.insertCell();
-                        td1.innerHTML = d.children[j].name;
-                        td2.innerHTML = calculateChildrenTime(d.children[j]).toFixed(2);
-                        td1.style.color = color(d.children[j].name);
-                        document.getElementById("result-table-div").appendChild(tbl);
-                        document.getElementById("table-heading").innerHTML=d.name +
-                            ": " + calculateChildrenTime(d).toFixed(2) + " seconds"
-                        $( "table-test" ).DataTable();
-                    }
-                }
-              }
+                  }

            });

@ -136,7 +137,7 @@
                  yd = d3.interpolate(y.domain(), [d.y, 1]),
                  yr = d3.interpolate(y.range(), [d.y ? 20 : 0, radius]);
              return function(d, i) {
-                return i
+                  return i
                    ? function(t) { return arc(d); }
                    : function(t) { x.domain(xd(t)); y.domain(yd(t)).range(yr(t)); return arc(d); };
              };