# Copyright 2015 Hewlett-Packard Development Company, L.P. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # # A parser for logs as generated by DevStack's `stack.sh` script. # Logs can be generated by running: # LOGFILE="output.log" ./stack.sh # # Two files will be generated: # - test.log.(datestamp): referred to as "log" # - test.log.(datestamp).summary.(datestamp): referred to as "summary" # # The full log can be parsed using `parse_log(path)` and the summary can be # parsed using `parse_summary(path)`. Both functions will return a list of # `LogNode` instances which can be combined and categorized using `merge()`. # # For testing purposes, it's recommended to use an IPython shell. Import this # module, and call `bootstrap()` against the path to the primary logfile to get # the fully merged list of `LogNode` instances. Nodes can be analyzed directly # using the IPython `_repr_pretty_` functionality. # import os from datetime import datetime from datetime import timedelta from log_node import LogNode #: The format of the timestamp prefixing each log entry TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f' def extract_date(line): """Extracts a date from the given line Returns the parsed date and remaining contents of the line. :param line: the line to extract a date from :return: a tuple of the parsed date and remaining line contents """ date_str, message = line.split(' | ', 1) date = datetime.strptime(date_str, TIMESTAMP_FORMAT) return date, message.strip() def parse_summary(summary_path): """Parses a summary logfile Summary entries are prefixed with identical datestamps to those in the main log, but have only explicit log messages denoting the overall execution progress. While summary entries are also printed into the main log, the explicit summary file is used to simplify parsing. :param summary_path: the path to the summary file to parse :return: a list of ordered `LogNode` instances """ ret = [] last_node = None with open(summary_path, 'r') as f: for line in f: date, message = extract_date(line) node = LogNode(date, message) if last_node: last_node.next_sibling = node ret.append(node) last_node = node return ret def parse_log(log_path): """Parses a general `stack.sh` logfile, forming a full log tree The log tree is based on the hierarchy of nested commands as presented in the log. Note that command output (that is, lines not prefixed with one or more '+' symbols) is ignored and will not be included it the returned list of log nodes. :param log_path: the path to the logfile to parse :return: a list of parsed `LogNode` instances """ last_depth = 1 last_node = None # fake node to act as root stack entry ret = LogNode(None, None) node_stack = [ret] with open(log_path, 'r') as f: for line in f: date, message = extract_date(line) if not message.startswith('+'): # ignore command output - we only want actual commands continue depth = 0 for char in message: if char == '+': depth += 1 else: break if depth - last_depth > 1: # skip discontinuous lines (???) continue # adjust the stack if the depth has changed since the last node if depth < last_depth: node_stack.pop() elif depth > last_depth: node_stack.append(last_node) node = LogNode(date, message.lstrip('+ ')) parent = node_stack[-1] parent.children.append(node) if last_node: last_node.next_sibling = node last_depth = depth last_node = node return ret.children def merge(summary, log): """Merges log entries into parent categories based on timestamp. Merges general log entries into parent categories based on their timestamp relative to the summary output timestamp. Note that this function is destructive and will directly modify the nodes within the `summary` list. :type summary: list[LogNode] :param summary: the list of summary nodes :type log: list[LogNode] :param log: the list of general log nodes :return: the original summary nodes with children set to the log nodes """ if not summary: return [] current_node = summary[0] next_node = current_node.next_sibling for i, entry in enumerate(log): if entry.date < current_node.date: # skip forward until a possible summary node is reached continue if entry.date < next_node.date: current_node.children.append(entry) else: next_node.children.append(entry) current_node = next_node next_node = current_node.next_sibling return summary def bootstrap(log_path, summary_path=None): """Loads, parses, and merges the given log and summary files. The path to the summary file will be determined automatically based on the path to the general logfile, but it must exist within the same directory. If the log file names are changed from their default values, a summary path can be explicitly provided using the optional `summary_path` parameter. :param log_path: the path to the logfile to parse :param summary_path: optional; bypasses autodetection of path names :return: a list of merged `LogNode` instances, or `None` if no matching summary file can be located automatically """ if summary_path: return merge(parse_summary(summary_path), parse_log(log_path)) name = os.path.basename(log_path) directory = os.path.dirname(os.path.abspath(log_path)) for candidate in os.listdir(directory): if candidate.startswith('{0}.summary.'.format(name)): return merge(parse_summary(os.path.join(directory, candidate)), parse_log(os.path.join(directory, name))) return None def get_command_totals(node, totals=None): if totals is None: totals = {} for entry in node.traverse(): # attempt to resolve a nice looking command, ignoring any obvious # arguments to make subcommands float to left of array tokens = filter(lambda tok: not tok.startswith('-'), entry.message.split()) if not tokens: continue # strip sudo and any variable declarations if tokens[0] == 'sudo': tokens.pop(0) while '=' in tokens[0]: tokens.pop(0) if '/' in tokens[0]: tokens[0] = tokens[0].split('/')[-1] # select # of tokens to include based on the first token token_count = { 'openstack': 2, 'read': 2, 'python': 2 }.get(tokens[0], 1) # combine token_count tokens to generate the command to group by combined = ' '.join(tokens[0:token_count]) if combined not in totals: totals[combined] = timedelta() totals[combined] += entry.duration_self return totals