Files
training-labs/labs/osbash/tools/log_snapshot_split.py
Roger Luethi 5f94f7bd8a Split logs by installation phase
This patch provides a mechanism for splitting server log files from
nodes into chunks that correspond to time periods (by default, roughly
from the beginning of one script to the beginning of the next
script).

In order to minimize the resources used for this (compute and storage),
the log_point function uses ls(1) to record the size of interesting
log files. These ls-snapshots are retrieved at the end of a
repeat-test run along with the log files.

A new tool, log_snapshot_split, uses that information to create a
directory for each snapshot. For all observed log files, each directory
contains the files and lines written since the previous ls-snapshot.

The whole tool chain can be called manually but is automatically used
by repeat-test.sh.

Change-Id: I809e485844b343bc1a39302396ec0f68a6801371
2016-03-06 07:57:46 +01:00

225 lines
6.7 KiB
Python
Executable File

#!/usr/bin/env python
"""
This script splits out log file portions based on full log files and
"ls -l" snapshots that document their size at various points in time.
"""
# Force Python 2 to use float division even for ints
from __future__ import division
from __future__ import print_function
import argparse
import errno
import mmap
import os
import sys
from glob import glob
# Extension (without the leading dot) of 'ls -l' directory snapshot files;
# it is used to build the glob pattern when a snapshot directory is scanned.
SNAP_EXT = "lsl"
def get_destdir(destdir, name):
    """Build the results directory path for the current snapshot.

    The extension of ``name`` is dropped and the remainder is joined onto
    ``destdir``. If the resulting path already exists, an error is printed
    and the script exits with status 1 instead of overwriting old results.
    """
    stem, _ext = os.path.splitext(name)
    target = os.path.join(destdir, stem)
    if not os.path.exists(target):
        return target
    # Sanity check failed: refuse to overwrite existing results.
    print("ERROR: destination directory already exists:", target)
    sys.exit(1)
def get_file_slice(path, old_size, size):
    """Return the content of ``path`` between two byte offsets.

    Args:
        path: Log file to read from.
        old_size: Byte offset at which the slice starts.
        size: Byte offset at which the slice ends (exclusive).

    Returns:
        The raw content read from the file, or None if the file is empty
        (an empty file cannot be memory-mapped).

    Raises:
        ValueError: If mapping a non-empty file fails, or if seeking to
            ``old_size`` is rejected by the mapping.
    """
    # The file is only read, so open and map it read-only; this also works
    # for log files the current user is not allowed to modify.
    with open(path, "rb") as fin:
        try:
            mmp = mmap.mmap(fin.fileno(), 0, access=mmap.ACCESS_READ)
        except ValueError:
            if os.path.getsize(path) == 0:
                # Log file is empty, nothing to mmap or read
                return None
            # Some other error, pass it on with its original traceback
            raise
        try:
            mmp.seek(old_size)
            return mmp.read(size - old_size)
        finally:
            # Release the mapping explicitly instead of waiting for the
            # garbage collector.
            mmp.close()
def create_parent_dirs_for(file_path):
    """Create the parent directories of the given file or directory.

    Args:
        file_path: Path whose parent directory chain should exist afterwards.

    Raises:
        OSError: If the parent cannot be created for any reason other than
            it already existing as a directory.
    """
    dir_path = os.path.dirname(file_path)
    if not dir_path:
        # No directory component (e.g. "file.log"): there is nothing to
        # create, and os.makedirs("") would raise.
        return
    try:
        os.makedirs(dir_path)
    except OSError as err:
        # An already existing directory is fine; anything else (including
        # a regular file occupying the path) is passed on to the caller.
        if err.errno != errno.EEXIST or not os.path.isdir(dir_path):
            raise
def get_size_and_path(line):
    """Extract size and path from one line of 'ls -l' output.

    The line is split on whitespace; the field at (zero-based) index 4 is
    returned as an integer size and the field at index 8 as the path.
    """
    fields = line.split()
    return int(fields[4]), fields[8]
def get_ls_snap_files_from_dir(snapdir):
    """Return the sorted paths of all 'ls -l' snapshot files in snapdir."""
    pattern = os.path.join(snapdir, "*." + SNAP_EXT)
    snap_files = glob(pattern)
    # Sorted order makes sure the snapshots get processed in sequence
    snap_files.sort()
    return snap_files
def get_ls_snap_files_from_path(snap_paths, verbose):
    """Resolve the command line paths into a list of snapshot files.

    ``snap_paths`` is either a single directory (all snapshot files in it
    are used) or a list of snapshot files. A directory combined with other
    paths, or a path that is neither file nor directory, ends the script
    with exit status 1.
    """
    collected = []
    for entry in snap_paths:
        if not os.path.isdir(entry):
            if not os.path.isfile(entry):
                print("Bad argument: ", entry)
                sys.exit(1)
            collected.append(entry)
            continue
        # A directory is only accepted as the sole argument
        if len(snap_paths) > 1:
            print("ERROR LS_SNAP_PATH contains a directory and additional "
                  "paths. Aborting.")
            sys.exit(1)
        collected = get_ls_snap_files_from_dir(entry)
        break
    if verbose:
        print("LS_SNAP_PATH ", snap_paths)
        print("'ls -l' snapshot files ", collected)
    return collected
def get_log_and_result_dirs(args):
    """Work out the log directory and the results directory.

    The snapshot directory is the first LS_SNAP_PATH entry if that is a
    directory, otherwise the directory part of that entry. It serves as
    the default log directory, and its "split_logs" subdirectory is the
    default results directory.
    """
    first_path = args.ls_snap_path[0]
    snapdir = first_path if os.path.isdir(first_path) \
        else os.path.dirname(first_path)

    logdir = snapdir if args.logdir is None else args.logdir

    resultdir = args.resultdir
    if resultdir is None:
        # No destination directory given, use a subdir in the snapdir
        resultdir = os.path.join(snapdir, "split_logs")

    if args.verbose:
        print("LOG_DIR ", logdir)
        print("RESULTS_DIR", resultdir)
    return logdir, resultdir
def write_results(result_path, new_lines):
    """Create one split out log file (and any parent directories).

    Args:
        result_path: Path of the file to create; an existing file is
            truncated.
        new_lines: Content to write. May be bytes or text; a false value
            (None or empty) results in an empty file.
    """
    # Create parent directories (if any) for current log file
    create_parent_dirs_for(result_path)
    # Log slices are raw bytes; writing bytes to a text-mode file raises
    # TypeError on Python 3, so pick the mode that matches the payload.
    mode = "wb" if isinstance(new_lines, bytes) else "w"
    with open(result_path, mode) as fout:
        # If the log file was empty at this point, skip writing
        # new_lines (empty) to the split out file.
        if new_lines:
            fout.write(new_lines)
def indicate_new_snapshot(snap_name, verbose):
    """Report progress: the snapshot name if verbose, otherwise a dot."""
    if verbose:
        print(snap_name)
        return
    # Quiet mode: emit a dot without newline and flush so it shows up
    # immediately.
    print('.', end='')
    sys.stdout.flush()
def process_snap_files(ls_snap_files, args):
    """Walk through the snapshot files and write the split out log files.

    For every snapshot, each listed log file that is new or whose size
    differs from the previously recorded one has the bytes between the
    old and the new size copied into a file below that snapshot's result
    directory. A log file seen for the first time is always written, even
    if its recorded size is 0.
    """
    logdir, resultdir = get_log_and_result_dirs(args)

    # Maps each log file (relative path) to the byte count handled so far
    handled = {}

    for snap_path in ls_snap_files:
        snap_name = os.path.basename(snap_path)
        indicate_new_snapshot(snap_name, args.verbose)

        with open(snap_path, "r") as snap_lines:
            # Results of this snapshot go below this directory
            dest_subdir = get_destdir(resultdir, snap_name)

            for entry in snap_lines:
                new_size, rel_path = get_size_and_path(entry)
                if args.verbose:
                    print("\t", rel_path)

                if rel_path in handled:
                    if handled[rel_path] == new_size:
                        # Unchanged since the last snapshot, nothing to do
                        continue
                else:
                    # First time this log file shows up
                    handled[rel_path] = 0

                chunk = get_file_slice(os.path.join(logdir, rel_path),
                                       handled[rel_path], new_size)
                handled[rel_path] = new_size
                write_results(os.path.join(dest_subdir, rel_path), chunk)

    if not args.verbose:
        # Terminate the line of progress dots
        print('')
def _build_arg_parser():
    """Construct the command line parser for this tool."""
    parser = argparse.ArgumentParser(
        description="Split log files according to 'ls -l' snapshots.")
    parser.add_argument(
        'ls_snap_path', metavar='LS_SNAP_PATH', nargs='+',
        help="'ls -l' snapshot files or directory containing them")
    parser.add_argument(
        '--logdir', metavar='LOG_DIR', nargs='?',
        help="Root directory for log files (default: LS_SNAP_PATH)")
    parser.add_argument(
        '--resultdir', metavar='RESULT_DIR', nargs='?',
        help="Target directory for results (default: "
             "LS_SNAP_PATH/split_logs)")
    parser.add_argument('--verbose', action='store_true')
    return parser


def main():
    """Parse the command line, collect snapshot files and split the logs."""
    args = _build_arg_parser().parse_args()
    snap_files = get_ls_snap_files_from_path(args.ls_snap_path, args.verbose)
    process_snap_files(snap_files, args)
# Run only when executed as a script. main() has no return statement, so
# the exit status is 0 unless one of the helpers exits earlier.
if __name__ == "__main__":
    sys.exit(main())