
This patch provides a mechanism for splitting server log files from nodes into chunks that correspond to time periods (by default, roughly from the begin of every script start to the begin of the next script start). In order to minimize the resources used for this (compute and storage), the log_point function uses ls(1) to record the size of interesting log files. These ls-snapshots are retrieved at the end of a repeat-test run along with the long files. A new tool, log_snapshot_split, uses that information to create a directory for each snapshot. For all observed log files, each directory contain the files and lines written since the previous ls-snapshot. The whole tool chain can be called manually but is automatically used by repeat-test.sh. Change-Id: I809e485844b343bc1a39302396ec0f68a6801371
225 lines
6.7 KiB
Python
Executable File
225 lines
6.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
|
|
This script splits out log file portions based on full log files and
|
|
"ls -l" snapshots that document their size at various points in time.
|
|
|
|
"""
|
|
|
|
# Force Python 2 to use float division even for ints
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
import errno
|
|
import mmap
|
|
import os
|
|
import sys
|
|
|
|
from glob import glob
|
|
|
|
# Extension of directory snapshot files
|
|
SNAP_EXT = "lsl"
|
|
|
|
|
|
def get_destdir(destdir, name):
|
|
"""Return destination directory for results of current snapshot."""
|
|
|
|
# Remove extension from name
|
|
basename = os.path.splitext(name)[0]
|
|
|
|
dir_path = os.path.join(destdir, basename)
|
|
|
|
# Sanity check: we don't want to overwrite exisiting results.
|
|
if os.path.exists(dir_path):
|
|
print("ERROR: destination directory already exists:", dir_path)
|
|
sys.exit(1)
|
|
|
|
return dir_path
|
|
|
|
|
|
def get_file_slice(path, old_size, size):
|
|
"""Get the content from first to second byte counter of given file."""
|
|
with open(path, "r+b") as fin:
|
|
try:
|
|
mmp = mmap.mmap(fin.fileno(), 0)
|
|
except ValueError as err:
|
|
if os.path.getsize(path) == 0:
|
|
# Log file is empty, nothing to mmap or read
|
|
return None
|
|
else:
|
|
# Some other error, pass it on
|
|
raise err
|
|
else:
|
|
mmp.seek(old_size)
|
|
return mmp.read(size - old_size)
|
|
|
|
|
|
def create_parent_dirs_for(file_path):
|
|
"""Create parent directories for given file or directory."""
|
|
dir_path = os.path.dirname(file_path)
|
|
try:
|
|
os.makedirs(dir_path)
|
|
except OSError as err:
|
|
if err.errno == errno.EEXIST and os.path.isdir(dir_path):
|
|
pass
|
|
else:
|
|
raise
|
|
|
|
|
|
def get_size_and_path(line):
|
|
"""From a 'ls -l' line, return columns 4 (size) and 8 (path)."""
|
|
cols = line.split()
|
|
size, path = (int(cols[4]), cols[8])
|
|
return size, path
|
|
|
|
|
|
def get_ls_snap_files_from_dir(snapdir):
|
|
"""Get paths of all 'ls -l' snapshot files in given directory."""
|
|
# Return them sorted so they get processed in order
|
|
return sorted(glob(os.path.join(snapdir, "*." + SNAP_EXT)))
|
|
|
|
|
|
def get_ls_snap_files_from_path(snap_paths, verbose):
|
|
"""Get paths of all 'ls -l' snapshot files."""
|
|
ls_snap_files = []
|
|
|
|
for path in snap_paths:
|
|
if os.path.isdir(path):
|
|
if len(snap_paths) > 1:
|
|
print("ERROR LS_SNAP_PATH contains a directory and additional "
|
|
"paths. Aborting.")
|
|
sys.exit(1)
|
|
ls_snap_files = get_ls_snap_files_from_dir(path)
|
|
break
|
|
elif os.path.isfile(path):
|
|
ls_snap_files.append(path)
|
|
else:
|
|
print("Bad argument: ", path)
|
|
sys.exit(1)
|
|
|
|
if verbose:
|
|
print("LS_SNAP_PATH ", snap_paths)
|
|
print("'ls -l' snapshot files ", ls_snap_files)
|
|
|
|
return ls_snap_files
|
|
|
|
|
|
def get_log_and_result_dirs(args):
|
|
"""Return paths to log and result directories."""
|
|
if os.path.isdir(args.ls_snap_path[0]):
|
|
snapdir = args.ls_snap_path[0]
|
|
else:
|
|
snapdir = os.path.dirname(args.ls_snap_path[0])
|
|
|
|
if args.logdir is None:
|
|
logdir = snapdir
|
|
else:
|
|
logdir = args.logdir
|
|
|
|
if args.resultdir is None:
|
|
# If no destination directory is given, use a subdir in the snapdir
|
|
resultdir = os.path.join(snapdir, "split_logs")
|
|
else:
|
|
resultdir = args.resultdir
|
|
|
|
if args.verbose:
|
|
print("LOG_DIR ", logdir)
|
|
print("RESULTS_DIR", resultdir)
|
|
|
|
return logdir, resultdir
|
|
|
|
|
|
def write_results(result_path, new_lines):
|
|
"""Create one split out log file (and any parent directories)."""
|
|
# Create parent directories (if any) for current log file
|
|
create_parent_dirs_for(result_path)
|
|
|
|
with open(result_path, "w") as fout:
|
|
# If the log file was empty at this point, skip writing
|
|
# new_lines (Empty) to the split out file.
|
|
if new_lines:
|
|
fout.write(new_lines)
|
|
|
|
|
|
def indicate_new_snapshot(snap_name, verbose):
|
|
"""Indicate progress by printing name (verbose) or a dot."""
|
|
if verbose:
|
|
print(snap_name)
|
|
else:
|
|
# Print dots to indicate progress
|
|
print('.', end='')
|
|
sys.stdout.flush()
|
|
|
|
|
|
def process_snap_files(ls_snap_files, args):
|
|
"""Read snapshot files and create split out log files."""
|
|
|
|
logdir, resultdir = get_log_and_result_dirs(args)
|
|
|
|
# For each log file, number of bytes handled so far
|
|
log_size = dict()
|
|
|
|
for ls_snap_file in ls_snap_files:
|
|
|
|
snap_name = os.path.basename(ls_snap_file)
|
|
|
|
indicate_new_snapshot(snap_name, args.verbose)
|
|
|
|
with open(ls_snap_file, "r") as ls_snap_content:
|
|
|
|
# Create directory for results of this log snapshot
|
|
dest_subdir = get_destdir(resultdir, snap_name)
|
|
|
|
for ls_line in ls_snap_content:
|
|
new_size, log_rpath = get_size_and_path(ls_line)
|
|
result_path = os.path.join(dest_subdir, log_rpath)
|
|
|
|
if args.verbose:
|
|
print("\t", log_rpath)
|
|
|
|
if log_rpath not in log_size:
|
|
# New log file
|
|
log_size[log_rpath] = 0
|
|
elif log_size[log_rpath] == new_size:
|
|
# Log file did not change, skip
|
|
continue
|
|
|
|
src_log = os.path.join(logdir, log_rpath)
|
|
new_lines = get_file_slice(src_log, log_size[log_rpath],
|
|
new_size)
|
|
|
|
log_size[log_rpath] = new_size
|
|
|
|
write_results(result_path, new_lines)
|
|
|
|
if not args.verbose:
|
|
# New line after last period of progress indicator
|
|
print('')
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Split log files according to"
|
|
" 'ls -l' snapshots.")
|
|
parser.add_argument('ls_snap_path', metavar='LS_SNAP_PATH', nargs='+',
|
|
help="'ls -l' snapshot files or directory containing"
|
|
" them")
|
|
parser.add_argument('--logdir', metavar='LOG_DIR', nargs='?',
|
|
help="Root directory for log files (default: "
|
|
"LS_SNAP_PATH)")
|
|
parser.add_argument('--resultdir', metavar='RESULT_DIR', nargs='?',
|
|
help="Target directory for results (default: "
|
|
"LS_SNAP_PATH/split_logs)")
|
|
parser.add_argument('--verbose', action='store_true')
|
|
args = parser.parse_args()
|
|
|
|
ls_snap_files = get_ls_snap_files_from_path(args.ls_snap_path,
|
|
args.verbose)
|
|
|
|
process_snap_files(ls_snap_files, args)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|