Always merge sort input files with heapq.merge
heapq.merge performs an extremely efficient lazy merge of the already-sorted input files. Because it merges the inputs rather than loading and sorting everything, it has constant memory overhead and is very fast. It also starts producing output immediately. It obsoletes both previous sorting methods, so we deprecate the --min-memory option. Change-Id: I8384b7214ba54dffa61d1c2195f3b4c238ba253a
This commit is contained in:
parent
5b9c82746b
commit
f2be96fe6a
19
README.rst
19
README.rst
|
@ -28,19 +28,6 @@ Limitations
|
||||||
This tool is not able to properly (or meaningfully) merge logs if your servers
|
This tool is not able to properly (or meaningfully) merge logs if your servers
|
||||||
are not time synced to a common time source.
|
are not time synced to a common time source.
|
||||||
|
|
||||||
By default os-log-merger uses a memory hogging implementation because it
|
|
||||||
provides a considerable time reduction to complete the merging. This
|
|
||||||
implementation loads all file contents in memory and then sorts and then
|
|
||||||
proceeds to output merged result.
|
|
||||||
|
|
||||||
For operation on memory constrained systems and with log files of considerable
|
|
||||||
sizes os-log-merger can operate in a memory-conservative mode where log entries
|
|
||||||
will be read from files one by one and sorted as they come.
|
|
||||||
|
|
||||||
This memory reduction has an impact on processing speed, and will increase the
|
|
||||||
time to process the files by 25%.
|
|
||||||
|
|
||||||
|
|
||||||
How to install
|
How to install
|
||||||
~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~
|
||||||
pip install os-log-merger
|
pip install os-log-merger
|
||||||
|
@ -72,12 +59,6 @@ The previous example would produce something like this::
|
||||||
References to http url files instead of local files are also supported. Files
|
References to http url files instead of local files are also supported. Files
|
||||||
will be cached locally to avoid re-downloading on next runs.
|
will be cached locally to avoid re-downloading on next runs.
|
||||||
|
|
||||||
Limit memory usage
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
We can disable the default speed-optimized operation for those cases where we want
|
|
||||||
to favor a small memory footprint by using option `-m` (`--min-memory`).
|
|
||||||
|
|
||||||
Common Base
|
Common Base
|
||||||
~~~~~~~~~~~
|
~~~~~~~~~~~
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ from __future__ import print_function
|
||||||
import argparse
|
import argparse
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import heapq
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
@ -320,41 +321,6 @@ class TSLogEntry(LogEntry):
|
||||||
return self.start_date + timedelta(seconds=timestamp)
|
return self.start_date + timedelta(seconds=timestamp)
|
||||||
|
|
||||||
|
|
||||||
def process_logs_limit_memory_usage(logs):
|
|
||||||
oslogs = [iter(log) for log in logs]
|
|
||||||
|
|
||||||
def process_entry(entry_iterable):
|
|
||||||
try:
|
|
||||||
next(entry_iterable)
|
|
||||||
except StopIteration:
|
|
||||||
# There are no more entries in the iterable, we can remove it
|
|
||||||
# from the list to process
|
|
||||||
oslogs.remove(entry_iterable)
|
|
||||||
|
|
||||||
for log in oslogs:
|
|
||||||
process_entry(log)
|
|
||||||
|
|
||||||
while oslogs:
|
|
||||||
entry_iterable = min(oslogs)
|
|
||||||
result = entry_iterable.peek()
|
|
||||||
if result is None:
|
|
||||||
break
|
|
||||||
yield result
|
|
||||||
process_entry(entry_iterable)
|
|
||||||
|
|
||||||
|
|
||||||
def process_logs_memory_hog(logs):
|
|
||||||
all_entries = []
|
|
||||||
# read all the logs
|
|
||||||
for log in logs:
|
|
||||||
for entry in log:
|
|
||||||
all_entries.append(entry)
|
|
||||||
|
|
||||||
sorted_entries = sorted(all_entries)
|
|
||||||
for entry in sorted_entries:
|
|
||||||
yield entry
|
|
||||||
|
|
||||||
|
|
||||||
LOG_TYPES = [
|
LOG_TYPES = [
|
||||||
('logfiles', OSLogEntry),
|
('logfiles', OSLogEntry),
|
||||||
('logfiles_m', MsgLogEntry),
|
('logfiles_m', MsgLogEntry),
|
||||||
|
@ -375,12 +341,8 @@ def process_logs(cfg):
|
||||||
|
|
||||||
alias = generate_aliases(filename_alias, cfg)
|
alias = generate_aliases(filename_alias, cfg)
|
||||||
|
|
||||||
if cfg.limit_memory:
|
entry_iters = [iter(log) for log in logs]
|
||||||
method = process_logs_limit_memory_usage
|
for entry in heapq.merge(*entry_iters):
|
||||||
else:
|
|
||||||
method = process_logs_memory_hog
|
|
||||||
|
|
||||||
for entry in method(logs):
|
|
||||||
print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
|
print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
|
||||||
entry.data.rstrip('\n')))
|
entry.data.rstrip('\n')))
|
||||||
|
|
||||||
|
@ -633,7 +595,7 @@ one has not been provided:'
|
||||||
help='Level of smart alias naming (0-3)')
|
help='Level of smart alias naming (0-3)')
|
||||||
parser.add_argument('--min-memory', '-m', default=False,
|
parser.add_argument('--min-memory', '-m', default=False,
|
||||||
action='store_true', dest='limit_memory',
|
action='store_true', dest='limit_memory',
|
||||||
help='Limit memory usage')
|
help='This option is deprecated and has no effect')
|
||||||
parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
|
parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
|
||||||
dest='logfiles_m', metavar='file[:ALIAS]',
|
dest='logfiles_m', metavar='file[:ALIAS]',
|
||||||
help='Message log files with format: Oct 15 14:11:19')
|
help='Message log files with format: Oct 15 14:11:19')
|
||||||
|
|
Loading…
Reference in New Issue