Browse Source

Always merge sort input files with heapq.merge

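heapq.merge performs an extremely efficient merge of already-sorted
inputs. It does not load the inputs into memory all at once, so its memory
overhead does not grow with the size of the files, and it is very fast. It
also starts producing output immediately. Note that it relies on each input
file already being in sorted order. It obsoletes both previous sorting
methods, so we deprecate the --min-memory option.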

Change-Id: I8384b7214ba54dffa61d1c2195f3b4c238ba253a
Matthew Booth 1 year ago
parent
commit
f2be96fe6a
2 changed files with 4 additions and 61 deletions
  1. README.rst: 0 additions, 19 deletions
  2. oslogmerger/oslogmerger.py: 4 additions, 42 deletions

+ 0
- 19
README.rst View File

@@ -28,19 +28,6 @@ Limitations
28 28
 This tool is not able to properly (or meaningfully) merge logs if your servers
29 29
 are not time synced to a common time source.
30 30
 
31
-By default os-log-merger uses a memory hogging implementation because it
32
-provides a considerable time reduction to complete the merging.  This
33
-implementation loads all file contents in memory and then sorts and then
34
-proceeds to output merged result.
35
-
36
-For operation on memory constrained systems and with log files of considerable
37
-sizes os-log-merger can operate on a memory conservative mode where log entries
38
-will be read from files one by one and sorted as they come.
39
-
40
-This memory reduction has an impact on processing speed, and will increase the
41
-time to process the files by 25%.
42
-
43
-
44 31
 How to install
45 32
 ~~~~~~~~~~~~~~
46 33
 pip install os-log-merger
@@ -72,12 +59,6 @@ The previous example would produce something like this::
72 59
 References to http url files instead of local files is also supported. Files
73 60
 will be cached locally to avoid re-downloading on next runs.
74 61
 
75
-Limit memory usage
76
-~~~~~~~~~~~~~~~~~~
77
-
78
-We can disabled default speed optimized operation for those case were we want
79
-to favor a small memory footprint by using option `-m` (`--min-memory`).
80
-
81 62
 Common Base
82 63
 ~~~~~~~~~~~
83 64
 

+ 4
- 42
oslogmerger/oslogmerger.py View File

@@ -2,6 +2,7 @@ from __future__ import print_function
2 2
 import argparse
3 3
 from datetime import datetime, timedelta
4 4
 import hashlib
5
+import heapq
5 6
 import os
6 7
 import sys
7 8
 import tempfile
@@ -320,41 +321,6 @@ class TSLogEntry(LogEntry):
320 321
         return self.start_date + timedelta(seconds=timestamp)
321 322
 
322 323
 
323
-def process_logs_limit_memory_usage(logs):
324
-    oslogs = [iter(log) for log in logs]
325
-
326
-    def process_entry(entry_iterable):
327
-        try:
328
-            next(entry_iterable)
329
-        except StopIteration:
330
-            # There are no more entries in the iterable, we can remove it
331
-            # from the list to process
332
-            oslogs.remove(entry_iterable)
333
-
334
-    for log in oslogs:
335
-        process_entry(log)
336
-
337
-    while oslogs:
338
-        entry_iterable = min(oslogs)
339
-        result = entry_iterable.peek()
340
-        if result is None:
341
-            break
342
-        yield result
343
-        process_entry(entry_iterable)
344
-
345
-
346
-def process_logs_memory_hog(logs):
347
-    all_entries = []
348
-    # read all the logs
349
-    for log in logs:
350
-        for entry in log:
351
-            all_entries.append(entry)
352
-
353
-    sorted_entries = sorted(all_entries)
354
-    for entry in sorted_entries:
355
-        yield entry
356
-
357
-
358 324
 LOG_TYPES = [
359 325
     ('logfiles', OSLogEntry),
360 326
     ('logfiles_m', MsgLogEntry),
@@ -375,12 +341,8 @@ def process_logs(cfg):
375 341
 
376 342
     alias = generate_aliases(filename_alias, cfg)
377 343
 
378
-    if cfg.limit_memory:
379
-        method = process_logs_limit_memory_usage
380
-    else:
381
-        method = process_logs_memory_hog
382
-
383
-    for entry in method(logs):
344
+    entry_iters = [iter(log) for log in logs]
345
+    for entry in heapq.merge(*entry_iters):
384 346
         print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
385 347
               entry.data.rstrip('\n')))
386 348
 
@@ -633,7 +595,7 @@ one has not been provided:'
633 595
                         help='Level of smart alias naming (0-3)')
634 596
     parser.add_argument('--min-memory', '-m', default=False,
635 597
                         action='store_true', dest='limit_memory',
636
-                        help='Limit memory usage')
598
+                        help='This option is deprecated and has no effect')
637 599
     parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
638 600
                         dest='logfiles_m', metavar='file[:ALIAS]',
639 601
                         help='Message log files with format: Oct 15 14:11:19')

Loading…
Cancel
Save