Browse Source

Make iter(LogFile) return an independent generator

This is a simplification of the previous code which used global state.
It was prompted by a bug: when iterating over a log file containing 3
lines, we returned lines 1 and 3 but omitted line 2.

Change-Id: Ic7388f96a6201f30e16662ff4a5f3c081ac2c4bd
Matthew Booth 1 year ago
parent
commit
c9e5f14026
1 changed files with 26 additions and 42 deletions
  1. 26
    42
      oslogmerger/oslogmerger.py

+ 26
- 42
oslogmerger/oslogmerger.py View File

@@ -5,6 +5,7 @@ import dateutil.parser
5 5
 import dateutil.tz
6 6
 import hashlib
7 7
 import heapq
8
+import itertools
8 9
 import os
9 10
 import re
10 11
 import sys
@@ -325,8 +326,6 @@ class TSLogParser(LogParser):
325 326
 
326 327
 class LogFile(object):
327 328
     def _detect_format(self, filename, cfg):
328
-        self.open(filename)
329
-
330 329
         parsers = []
331 330
         for cls in LOG_TYPES.values() + DETECTED_LOG_TYPES:
332 331
             if cls is None:
@@ -341,11 +340,7 @@ class LogFile(object):
341 340
 
342 341
         # Try to parse the first few lines with each parser in turn, returning
343 342
         # the first to successfully parse a line
344
-        for i in range(0, 5):
345
-            line = self._readline()
346
-            if line is None:
347
-                continue
348
-
343
+        for line in itertools.islice(self._lines(), 0, 5):
349 344
             for parser in parsers:
350 345
                 try:
351 346
                     parser.parse_line(line)
@@ -361,6 +356,11 @@ class LogFile(object):
361 356
         raise ValueError("Failed to detect format of %s" % self.alias)
362 357
 
363 358
     def __init__(self, filename, alias, cfg, parser_cls=None):
359
+        if filename.startswith("http://"):
360
+            self.filename = self._cached_download(filename)
361
+        else:
362
+            self.filename = filename
363
+
364 364
         self.alias = alias
365 365
 
366 366
         if parser_cls is None:
@@ -368,14 +368,15 @@ class LogFile(object):
368 368
         else:
369 369
             self.parser = parser_cls(filename, cfg)
370 370
 
371
-        self.open(filename)
371
+    def _lines(self):
372
+        with open(self.filename, 'r') as logfile:
373
+            while True:
374
+                line = logfile.readline()
375
+                if line == "":
376
+                    break
372 377
 
373
-    def open(self, filename):
374
-        self._filename = filename
375
-        if filename.startswith("http://"):
376
-            filename = self._cached_download(filename)
377
-
378
-        self._file = open(filename, 'r')
378
+                line.replace('\0', ' ')
379
+                yield line
379 380
 
380 381
     def _url_cache_path(self, url):
381 382
         md5 = hashlib.md5()
@@ -408,29 +409,17 @@ class LogFile(object):
408 409
         return path
409 410
 
410 411
     def __iter__(self):
411
-        self.entry = None
412
-        self.next_entry = None
413
-        return self
414
-
415
-    def _readline(self):
416
-        line = self._file.readline()
417
-        if line == "":
418
-            return None
419
-        line.replace('\0', ' ')
420
-        return line
421
-
422
-    def _next_entry(self, entry):
423
-        while True:
424
-            line = self._readline()
425
-            if line is None:
426
-                return entry, None
427
-
412
+        entry = None
413
+        for line in self._lines():
428 414
             try:
429 415
                 dt, dt_str, data = self.parser.parse_line(line)
430
-                new_entry = LogEntry(self.alias, dt, data, dt_str=dt_str)
416
+
417
+                # If we successfully parsed a line, it means that we've
418
+                # finished appending un-timestamped lines to the previous entry
431 419
                 if entry:
432
-                    return entry, new_entry
433
-                entry = new_entry
420
+                    yield entry
421
+
422
+                entry = LogEntry(self.alias, dt, data, dt_str=dt_str)
434 423
 
435 424
             except ValueError:
436 425
                 # it's probably a non-dated line, or a garbled entry, just
@@ -438,14 +427,9 @@ class LogFile(object):
438 427
                 if entry:
439 428
                     entry.append_line(line)
440 429
 
441
-    def __next__(self):
442
-        return self.next()
443
-
444
-    def next(self):
445
-        self.entry, self.next_entry = self._next_entry(self.next_entry)
446
-        if self.entry is None:
447
-            raise StopIteration()
448
-        return self.entry
430
+        # We reached EOF, so return the in-progress entry
431
+        if entry is not None:
432
+            yield entry
449 433
 
450 434
 
451 435
 # Log file formats with command line options

Loading…
Cancel
Save