
Merge pull request #25 from Akrog/issue7/other_log_formats

Add support for other log formats
Miguel Ángel Ajo, 3 years ago
commit 561b4310f7
3 changed files with 256 additions and 43 deletions
  1. CHANGELOG.rst (+3, -0)
  2. README.rst (+44, -0)
  3. oslogmerger/oslogmerger.py (+209, -43)

CHANGELOG.rst (+3, -0)

@@ -10,6 +10,9 @@ Changelog
 - Add base log path option: `-b` `--log-base`.
 - Log postfix option: `-p` `--log-postfix`.
 - Auto alias generation: `-a` `--alias-level`.
+- Add support for default /var/log/messages datetime format files with
+  `-ml [FILE [FILE]]`
+- Add support for timestamped log files with `-tl [FILE [FILE]]`
 
 **Bugfixes:**
 

README.rst (+44, -0)

@@ -93,6 +93,50 @@ Example for Cinder:
     $ os-log-merger -b /var/log/cinder/ -p .log api:api scheduler:sch volume:vol
 
 
+/var/log/messages
+~~~~~~~~~~~~~~~~~
+
+os-log-merger also supports /var/log/messages style files with the `-ml` /
+`--msg-logs` option.
+
+Since the format of those files lacks year information -MAR 24 14:11:19-
+the year of the file's last modification will be used.
+
+These files can also be specified with globs, and they support alias
+definitions as well.
+
+Beware that OpenStack log files should be listed before the `-ml` option
+files.
+
+Example for Cinder:
+
+.. code:: bash
+
+    $ os-log-merger -b /var/log/ cinder/api.log:API -ml messages:MSG *.log
+
+
+Timestamped logs
+~~~~~~~~~~~~~~~~
+
+os-log-merger also supports timestamped logs -[    0.003036]- with the
+`-tl` / `--timestamp-logs` option.
+
+Since the timestamps are usually relative to the time the system started
+rather than to the epoch, the initial datetime will be calculated by
+subtracting the last timestamp in the file from the file's last
+modification datetime.
+
+These files can also be specified with globs, and they support alias
+definitions as well.
+
+Beware that OpenStack log files should be listed before the `-tl` option
+files.
+
+Example for Cinder:
+
+.. code:: bash
+
+    $ os-log-merger -b /var/log/ cinder/api.log:API -tl dmesg:DMSG
+
+
 Auto Alias
 ~~~~~~~~~~
 
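Note: the two behaviours the README sections above describe can be sketched in a few lines of Python. This is only an illustration of the idea (the real implementation lives in oslogmerger/oslogmerger.py, shown below); the helper names and paths here are made up.

.. code:: python

    import os
    from datetime import datetime, timedelta

    def messages_line_date(path, line):
        # /var/log/messages lines ("MAR 24 14:11:19 ...") carry no year and
        # strptime would default to 1900, so borrow the year from the file's
        # last-modification time.
        year = datetime.fromtimestamp(os.stat(path).st_mtime).year
        return datetime.strptime('%d %s' % (year, line[:15]),
                                 '%Y %b %d %H:%M:%S')

    def timestamped_line_date(path, last_timestamp, line_timestamp):
        # Kernel-style timestamps count seconds since boot, not since the
        # epoch: estimate the boot time as mtime minus the last timestamp in
        # the file, then add the timestamp of the current line.
        boot = datetime.fromtimestamp(os.stat(path).st_mtime)
        boot -= timedelta(seconds=last_timestamp)
        return boot + timedelta(seconds=line_timestamp)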

oslogmerger/oslogmerger.py (+209, -43)

@@ -1,10 +1,11 @@
 from __future__ import print_function
 import argparse
-from datetime import datetime
+from datetime import datetime, timedelta
 import hashlib
 import os
 import sys
 import tempfile
+import time
 import urllib2
 
 
@@ -74,15 +75,94 @@ FILE_MAP = {
 }
 
 
-class OpenStackLog:
+class LogEntry(object):
+    separator = ' '
+    date_format = None
+    _date_parse_msg = 'unconverted data remains: '
+
+    def __init__(self, **kwargs):
+        self._date_length = None
+        self.__dict__.update(**kwargs)
+
+    @classmethod
+    def get_init_args(cls, filename):
+        return {}
+
+    def prepare_line(self, line):
+        return line
+
+    def parse_date(self, line):
+        try:
+            dt = datetime.strptime(line, self.date_format)
+        except ValueError as e:
+            if not e.args[0].startswith(self._date_parse_msg):
+                raise
+            prepared_date_length = (len(line) - len(e.args[0]) +
+                                    len(self._date_parse_msg))
+            dt = datetime.strptime(line[:prepared_date_length],
+                                   self.date_format)
+        return dt
+
+    def _calculate_date_length(self):
+        return len(self.date.strftime(self.date_format))
+
+    @property
+    def date_length(self):
+        if not self._date_length:
+            self._date_length = self._calculate_date_length()
+        return self._date_length
+
+    @classmethod
+    def factory(cls, filename, line, **kwargs):
+        self = cls(**kwargs)
+
+        self.filename = filename
+        if not line:
+            raise ValueError
+
+        # Prepare the line for date parsing
+        prepared_line = self.prepare_line(line)
+
+        # Extract the datetime
+        self.date = self.parse_date(prepared_line)
+
+        if (len(line) == self.date_length or
+                line[self.date_length] != self.separator):
+            raise ValueError
+
+        self.date_str = line[:self.date_length]
+        # +1 to remove the separator so we don't have 2 spaces on output
+        self.data = line[self.date_length + 1:]
+        return self
+
+    def append_line(self, line):
+        self.data += EXTRALINES_PADDING + line
+
+    def __cmp__(self, other):
+        return cmp(self.date, other.date)
+
+
+class LogFile(object):
+    log_entry_class = LogEntry
+
+    @staticmethod
+    def factory(cls, filename):
+        instance = LogFile(filename)
+        instance.log_entry_class = cls
+        instance.entry_kwargs = cls.get_init_args(filename)
+        return instance
+
     def __init__(self, filename):
-        self._open(filename)
+        self.open(filename)
 
-    def _open(self, filename):
+    def open(self, filename):
         self._filename = filename
         if filename.startswith("http://"):
             filename = self._cached_download(filename)
+
         self._file = open(filename, 'r')
+        stat = os.stat(filename)
+        self.mtime = datetime.fromtimestamp(stat.st_mtime)
 
     def _url_cache_path(self, url):
         md5 = hashlib.md5()
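The parse_date() helper added above relies on a CPython strptime detail: when a line has trailing data after the date, the ValueError message starts with 'unconverted data remains: ' followed by the leftover text, so the length of the date portion can be derived without a regex or a per-format split. A standalone illustration (not part of the module):

.. code:: python

    from datetime import datetime

    line = '2016-02-01 10:22:59.239 9606 INFO oslo_service.service [-] Starting'
    fmt = '%Y-%m-%d %H:%M:%S.%f'
    msg = 'unconverted data remains: '

    try:
        datetime.strptime(line, fmt)
    except ValueError as e:
        # e.args[0] is msg plus the leftover text, so the date portion is the
        # full line minus the reported remainder.
        date_length = len(line) - (len(e.args[0]) - len(msg))
        print(datetime.strptime(line[:date_length], fmt))
        # 2016-02-01 10:22:59.239000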
@@ -104,29 +184,16 @@ class OpenStackLog:
         file_out = open(path, 'w')
         file_out.write(http_in.read())
         file_out.close()
+
+        # Set the file time to the one from the URL
+        info = http_in.info()
+        m_date = info.getdate('date')
+        mtime = time.mktime(m_date)
+        os.utime(path, (mtime, mtime))
+
         http_in.close()
         return path
 
-    def _extract_with_date(self, line):
-        try:
-            # TODO(mangelajo): We support the default log format
-            #                  so far, but we may need to discover
-            #                  different ones.
-            chunks = line.split(" ")
-            datetime_str = ' '.join(chunks[:2])
-            # this is likely to be not necessary, we can just compare
-            # strings, and that's going to be faster than parsing
-            # and regenerating later, but, could be useful when mixing
-            # log and date formats.
-            date_object = datetime.strptime(
-                datetime_str, "%Y-%m-%d %H:%M:%S.%f")
-            pid, level = chunks[2], chunks[3]
-            rest = ' '.join(chunks[4:])
-            return (date_object, datetime_str, self._filename, pid, level,
-                    rest)
-        except IndexError:
-            return None
-
     def __iter__(self):
         self.entry = None
         self.next_entry = None
@@ -139,7 +206,9 @@
                 return entry, None
 
             try:
-                new_entry = self._extract_with_date(line)
+                new_entry = self.log_entry_class.factory(self._filename,
+                                                         line,
+                                                         **self.entry_kwargs)
                 if new_entry is None:
                     continue
                 if entry:
@@ -150,9 +219,7 @@
                 # it's a non-dated line, just append to the entry
                 # extra info
                 if entry:
-                    (date_object, date_str, filename, pid, level, rest) = entry
-                    entry = (date_object, date_str, filename, pid, level,
-                             rest + EXTRALINES_PADDING + line)
+                    entry.append_line(line)
 
     def __next__(self):
         return self.next()
@@ -174,7 +241,80 @@
 
         if (other.peek() or self.peek()) is None:
             return 0 if self.peek() is None else -1
-        return cmp(self.peek()[0], other.peek()[0])
+        return cmp(self.peek(), other.peek())
+
+
+class MsgLogEntry(LogEntry):
+    """Message format: Oct 15 14:11:19"""
+    date_format = '%Y%b %d %H:%M:%S'
+
+    @classmethod
+    def get_init_args(cls, filename):
+        kwargs = super(MsgLogEntry, cls).get_init_args(filename)
+        stat = os.stat(filename)
+        kwargs['file_year'] = datetime.fromtimestamp(stat.st_mtime).year
+        return kwargs
+
+    def prepare_line(self, line):
+        # TODO: If the year of file creation and last modification are
+        # different, we should start with the creation year and then switch
+        # to the next year once the months wrap around.
+        return '%s%s' % (self.file_year, line)
+
+    def _calculate_date_length(self):
+        return super(MsgLogEntry, self)._calculate_date_length() - 4
+
+
+class OSLogEntry(LogEntry):
+    """OpenStack default log: 2016-02-01 10:22:59.239"""
+    date_format = '%Y-%m-%d %H:%M:%S.%f'
+
+    def _calculate_date_length(self):
+        return super(OSLogEntry, self)._calculate_date_length() - 3
+
+
+class TSLogEntry(LogEntry):
+    """Timestamped log: [275514.814982]"""
+
+    @classmethod
+    def get_init_args(cls, filename):
+        kwargs = super(TSLogEntry, cls).get_init_args(filename)
+        stat = os.stat(filename)
+        mtime = datetime.fromtimestamp(stat.st_mtime)
+        timestamp = cls._get_last_timestamp(filename)
+        kwargs['start_date'] = mtime - timedelta(seconds=timestamp)
+        return kwargs
+
+    @classmethod
+    def _get_last_timestamp(cls, filename):
+        result = None
+        with open(filename, 'r') as f:
+            file_size = os.fstat(f.fileno()).st_size
+            # Jump to the last KB so we don't have to read the whole file
+            offset = max(0, file_size - 1024)
+            f.seek(offset)
+            for line in f:
+                try:
+                    __, result = cls._read_timestamp(line)
+                except ValueError:
+                    continue
+
+            return result
+
+    @staticmethod
+    def _read_timestamp(line):
+        start = line.index('[') + 1
+        end = line.index(']')
+
+        if end < start:
+            raise ValueError
+
+        return end, float(line[start:end])
+
+    def parse_date(self, date_str):
+        end, timestamp = self._read_timestamp(date_str)
+        self._date_length = end + 1
+        return self.start_date + timedelta(seconds=timestamp)
 
 
 def process_logs_limit_memory_usage(logs):
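A quick sanity check of the three entry classes, feeding each factory() one made-up line. The kwargs that get_init_args() would normally derive from a real file (file_year, start_date) are passed explicitly here, and the import assumes the module is importable as oslogmerger.oslogmerger under Python 2 (it still imports urllib2):

.. code:: python

    from datetime import datetime

    from oslogmerger.oslogmerger import MsgLogEntry, OSLogEntry, TSLogEntry

    os_entry = OSLogEntry.factory(
        'api.log', '2016-02-01 10:22:59.239 9606 INFO Starting service\n')
    msg_entry = MsgLogEntry.factory(
        'messages', 'Oct 15 14:11:19 host kernel: booted\n', file_year=2016)
    ts_entry = TSLogEntry.factory(
        'dmesg', '[    0.003036] ACPI: checksum disabled\n',
        start_date=datetime(2016, 10, 15))

    # Each entry exposes a comparable .date plus .date_str/.data, which is
    # what the merging code sorts and prints.
    print(os_entry.date)   # 2016-02-01 10:22:59.239000
    print(msg_entry.date)  # 2016-10-15 14:11:19
    print(ts_entry.date)   # 2016-10-15 00:00:00.003036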
@@ -207,20 +347,28 @@ def process_logs_memory_hog(logs):
         for entry in log:
             all_entries.append(entry)
 
-    sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0])
+    sorted_entries = sorted(all_entries)
     for entry in sorted_entries:
         yield entry
 
 
+LOG_TYPES = [
+    ('logfiles', OSLogEntry),
+    ('logfiles_m', MsgLogEntry),
+    ('logfiles_t', TSLogEntry),
+]
+
+
 def process_logs(cfg):
     filename_alias = {}
     logs = []
-    for filename in cfg.logfiles:
-        path, alias, is_url = get_path_and_alias(filename,
-                                                 cfg.log_base,
-                                                 cfg.log_postfix)
-        filename_alias[path] = (filename, alias, is_url)
-        logs.append(OpenStackLog(path))
+    for arg_name, entry_cls in LOG_TYPES:
+        for filename in getattr(cfg, arg_name):
+            path, alias, is_url = get_path_and_alias(filename,
+                                                     cfg.log_base,
+                                                     cfg.log_postfix)
+            filename_alias[path] = (filename, alias, is_url)
+            logs.append(LogFile.factory(entry_cls, path))
 
     alias = generate_aliases(filename_alias, cfg)
 
@@ -230,9 +378,8 @@
         method = process_logs_memory_hog
 
     for entry in method(logs):
-        (date_object, date_str, filename, pid, level, rest) = entry
-        print (' '.join([date_str, '[%s]' % alias[filename], pid,
-                         level, rest]).rstrip('\n'))
+        print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
+              entry.data.rstrip('\n')))
 
 
 def get_path_and_alias(filename, log_base, log_postfix):
@@ -414,9 +561,22 @@ alias. Use the aliases if you want shorter line lengths.
 
     Logs are expected to contain lines in the following format:
 
-Y-m-d H:M:S.mmm PID LOG-LEVEL ............
-Y-m-d H:M:S.mmm PID LOG-LEVEL ............
+Y-m-d H:M:S.mmm ............
+Y-m-d H:M:S.mmm ............
 [  extra line info .....      ]
+
+    Logs with the default /var/log/messages datetime format (Oct 15 14:11:19)
+can optionally be merged as well using the "--msg-logs" or "-ml"
+option.  The year will be taken from the last modified time of the file.
+
+    Logs with timestamp format -[    0.003036]- are also supported with
+the "--timestamp-logs" or "-tl" option.  Since the timestamps are usually
+relative to the time the system started rather than to the epoch, the
+initial datetime will be calculated by subtracting the last timestamp in
+the file from the file's last modification datetime.
+
+    These log files will also be affected by the log base directory and log
+postfix.
 """
 
     general_epilog = """
@@ -463,14 +623,20 @@ one has not been provided:'
                         help='Base path for all the log files')
     parser.add_argument('--log-postfix ', '-p', dest='log_postfix',
                         help='Append to all the log files path')
-    parser.add_argument('logfiles', nargs='+', metavar='log_file',
-                        help='File in the format of log_file[:ALIAS]')
+    parser.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]',
+                        help='OpenStack log file.')
     parser.add_argument('--alias-level', '-a', type=int, default=0,
                         dest='alias_level',
                         help='Level of smart alias naming (0-3)')
     parser.add_argument('--min-memory', '-m', default=False,
                         action='store_true', dest='limit_memory',
                         help='Limit memory usage')
+    parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
+                        dest='logfiles_m', metavar='file[:ALIAS]',
+                        help='Message log files with format: Oct 15 14:11:19')
+    parser.add_argument('--timestamp-logs', '-tl', default=[], nargs='+',
+                        dest='logfiles_t', metavar='file[:ALIAS]',
+                        help='Timestamped log files with format: [   0.003036]')
 
     return parser.parse_args()
 

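Finally, a minimal reproduction of the new argument wiring (a hypothetical stand-in, not the project's actual parser) shows why the positional OpenStack files must be listed before `-ml`/`-tl`: each of those options greedily consumes the file arguments that follow it.

.. code:: python

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]')
    p.add_argument('--msg-logs', '-ml', default=[], nargs='+',
                   dest='logfiles_m')
    p.add_argument('--timestamp-logs', '-tl', default=[], nargs='+',
                   dest='logfiles_t')

    cfg = p.parse_args(['cinder/api.log:API', '-ml', 'messages:MSG',
                        '-tl', 'dmesg:DMSG'])
    print(cfg.logfiles)    # ['cinder/api.log:API']
    print(cfg.logfiles_m)  # ['messages:MSG']
    print(cfg.logfiles_t)  # ['dmesg:DMSG']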