Browse Source

Add auto-detect parser for libvirt domain logs

Change-Id: I7d98327bebf20b01e41525b56ef59c68cbb8bbd3
Matthew Booth 1 year ago
parent
commit
1be8b23dfc
1 changed files with 98 additions and 1 deletions
  1. 98
    1
      oslogmerger/oslogmerger.py

+ 98
- 1
oslogmerger/oslogmerger.py View File

@@ -1,6 +1,7 @@
1 1
 from __future__ import print_function
2 2
 import argparse
3 3
 from datetime import datetime, timedelta
4
+import dateutil.parser
4 5
 import hashlib
5 6
 import heapq
6 7
 import os
@@ -99,6 +100,12 @@ class LogEntry(object):
99 100
 
100 101
 
101 102
 class LogParser(object):
103
+    # Default to UTC if we have no explicit TZ
104
+    default_tz = dateutil.tz.tzutc()
105
+
106
+    def __init__(self, filename):
107
+        pass
108
+
102 109
     def parse_line(self, line):
103 110
         raise NotImplementedError
104 111
 
@@ -118,6 +125,7 @@ class StrptimeParser(LogParser):
118 125
         dt_str = ' '.join(dt_str)
119 126
 
120 127
         dt = datetime.strptime(dt_str, self.date_format)
128
+        dt = dt.replace(tzinfo=self.default_tz)
121 129
 
122 130
         # +1 to remove the separator so we don't have 2 spaces on output
123 131
         return dt, dt_str, data
@@ -145,6 +153,84 @@ class MsgLogParser(StrptimeParser):
145 153
         return dt.replace(self.year), dt_str, data
146 154
 
147 155
 
156
def make_tzinfo(name, sign, hours, minutes):
    """Build a fixed-offset tzinfo from the parts of a '+HHMM' style suffix.

    :param name: Label given to the resulting timezone (e.g. '+0000').
    :param sign: '-' for offsets west of UTC; any other value is treated
                 as a positive offset.
    :param hours: Hour part of the offset, as a string or int.
    :param minutes: Minute part of the offset, as a string or int.
    :returns: A dateutil.tz.tzoffset carrying the signed offset in seconds.
    """
    seconds = 3600 * int(hours) + 60 * int(minutes)
    offset = -seconds if sign == '-' else seconds
    return dateutil.tz.tzoffset(name, offset)
161
+
162
+
163
class LibvirtdParser(LogParser):
    """Parser for libvirtd.log and libvirt domain logs.

    Domain logs contain a mixture of lines written by libvirt and by qemu,
    so two timestamp formats are recognised:

        libvirt: 2017-09-18 18:08:49.163+0000: <message>
        qemu:    2017-09-18T18:08:49.216429Z qemu-kvm: <message>
    """
    # Raw strings keep the regex escapes (\d, \., \s) out of Python's own
    # string-escape processing; the compiled patterns are unchanged.
    LIBVIRT = re.compile(r'(\d{4})-(\d{2})-(\d{2}) '          # Date
                         r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})'  # Time
                         r'('                                 #
                         r'([+-])(\d{2})(\d{2})'              # Timezone
                         r'):\s*')                            #

    QEMU = re.compile(r'(\d{4})-(\d{2})-(\d{2})T'             # Date
                      r'(\d{2}):(\d{2}):(\d{2})\.(\d+)Z\s*')  # Time

    def parse_line(self, line):
        """Split a log line into its timestamp and message.

        :param line: A single line of log text.
        :returns: Tuple of (timezone-aware datetime, timestamp text as it
                  appeared in the line, remainder of the line).
        :raises ValueError: If the line starts with neither a libvirt nor
                            a qemu timestamp.
        """
        m = self.LIBVIRT.match(line)
        if m is not None:
            return self._parse_libvirt(line, m)

        m = self.QEMU.match(line)
        if m is not None:
            return self._parse_qemu(line, m)

        raise ValueError('Unsupported format')

    def _parse_libvirt(self, line, match):
        """Parse a line whose prefix matched the LIBVIRT pattern."""
        (year, month, day, hour, minute, second, millis,
         tzstr, tzsign, tzhours, tzminutes) = match.groups()

        dt = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            # The pattern captures exactly 3 digits: milliseconds
            microsecond=int(millis) * 1000,
            tzinfo=make_tzinfo(tzstr, tzsign, tzhours, tzminutes),
        )

        # Strip colon and trailing whitespace from full date string
        dt_str = match.group(0).rstrip()[:-1]

        return dt, dt_str, line[match.end():]

    def _parse_qemu(self, line, match):
        """Parse a line whose prefix matched the QEMU pattern."""
        (year, month, day, hour, minute, second, fraction) = match.groups()

        # The pattern accepts any number of fractional digits. Normalise to
        # exactly 6 so the value is a count of microseconds: '5' means
        # 500000us, not 5us, and digits beyond microsecond precision are
        # dropped rather than overflowing datetime's 0..999999 range.
        fraction = fraction[:6].ljust(6, '0')

        dt = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            microsecond=int(fraction),
            # The trailing 'Z' means UTC
            tzinfo=dateutil.tz.tzutc(),
        )

        # Strip trailing whitespace from full date string
        dt_str = match.group(0).rstrip()

        return dt, dt_str, line[match.end():]
232
+
233
+
148 234
 class TSLogParser(LogParser):
149 235
     """Timestamped log: [275514.814982]"""
150 236
 
@@ -185,6 +271,7 @@ class TSLogParser(LogParser):
185 271
     def parse_line(self, line):
186 272
         end, timestamp = self._read_timestamp(line)
187 273
         dt = self.start_date + timedelta(seconds=timestamp)
274
+        dt = dt.replace(tzinfo = self.default_tz)
188 275
         return dt, line[:end + 1], line[end + 1:]
189 276
 
190 277
 
@@ -193,7 +280,7 @@ class LogFile(object):
193 280
         self.open(filename)
194 281
 
195 282
         parsers = []
196
-        for cls in LOG_TYPES.values():
283
+        for cls in LOG_TYPES.values() + DETECTED_LOG_TYPES:
197 284
             if cls is None:
198 285
                 continue
199 286
 
@@ -208,6 +295,9 @@ class LogFile(object):
208 295
         # the first to successfully parse a line
209 296
         for i in range(0, 5):
210 297
             line = self._readline()
298
+            if line is None:
299
+                continue
300
+
211 301
             for parser in parsers:
212 302
                 try:
213 303
                     parser.parse_line(line)
@@ -320,6 +410,7 @@ class LogFile(object):
320 410
         return cmp(self.peek(), other.peek())
321 411
 
322 412
 
413
+# Log file formats with command line options
323 414
 LOG_TYPES = {
324 415
     'logfiles_detect': None,
325 416
     'logfiles_o': OSLogParser,
@@ -328,6 +419,12 @@ LOG_TYPES = {
328 419
 }
329 420
 
330 421
 
422
+# Log file formats which can only be auto-detected
423
+DETECTED_LOG_TYPES = [
424
+    LibvirtdParser,
425
+]
426
+
427
+
331 428
 def process_logs(cfg):
332 429
     filename_alias = {}
333 430
     logs = []

Loading…
Cancel
Save