Add auto-detect parser for libvirt domain logs

Change-Id: I7d98327bebf20b01e41525b56ef59c68cbb8bbd3
This commit is contained in:
Matthew Booth 2017-09-20 18:32:15 +01:00
parent 11e139a706
commit 1be8b23dfc
1 changed file with 98 additions and 1 deletion

View File

@ -1,6 +1,7 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from datetime import datetime, timedelta from datetime import datetime, timedelta
import dateutil.parser
import hashlib import hashlib
import heapq import heapq
import os import os
@ -99,6 +100,12 @@ class LogEntry(object):
class LogParser(object):
    """Common base class for the log line parsers in this file.

    Concrete parsers derive from this class and override parse_line().
    """

    # Timezone attached to timestamps that carry no explicit TZ: UTC
    default_tz = dateutil.tz.tzutc()

    def __init__(self, filename):
        """Accept the log's filename; the base class makes no use of it."""

    def parse_line(self, line):
        """Parse a single log line; the base class provides no parsing."""
        raise NotImplementedError()
@ -118,6 +125,7 @@ class StrptimeParser(LogParser):
dt_str = ' '.join(dt_str) dt_str = ' '.join(dt_str)
dt = datetime.strptime(dt_str, self.date_format) dt = datetime.strptime(dt_str, self.date_format)
dt = dt.replace(tzinfo=self.default_tz)
# +1 to remove the separator so we don't have 2 spaces on output # +1 to remove the separator so we don't have 2 spaces on output
return dt, dt_str, data return dt, dt_str, data
@ -145,6 +153,84 @@ class MsgLogParser(StrptimeParser):
return dt.replace(self.year), dt_str, data return dt.replace(self.year), dt_str, data
def make_tzinfo(name, sign, hours, minutes):
    """Build a fixed-offset tzinfo from the pieces of a UTC offset.

    name is passed through unchanged as the zone name. hours and minutes are
    converted with int(), so digit strings are accepted. A sign of '-'
    makes the offset negative; any other value leaves it positive.
    """
    offset_seconds = 60 * int(minutes) + 3600 * int(hours)
    signed_offset = -offset_seconds if sign == '-' else offset_seconds
    return dateutil.tz.tzoffset(name, signed_offset)
class LibvirtdParser(LogParser):
    """Parser for libvirtd.log and libvirt domain logs.

    Message format: 2017-09-18 18:08:49.163+0000:
                OR: 2017-09-18T18:08:49.216429Z qemu-kvm:

    Domain logs contain a mixture of libvirt and qemu logs, hence the 2 log
    formats.
    """

    # Raw string literals keep regex escapes such as \d, \. and \s out of
    # Python's own string-escape processing.
    LIBVIRT = re.compile(r'(\d{4})-(\d{2})-(\d{2}) '          # Date
                         r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})'  # Time
                         r'('                                 #
                         r'([+-])(\d{2})(\d{2})'              # Timezone
                         r'):\s*')                            #
    QEMU = re.compile(r'(\d{4})-(\d{2})-(\d{2})T'              # Date
                      r'(\d{2}):(\d{2}):(\d{2})\.(\d+)Z\s*')   # Time

    def parse_line(self, line):
        """Split a line into (datetime, timestamp text, rest of the line).

        The libvirt format is tried first, then the qemu format.

        Raises ValueError if the line starts with neither timestamp format.
        """
        m = self.LIBVIRT.match(line)
        if m is not None:
            return self._parse_libvirt(line, m)

        m = self.QEMU.match(line)
        if m is not None:
            return self._parse_qemu(line, m)

        raise ValueError('Unsupported format')

    def _parse_libvirt(self, line, match):
        """Build the result for a line matched by the LIBVIRT pattern."""
        # tzstr is the whole '+HHMM' group; it doubles as the tzinfo's name
        (year, month, day, hour, minute, second, millis,
         tzstr, tzsign, tzhours, tzminutes) = match.groups()

        tzinfo = make_tzinfo(tzstr, tzsign, tzhours, tzminutes)

        dt = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            # The pattern captures exactly 3 fractional digits: milliseconds
            microsecond=int(millis) * 1000,
            tzinfo=tzinfo,
        )

        # Strip colon and trailing whitespace from full date string
        dt_str = match.group(0).rstrip()[:-1]

        return dt, dt_str, line[match.end():]

    def _parse_qemu(self, line, match):
        """Build the result for a line matched by the QEMU pattern."""
        (year, month, day, hour, minute, second, fraction) = match.groups()

        # The pattern accepts any number of fractional digits, so the text
        # is a decimal fraction of a second rather than a microsecond count.
        # Scale it to exactly 6 digits: pad short fractions on the right
        # ('.5' is 500000us, not 5us) and drop digits beyond microsecond
        # precision, which datetime cannot represent.
        microsecond = int(fraction[:6].ljust(6, '0'))

        dt = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            microsecond=microsecond,
            # The trailing 'Z' means UTC
            tzinfo=dateutil.tz.tzutc(),
        )

        # Strip trailing whitespace from full date string
        dt_str = match.group(0).rstrip()

        return dt, dt_str, line[match.end():]
class TSLogParser(LogParser): class TSLogParser(LogParser):
"""Timestamped log: [275514.814982]""" """Timestamped log: [275514.814982]"""
@ -185,6 +271,7 @@ class TSLogParser(LogParser):
def parse_line(self, line):
    """Split a line into (datetime, timestamp text, rest of the line).

    The timestamp read from the line is treated as a number of seconds
    after self.start_date, and the result is tagged with the default
    timezone.
    """
    end, timestamp = self._read_timestamp(line)
    # presumably `end` indexes the last character of the timestamp text,
    # so the line is split just after it -- confirm in _read_timestamp
    split_at = end + 1
    elapsed = timedelta(seconds=timestamp)
    dt = (self.start_date + elapsed).replace(tzinfo=self.default_tz)
    return dt, line[:split_at], line[split_at:]
@ -193,7 +280,7 @@ class LogFile(object):
self.open(filename) self.open(filename)
parsers = [] parsers = []
for cls in LOG_TYPES.values(): for cls in LOG_TYPES.values() + DETECTED_LOG_TYPES:
if cls is None: if cls is None:
continue continue
@ -208,6 +295,9 @@ class LogFile(object):
# the first to successfully parse a line # the first to successfully parse a line
for i in range(0, 5): for i in range(0, 5):
line = self._readline() line = self._readline()
if line is None:
continue
for parser in parsers: for parser in parsers:
try: try:
parser.parse_line(line) parser.parse_line(line)
@ -320,6 +410,7 @@ class LogFile(object):
return cmp(self.peek(), other.peek()) return cmp(self.peek(), other.peek())
# Log file formats with command line options
LOG_TYPES = { LOG_TYPES = {
'logfiles_detect': None, 'logfiles_detect': None,
'logfiles_o': OSLogParser, 'logfiles_o': OSLogParser,
@ -328,6 +419,12 @@ LOG_TYPES = {
} }
# Parsers for log formats that have no command line option of their own;
# they are only tried when the log format is auto-detected
DETECTED_LOG_TYPES = [LibvirtdParser]
def process_logs(cfg): def process_logs(cfg):
filename_alias = {} filename_alias = {}
logs = [] logs = []