Add auto-detect parser for raw syslog

Change-Id: Iba7e80d97cffc809defdfdd87f88a8b698f28019
This commit is contained in:
Matthew Booth 2017-05-05 17:18:54 +01:00
parent 1be8b23dfc
commit 1a31f79327
1 changed files with 45 additions and 0 deletions

View File

@ -2,9 +2,11 @@ from __future__ import print_function
import argparse import argparse
from datetime import datetime, timedelta from datetime import datetime, timedelta
import dateutil.parser import dateutil.parser
import dateutil.tz
import hashlib import hashlib
import heapq import heapq
import os import os
import re
import sys import sys
import tempfile import tempfile
import time import time
@ -231,6 +233,48 @@ class LibvirtdParser(LogParser):
return dt, dt_str, line[match.end():] return dt, dt_str, line[match.end():]
class RawSyslog(LogParser):
"""Raw syslog: <183>1 2017-04-03T21:48:21.781459-03:30"""
# NOTE(mdbooth): Parsing the date in this regexp and reconstructing it
# manually is a *lot* faster than passing the whole string to
# dateutil.parse(). Didn't try strptime due to having to parse tzinfo
# manually anyway.
HEADER = re.compile('<\d+>\d+\s'
'('
'(\d{4})-(\d{2})-(\d{2})T' # Date
'(\d{2}):(\d{2}):(\d{2})\.(\d+)' # Time
'(' #
'([+-])(\d{2}):(\d{2})' # Timezone
')' #
')\s*')
def parse_line(self, line):
m = RawSyslog.HEADER.match(line)
if m is None:
raise ValueError("Not syslog packet")
groups = list(m.groups())
dt_str = groups.pop(0)
(tzminutes, tzhours, tzsign, tzstr) = (
groups.pop(), groups.pop(), groups.pop(), groups.pop())
tzinfo = make_tzinfo(tzstr, tzsign, tzhours, tzminutes)
dt = datetime(
year=int(groups.pop(0)),
month=int(groups.pop(0)),
day=int(groups.pop(0)),
hour=int(groups.pop(0)),
minute=int(groups.pop(0)),
second=int(groups.pop(0)),
microsecond=int(groups.pop(0)),
tzinfo=tzinfo,
)
return dt, dt_str, line[m.end():]
class TSLogParser(LogParser): class TSLogParser(LogParser):
"""Timestamped log: [275514.814982]""" """Timestamped log: [275514.814982]"""
@ -422,6 +466,7 @@ LOG_TYPES = {
# Log file formats which can only be auto-detected # Log file formats which can only be auto-detected
DETECTED_LOG_TYPES = [ DETECTED_LOG_TYPES = [
LibvirtdParser, LibvirtdParser,
RawSyslog,
] ]