diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 08454b7..7c44105 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,8 @@ Changelog - Add base log path option: `-b` `--log-base`. - Log postfix option: `-p` `--log-postfix`. - Auto alias generation: `-a` `--alias-level`. +- Add support for default /var/log/messages datetime format files with + `-ml [FILE [FILE]]` **Bugfixes:** diff --git a/README.rst b/README.rst index 4e1aded..727ed04 100644 --- a/README.rst +++ b/README.rst @@ -93,6 +93,27 @@ Example for Cinder: $ os-log-merger -b /var/log/cinder/ -p .log api:api scheduler:sch volume:vol +/var/log/messages +~~~~~~~~~~~~~~~~~ + +os-log-merger also supports /var/log/messages type of files with the `-ml` +and `--msg-logs` options. + +Since the format for those files is missing year information -MAR 24 14:11:19- +the year from the last file modification will be used. + +These files can also be specified with globs and they support alias definition +as well. + +Beware that openstack files should be listed before `-ml` option files. + +Example for Cinder: + +.. 
code:: bash + + $ os-log-merger -b /var/log/ cinder/api.log:API -ml messages:MSG *.log + + Auto Alias ~~~~~~~~~~ diff --git a/oslogmerger/oslogmerger.py b/oslogmerger/oslogmerger.py index ab63d52..ec582d8 100644 --- a/oslogmerger/oslogmerger.py +++ b/oslogmerger/oslogmerger.py @@ -5,6 +5,7 @@ import hashlib import os import sys import tempfile +import time import urllib2 @@ -74,15 +75,88 @@ FILE_MAP = { } -class OpenStackLog: - def __init__(self, filename): - self._open(filename) +class LogEntry(object): + separator = ' ' + date_format = None + _date_parse_msg = 'unconverted data remains: ' - def _open(self, filename): + def __init__(self): + self._date_length = None + + def prepare_line(self, line, file_datetime): + return line + + def parse_date(self, line): + try: + dt = datetime.strptime(line, self.date_format) + except ValueError as e: + if not e.args[0].startswith(self._date_parse_msg): + raise + prepared_date_length = (len(line) - len(e.args[0]) + + len(self._date_parse_msg)) + dt = datetime.strptime(line[:prepared_date_length], + self.date_format) + return dt + + def _calculate_date_length(self): + return len(self.date.strftime(self.date_format)) + + @property + def date_length(self): + if not self._date_length: + self._date_length = self._calculate_date_length() + return self._date_length + + @classmethod + def factory(cls, filename, line, file_datetime): + self = cls() + + self.filename = filename + if not line: + raise ValueError + + # Prepare the line for date parsing + prepared_line = self.prepare_line(line, file_datetime) + + # Extract the datetime + self.date = self.parse_date(prepared_line) + + if (len(line) == self.date_length or + line[self.date_length] != self.separator): + raise ValueError + + self.date_str = line[:self.date_length] + # +1 to remove the separator so we don't have 2 spaces on output + self.data = line[self.date_length + 1:] + return self + + def append_line(self, line): + self.data += EXTRALINES_PADDING + line + + def 
__cmp__(self, other): + return cmp(self.date, other.date) + + +class LogFile(object): + log_entry_class = LogEntry + + @staticmethod + def factory(cls, filename): + instance = LogFile(filename) + instance.log_entry_class = cls + return instance + + def __init__(self, filename): + self.open(filename) + + def open(self, filename): self._filename = filename if filename.startswith("http://"): filename = self._cached_download(filename) + self._file = open(filename, 'r') + stat = os.stat(filename) + self.mtime = datetime.fromtimestamp(stat.st_mtime) def _url_cache_path(self, url): md5 = hashlib.md5() @@ -104,29 +178,16 @@ class OpenStackLog: file_out = open(path, 'w') file_out.write(http_in.read()) file_out.close() + + # Set the file time to the one from the URL + info = http_in.info() + m_date = info.getdate('date') + mtime = time.mktime(m_date) + os.utime(path, (mtime, mtime)) + http_in.close() return path - def _extract_with_date(self, line): - try: - # TODO(mangelajo): We support the default log format - # so far, but we may need to discover - # different ones. - chunks = line.split(" ") - datetime_str = ' '.join(chunks[:2]) - # this is likely to be not necessary, we can just compare - # strings, and that's going to be faster than parsing - # and regenerating later, but, could be useful when mixing - # log and date formats. 
- date_object = datetime.strptime( - datetime_str, "%Y-%m-%d %H:%M:%S.%f") - pid, level = chunks[2], chunks[3] - rest = ' '.join(chunks[4:]) - return (date_object, datetime_str, self._filename, pid, level, - rest) - except IndexError: - return None - def __iter__(self): self.entry = None self.next_entry = None @@ -139,7 +200,9 @@ class OpenStackLog: return entry, None try: - new_entry = self._extract_with_date(line) + new_entry = self.log_entry_class.factory(self._filename, + line, + self.mtime) if new_entry is None: continue if entry: @@ -150,9 +213,7 @@ class OpenStackLog: # it's a non-dated line, just append to the entry # extra info if entry: - (date_object, date_str, filename, pid, level, rest) = entry - entry = (date_object, date_str, filename, pid, level, - rest + EXTRALINES_PADDING + line) + entry.append_line(line) def __next__(self): return self.next() @@ -174,7 +235,29 @@ class OpenStackLog: if (other.peek() or self.peek()) is None: return 0 if self.peek() is None else -1 - return cmp(self.peek()[0], other.peek()[0]) + return cmp(self.peek(), other.peek()) + + +class MsgLogEntry(LogEntry): + """Message format: Oct 15 14:11:19""" + date_format = '%Y%b %d %H:%M:%S' + + def prepare_line(self, line, file_datetime): + # TODO: If year of file creation and file last modification are + # different we should start with the creation year and then change to + # the next year once the months go back. 
+ return '%s%s' % (file_datetime.year, line) + + def _calculate_date_length(self): + return super(MsgLogEntry, self)._calculate_date_length() - 4 + + +class OSLogEntry(LogEntry): + """OpenStack default log: 2016-02-01 10:22:59.239""" + date_format = '%Y-%m-%d %H:%M:%S.%f' + + def _calculate_date_length(self): + return super(OSLogEntry, self)._calculate_date_length() - 3 def process_logs_limit_memory_usage(logs): @@ -207,20 +290,27 @@ def process_logs_memory_hog(logs): for entry in log: all_entries.append(entry) - sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0]) + sorted_entries = sorted(all_entries) for entry in sorted_entries: yield entry +LOG_TYPES = [ + ('logfiles', OSLogEntry), + ('logfiles_m', MsgLogEntry), +] + + def process_logs(cfg): filename_alias = {} logs = [] - for filename in cfg.logfiles: - path, alias, is_url = get_path_and_alias(filename, - cfg.log_base, - cfg.log_postfix) - filename_alias[path] = (filename, alias, is_url) - logs.append(OpenStackLog(path)) + for arg_name, entry_cls in LOG_TYPES: + for filename in getattr(cfg, arg_name): + path, alias, is_url = get_path_and_alias(filename, + cfg.log_base, + cfg.log_postfix) + filename_alias[path] = (filename, alias, is_url) + logs.append(LogFile.factory(entry_cls, path)) alias = generate_aliases(filename_alias, cfg) @@ -230,9 +320,8 @@ def process_logs(cfg): method = process_logs_memory_hog for entry in method(logs): - (date_object, date_str, filename, pid, level, rest) = entry - print (' '.join([date_str, '[%s]' % alias[filename], pid, - level, rest]).rstrip('\n')) + print('%s [%s] %s' % (entry.date_str, alias[entry.filename], + entry.data.rstrip('\n'))) def get_path_and_alias(filename, log_base, log_postfix): @@ -400,9 +489,16 @@ alias. Use the aliases if you want shorter line lengths. Logs are expected to contain lines in the following format: -Y-m-d H:M:S.mmm PID LOG-LEVEL ............ -Y-m-d H:M:S.mmm PID LOG-LEVEL ............ +Y-m-d H:M:S.mmm ............ 
+Y-m-d H:M:S.mmm ............ [ extra line info ..... ] + + Logs with default /var/log/messages datetime format (Oct 15 14:11:19) +can optionally be merged as well using "--msg-logs" or "-ml" +options. Year will be taken from the last modified time of the file. + + These log files will also be affected by log base directory and log +postfix. """ general_epilog = """ @@ -449,14 +545,17 @@ one has not been provided:' help='Base path for all the log files') parser.add_argument('--log-postfix ', '-p', dest='log_postfix', help='Append to all the log files path') - parser.add_argument('logfiles', nargs='+', metavar='log_file', - help='File in the format of log_file[:ALIAS]') + parser.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]', + help='OpenStack log file.') parser.add_argument('--alias-level', '-a', type=int, default=0, dest='alias_level', help='Level of smart alias naming (0-3)') parser.add_argument('--min-memory', '-m', default=False, action='store_true', dest='limit_memory', help='Limit memory usage') + parser.add_argument('--msg-logs', '-ml', default=[], nargs='+', + dest='logfiles_m', metavar='file[:ALIAS]', + help='Message log files with format: Oct 15 14:11:19') return parser.parse_args()