system-config/modules/openstack_project/templates/logstash/indexer.conf.erb
Sean Dague 686886c8d6 fix error multiline folding for indexing
The previous folding rules for TRACE log level had the effect that all
TRACES were folded into the ERROR that was emitted before them. This
was fine and appropriate. When we dropped TRACE tag for these and used
ERROR, this was extended to ERROR.

This is incorrect.

While ERROR tags are used in stack traces in a multiline way, they
also just emit some times. We don't want to *always* fold them into
the previous line. Doing so means that logstash treats all our ERROR
log messages as what came before. Typically INFO.

One additional indication of a stacktrace is the log message always
has a process id in it. We can minimally modify the grok rule to
require %{NUMBER}, which should do the right thing here.

This should bring back ERROR log lines in logstash being listed under
loglevel:ERROR.

Change-Id: I20372686212d080d4bc5c5578c418546005260fd
2015-11-09 11:32:37 -05:00

123 lines
4.3 KiB
Plaintext

input {
tcp {
host => "localhost"
port => 9999
codec => json_lines {}
type => "jenkins"
}
}
# You can check grok patterns at http://grokdebug.herokuapp.com/
filter {
if "screen" in [tags] and [message] =~ "^\+ " {
drop {}
}
if "console" in [tags] or "console.html" in [tags] {
if [message] == "<pre>" or [message] == "</pre>" {
drop {}
}
multiline {
negate => true
pattern => "^%{TIMESTAMP_ISO8601} \|"
what => "previous"
stream_identity => "%{host}.%{filename}"
}
grok {
# Do multiline matching as the above mutliline filter may add newlines
# to the log messages.
match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate} \| %{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "oslofmt" in [tags] {
multiline {
negate => true
pattern => "^%{TIMESTAMP_ISO8601} "
what => "previous"
stream_identity => "%{host}.%{filename}"
}
multiline {
negate => false
# NOTE(mriedem): oslo.log 1.2.0 changed the logging_exception_prefix
# config option from using TRACE to ERROR so we have to handle both.
#
# NOTE(sdague): stack traces always include process id, so
# NUMBER being required element here is important, otherwise
# ERROR messages just fold into the previous messages, which are
# typically INFO.
pattern => "^%{TIMESTAMP_ISO8601}%{SPACE}%{NUMBER}%{SPACE}(TRACE|ERROR)"
what => "previous"
stream_identity => "%{host}.%{filename}"
}
grok {
# Do multiline matching as the above mutliline filter may add newlines
# to the log messages.
# TODO move the LOGLEVELs into a proper grok pattern.
match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate}%{SPACE}%{NUMBER:pid}?%{SPACE}?(?<loglevel>AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR) \[?\b%{NOTSPACE:module}\b\]?%{SPACE}?%{GREEDYDATA:logmessage}?" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "apachecombined" in [tags] {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}" }
add_field => { "received_at" => "%{@timestamp}" }
add_field => { "logdate" => "%{timestamp}" }
add_field => { "logmessage" => "%{verb} %{request} %{response}" }
}
} else if "apacheerror" in [tags] {
grok {
match => { "message" => "\[(?<logdate>%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}%{SPACE}%{TZ}?)\]%{SPACE}\[%{LOGLEVEL:loglevel}\]%{SPACE}%{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "libvirt" in [tags] {
grok {
# libvirtd grok filter adapted from
# https://github.com/OpenStratus/openstack-logstash/blob/master/agent.conf
match => { "message" => "%{TIMESTAMP_ISO8601:logdate}:%{SPACE}%{NUMBER:pid}:%{SPACE}%{LOGLEVEL:loglevel}%{SPACE}:%{SPACE}%{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "syslog" in [tags] {
grok {
# Syslog grok filter adapted from
# http://cookbook.logstash.net/recipes/syslog-pri/syslog.conf
match => { "message" => "%{SYSLOGTIMESTAMP:logdate}%{SPACE}%{SYSLOGHOST:syslog_host}?%{SPACE}%{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?:? %{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
}
# Filters below here should be consistent for all Jenkins log formats.
# Remove DEBUG logs to reduce the amount of data that needs to be processed.
if [loglevel] == "DEBUG" {
drop {}
}
if ! ("_grokparsefailure" in [tags]) {
date {
match => [ "logdate",
"yyyy-MM-dd HH:mm:ss.SSS",
"yyyy-MM-dd HH:mm:ss,SSS",
"yyyy-MM-dd HH:mm:ss",
"MMM d HH:mm:ss",
"MMM dd HH:mm:ss",
"dd/MMM/yyyy:HH:mm:ss Z",
"yyyy-MM-dd HH:mm:ss.SSSZ",
"E MMM dd HH:mm:ss yyyy Z",
"E MMM dd HH:mm:ss yyyy"
]
timezone => "UTC"
}
mutate {
replace => { "message" => "%{logmessage}" }
}
mutate {
remove_field => [ "logdate", "logmessage" ]
}
}
}
output {
elasticsearch_http {
host => "localhost"
manage_template => false
flush_size => 1024
}
}