Upgrade Logstash to 1.2.1.

* modules/logstash/manifests/init.pp: Download and install Logstash
1.2.1.

* modules/openstack_project/files/logstash/log-gearman-client.py:
Logstash 1.2.1 comes with a new schema. Update the job data sent to log
push workers to better accommodate the new schema.

* modules/openstack_project/files/logstash/log-gearman-worker.py: Push
Logstash 1.2.1 schema compliant JSON to the Logstash TCP input.

* modules/openstack_project/templates/logstash/indexer.conf.erb:
Logstash 1.2.1 comes with a new schema and many input and filter
changes. Use the newly supported features like conditionals to keep the
config up to date.

* modules/kibana/templates/config.rb.erb: Change the default field for
kibana to 'message'. It was previously '@message', which is deprecated in
the new Logstash schema.

Change-Id: Id19fc05bcce8d42c5c0cf33df3da7e95f5794107
This commit is contained in:
Clark Boylan 2013-10-16 11:34:47 -07:00
parent 0423b59c6c
commit 1f07114346
5 changed files with 108 additions and 131 deletions

View File

@ -40,7 +40,7 @@ module KibanaConfig
# Change which fields are shown by default. Must be set as an array
# Default_fields = ['@fields.vhost','@fields.response','@fields.request']
Default_fields = ['@message']
Default_fields = ['message']
# If set to true, Kibana will use the Highlight feature of Elasticsearch to
# display highlighted search results
@ -49,7 +49,7 @@ module KibanaConfig
# A field needs to be specified for the highlight feature. By default,
# Elasticsearch doesn't allow highlighting on _all because the field has to
# be either stored or part of the _source field.
Highlighted_field = "@message"
Highlighted_field = "message"
# Make URLs clickable in detailed view
Clickable_URLs = true
@ -122,10 +122,10 @@ module KibanaConfig
# field called _all that is searched when no field is specified.
# Dropping _all can reduce index size significantly. If you do that
# you'll need to change primary_field to be 'message'
Primary_field = '@message'
Primary_field = 'message'
# Default Elastic Search index to query
Default_index = '@message'
Default_index = 'message'
# TODO: This isn't functional yet
# Prevent wildcard search terms which result in extremely slow queries

View File

@ -38,13 +38,13 @@ class logstash {
}
exec { 'get_logstash_jar':
command => 'wget http://logstash.objects.dreamhost.com/release/logstash-1.1.12-monolithic.jar -O /opt/logstash/logstash-1.1.12-monolithic.jar',
command => 'wget https://download.elasticsearch.org/logstash/logstash/logstash-1.2.1-flatjar.jar -O /opt/logstash/logstash-1.2.1-flatjar.jar',
path => '/bin:/usr/bin',
creates => '/opt/logstash/logstash-1.1.12-monolithic.jar',
creates => '/opt/logstash/logstash-1.2.1-flatjar.jar',
require => File['/opt/logstash'],
}
file { '/opt/logstash/logstash-1.1.12-monolithic.jar':
file { '/opt/logstash/logstash-1.2.1-flatjar.jar':
ensure => present,
owner => 'logstash',
group => 'logstash',
@ -57,8 +57,8 @@ class logstash {
file { '/opt/logstash/logstash.jar':
ensure => link,
target => '/opt/logstash/logstash-1.1.12-monolithic.jar',
require => File['/opt/logstash/logstash-1.1.12-monolithic.jar'],
target => '/opt/logstash/logstash-1.2.1-flatjar.jar',
require => File['/opt/logstash/logstash-1.2.1-flatjar.jar'],
}
file { '/var/log/logstash':

View File

@ -106,8 +106,8 @@ class EventProcessor(threading.Thread):
os.path.join(log_dir, fileopts['name'])
fields["log_url"] = source_url
out_event = {}
out_event["@fields"] = fields
out_event["@tags"] = [fileopts['name']] + fileopts.get('tags', [])
out_event["fields"] = fields
out_event["tags"] = [fileopts['name']] + fileopts.get('tags', [])
return source_url, out_event

View File

@ -69,19 +69,20 @@ class LogRetriever(threading.Thread):
retry = arguments['retry']
event = arguments['event']
logging.debug("Handling event: " + json.dumps(event))
fields = event['@fields']
tags = event['@tags']
fields = event.get('fields') or event.get('@fields')
tags = event.get('tags') or event.get('@tags')
if fields['build_status'] != 'ABORTED':
# Handle events ignoring aborted builds. These builds are
# discarded by zuul.
log_lines = self._retrieve_log(source_url, retry)
logging.debug("Pushing " + str(len(log_lines)) + " log lines.")
base_event = {}
base_event.update(fields)
base_event["tags"] = tags
for line in log_lines:
out_event = {}
out_event["@fields"] = fields
out_event["@tags"] = tags
out_event["event_message"] = line
out_event = base_event.copy()
out_event["message"] = line
self.logq.put(out_event)
job.sendWorkComplete()
except Exception as e:

View File

@ -2,132 +2,108 @@ input {
tcp {
host => "localhost"
port => 9999
format => "json"
message_format => "%{event_message}"
codec => line {}
type => "jenkins"
}
}
# You can check grok patterns at http://grokdebug.herokuapp.com/
filter {
grep {
# Remove unneeded html tags.
type => "jenkins"
tags => ["console.html"]
# Drop matches.
negate => true
match => ["@message", "^</?pre>$"]
# This is a workaround for a bug. We should be able to set the tcp
# input codec to json, but that codec doesn't support streaming.
# Convert to json here instead.
json {
source => "message"
}
grep {
# Remove screen log headers.
type => "jenkins"
tags => ["screen"]
# Drop matches.
negate => true
match => ["@message", "^\+ "]
if "screen" in [tags] and [message] =~ "^\+ " {
drop {}
}
grep {
# Remove blank lines.
type => "jenkins"
tags => ["keystonefmt"]
# Drop matches.
negate => true
match => ["@message", "^$"]
}
multiline {
type => "jenkins"
tags => ["console.html"]
negate => true
pattern => "^%{DATESTAMP} \|"
what => "previous"
stream_identity => "%{@source_host}.%{filename}"
}
multiline {
type => "jenkins"
tags => ["oslofmt"]
negate => true
pattern => "^%{DATESTAMP} "
what => "previous"
stream_identity => "%{@source_host}.%{filename}"
}
multiline {
type => "jenkins"
tags => ["oslofmt"]
negate => false
pattern => "^%{DATESTAMP}%{SPACE}%{NUMBER}?%{SPACE}?TRACE"
what => "previous"
stream_identity => "%{@source_host}.%{filename}"
}
multiline {
type => "jenkins"
tags => ["keystonefmt"]
negate => true
pattern => "^\(\b%{NOTSPACE}\b\):"
what => "previous"
stream_identity => "%{@source_host}.%{filename}"
}
grok {
type => "jenkins"
tags => ["console.html"]
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
pattern => [ "(?m)^%{DATESTAMP:logdate} \| %{GREEDYDATA:logmessage}" ]
add_field => [ "received_at", "%{@timestamp}" ]
}
grok {
type => "jenkins"
tags => ["oslofmt"]
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
# TODO move the LOGLEVELs into a proper grok pattern.
pattern => [ "(?m)^%{DATESTAMP:logdate}%{SPACE}%{NUMBER:pid}?%{SPACE}?(?<loglevel>AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR) \[?\b%{NOTSPACE:module}\b\]?%{SPACE}?%{GREEDYDATA:logmessage}?" ]
add_field => [ "received_at", "%{@timestamp}" ]
}
grok {
type => "jenkins"
tags => ["keystonefmt"]
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
# TODO move the LOGLEVELs into a proper grok pattern.
pattern => [ "(?m)^\(\b%{NOTSPACE:module}\b\):%{SPACE}%{DATESTAMP:logdate}%{SPACE}(?<loglevel>AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR)%{SPACE}%{GREEDYDATA:logmessage}" ]
add_field => [ "received_at", "%{@timestamp}" ]
}
grok {
type => "jenkins"
tags => ["apachecombined"]
pattern => [ "%{COMBINEDAPACHELOG}" ]
add_field => [ "received_at", "%{@timestamp}", "logdate", "%{timestamp}", "logmessage", "%{verb} %{request} %{response}" ]
}
grok {
type => "jenkins"
tags => ["syslog"]
# Syslog grok filter adapted from
# http://cookbook.logstash.net/recipes/syslog-pri/syslog.conf
pattern => [ "%{SYSLOGTIMESTAMP:logdate}%{SPACE}%{SYSLOGHOST:syslog_host}?%{SPACE}%{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?:? %{GREEDYDATA:logmessage}" ]
add_field => [ "received_at", "%{@timestamp}" ]
}
# Remove DEBUG logs to reduce the amount of data that needs to be processed.
grep {
type => "jenkins"
negate => true
match => [ "loglevel", "DEBUG" ]
if "console.html" in [tags] {
if [message] == "<pre>" or [message] == "</pre>" {
drop {}
}
multiline {
negate => true
pattern => "^%{TIMESTAMP_ISO8601} \|"
what => "previous"
stream_identity => "%{host}.%{filename}"
}
grok {
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate} \| %{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "oslofmt" in [tags] {
multiline {
negate => true
pattern => "^%{TIMESTAMP_ISO8601} "
what => "previous"
stream_identity => "%{host}.%{filename}"
}
multiline {
negate => false
pattern => "^%{TIMESTAMP_ISO8601}%{SPACE}%{NUMBER}?%{SPACE}?TRACE"
what => "previous"
stream_identity => "%{host}.%{filename}"
}
grok {
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
# TODO move the LOGLEVELs into a proper grok pattern.
match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate}%{SPACE}%{NUMBER:pid}?%{SPACE}?(?<loglevel>AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR) \[?\b%{NOTSPACE:module}\b\]?%{SPACE}?%{GREEDYDATA:logmessage}?" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "keystonefmt" in [tags] {
if [message] == "" {
drop {}
}
multiline {
negate => true
pattern => "^\(\b%{NOTSPACE}\b\):"
what => "previous"
stream_identity => "%{host}.%{filename}"
}
grok {
# Do multiline matching as the above multiline filter may add newlines
# to the log messages.
# TODO move the LOGLEVELs into a proper grok pattern.
match => { "message" => "(?m)^\(\b%{NOTSPACE:module}\b\):%{SPACE}%{TIMESTAMP_ISO8601:logdate}%{SPACE}(?<loglevel>AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR)%{SPACE}%{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
} else if "apachecombined" in [tags] {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}" }
add_field => { "received_at" => "%{@timestamp}" }
add_field => { "logdate" => "%{timestamp}" }
add_field => { "logmessage" => "%{verb} %{request} %{response}" }
}
} else if "syslog" in [tags] {
grok {
# Syslog grok filter adapted from
# http://cookbook.logstash.net/recipes/syslog-pri/syslog.conf
match => { "message" => "%{SYSLOGTIMESTAMP:logdate}%{SPACE}%{SYSLOGHOST:syslog_host}?%{SPACE}%{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?:? %{GREEDYDATA:logmessage}" }
add_field => { "received_at" => "%{@timestamp}" }
}
}
# Filters below here should be consistent for all Jenkins log formats.
date {
type => "jenkins"
exclude_tags => "_grokparsefailure"
match => [ "logdate", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "dd/MMM/yyyy:HH:mm:ss Z" ]
# Remove DEBUG logs to reduce the amount of data that needs to be processed.
if [loglevel] == "DEBUG" {
drop {}
}
mutate {
type => "jenkins"
exclude_tags => "_grokparsefailure"
replace => [ "@message", "%{logmessage}" ]
}
mutate {
type => "jenkins"
exclude_tags => "_grokparsefailure"
remove => [ "logdate", "logmessage", "event_message" ]
if ! ("_grokparsefailure" in [tags]) {
date {
match => [ "logdate", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "dd/MMM/yyyy:HH:mm:ss Z" ]
timezone => "UTC"
}
mutate {
replace => { "message" => "%{logmessage}" }
}
mutate {
remove_field => [ "logdate", "logmessage" ]
}
}
}