From b2ef46c5c725142d36bee8f0a08dede527e25ca9 Mon Sep 17 00:00:00 2001
From: Clark Boylan <clark.boylan@gmail.com>
Date: Mon, 10 Mar 2014 13:54:39 -0700
Subject: [PATCH] Better logstash field data.

We currently use a lot of wildcard searches in Elasticsearch, which are
slow. Provide better field data so that we can replace those wildcard
searches with filters. In particular, add a short UUID field and make
the filename tag the basename of the file path so that grenade and
non-grenade files all end up with the same tags.

Change-Id: If558017fceae96bcf197e611ab5cac1cfe7ae9bf
---
 modules/log_processor/files/log-gearman-client.py          | 6 +++++-
 .../files/logstash/jenkins-log-client.yaml                 | 7 +++++++
 .../openstack_project/templates/logstash/indexer.conf.erb  | 2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/modules/log_processor/files/log-gearman-client.py b/modules/log_processor/files/log-gearman-client.py
index a472c19de0..e9d8383838 100644
--- a/modules/log_processor/files/log-gearman-client.py
+++ b/modules/log_processor/files/log-gearman-client.py
@@ -20,6 +20,7 @@ import gear
 import json
 import logging
 import os
+import os.path
 import re
 import signal
 import threading
@@ -94,7 +95,9 @@ class EventProcessor(threading.Thread):
         fields["build_master"] = event["build"].get("host_name", "UNKNOWN")
         parameters = event["build"].get("parameters", {})
         fields["project"] = parameters.get("ZUUL_PROJECT", "UNKNOWN")
+        # TODO(clarkb) can we do better without duplicated data here?
         fields["build_uuid"] = parameters.get("ZUUL_UUID", "UNKNOWN")
+        fields["build_short_uuid"] = fields["build_uuid"][:7]
         fields["build_queue"] = parameters.get("ZUUL_PIPELINE", "UNKNOWN")
         fields["build_ref"] = parameters.get("ZUUL_REF", "UNKNOWN")
         fields["build_branch"] = parameters.get("ZUUL_BRANCH", "UNKNOWN")
@@ -115,7 +118,8 @@ class EventProcessor(threading.Thread):
         fields["log_url"] = source_url
         out_event = {}
         out_event["fields"] = fields
-        out_event["tags"] = [fileopts['name']] + fileopts.get('tags', [])
+        out_event["tags"] = [os.path.basename(fileopts['name'])] + \
+            fileopts.get('tags', [])
         return source_url, out_event
 
 
diff --git a/modules/openstack_project/files/logstash/jenkins-log-client.yaml b/modules/openstack_project/files/logstash/jenkins-log-client.yaml
index 1a53defa5c..9d7aff3565 100644
--- a/modules/openstack_project/files/logstash/jenkins-log-client.yaml
+++ b/modules/openstack_project/files/logstash/jenkins-log-client.yaml
@@ -15,21 +15,28 @@ zmq-publishers:
 source-files:
   - name: console.html
     retry-get: True
+    tags:
+      - console
   - name: logs/devstack-gate-cleanup-host.txt
     tags:
+      - console
       - console.html
   - name: logs/devstack-gate-setup-host.txt
     tags:
+      - console
       - console.html
   - name: logs/devstack-gate-setup-workspace-new.txt
     tags:
+      - console
       - console.html
   - name: logs/devstack-gate-setup-workspace-old.txt
     tags:
+      - console
       - console.html
     job-filter: '.*grenade.*'
   - name: logs/devstacklog.txt
     tags:
+      - console
       - console.html
   - name: logs/screen-c-api.txt
     tags:
diff --git a/modules/openstack_project/templates/logstash/indexer.conf.erb b/modules/openstack_project/templates/logstash/indexer.conf.erb
index 3ad2c3e7ee..95062f9b05 100644
--- a/modules/openstack_project/templates/logstash/indexer.conf.erb
+++ b/modules/openstack_project/templates/logstash/indexer.conf.erb
@@ -12,7 +12,7 @@ filter {
   if "screen" in [tags] and [message] =~ "^\+ " {
     drop {}
   }
-  if "console.html" in [tags] {
+  if "console" in [tags] or "console.html" in [tags] {
     if [message] == "<pre>" or [message] == "</pre>" {
       drop {}
     }