Add log processing roles

These roles replace the jenkins-log-client portion of the logstash/subunit queue processing system. Much of the code is derived from the jenkins-log-client. Rather than subscribing to ZMQ events emitted from jenkins (and later zuul v2.5), these roles are intended to be run in a late base post playbook (right after log uploading). They examine the logs directory which contains the files that were just uploaded and any matching files have jobs submitted to the gearman queue. Currently the module attempts to maintain compatibility as much as possible, but eventually we should represent some of the data in a more v3-like manner, as well as consider adding additional data. Some suggestions are noted in comments. Change-Id: I10ea613d9278465b90d891371d2626b9e99f8f31
2017-09-09 15:59:08 -07:00 · 2017-09-09 15:59:08 -07:00 · 47ae93c061
commit 47ae93c061
parent 4b4f0ed9e2
10 changed files with 371 additions and 0 deletions
--- a/roles/submit-log-processor-jobs/README.rst
+++ b/roles/submit-log-processor-jobs/README.rst
@ -0,0 +1,6 @@
+A module to submit a log processing job.
+
+This role is a container for an Ansible module which processes a log
+directory and submits jobs to a log processing gearman queue.  The
+role itself performs no actions, and is intended only to be used by
+other roles as a dependency to supply the module.
--- a/roles/submit-log-processor-jobs/library/submit_log_processor_jobs.py
+++ b/roles/submit-log-processor-jobs/library/submit_log_processor_jobs.py
@ -0,0 +1,170 @@
+# Copyright 2013 Hewlett-Packard Development Company, L.P.
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import logging
+import os
+import re
+
+from ansible.module_utils.six.moves import urllib
+from ansible.module_utils.basic import AnsibleModule
+
+import gear
+import yaml
+
+
+class FileMatcher(object):
+    """Pair an unanchored filename regex with the tags used on a match.
+
+    :param name: Regular expression source.  It is compiled once here and
+        applied with ``search``, so it may match anywhere in the string.
+    :param tags: Tags to associate with any file that matches.
+    """
+
+    def __init__(self, name, tags):
+        # Keep the raw pattern text alongside the compiled form.
+        self._name = name
+        self.name = re.compile(name)
+        self.tags = tags
+
+    def matches(self, s):
+        """Return True if the pattern occurs anywhere in ``s``, else False."""
+        return self.name.search(s) is not None
+
+
+class File(object):
+    """A log file selected for processing, plus the tags it matched with."""
+
+    def __init__(self, name, tags):
+        self.tags = tags
+        self.name = name
+
+    def toDict(self):
+        """Return the file as a plain ``name``/``tags`` dictionary."""
+        return {'name': self.name, 'tags': self.tags}
+
+
+class LogMatcher(object):
+    """Find log files matching configured patterns and queue gearman jobs.
+
+    :param server: Gearman server hostname.
+    :param port: Gearman server port.
+    :param config: Dict with a ``files`` list; each entry has a ``name``
+        regular expression and an optional ``tags`` list.
+    :param success: Truthy when the build succeeded.
+    :param log_url: Base URL onto which matched file names are joined.
+    :param host_vars: Mapping of host name to that host's variables.  The
+        ``zuul`` dict of the first host supplies the build metadata.
+    """
+
+    def __init__(self, server, port, config, success, log_url, host_vars):
+        self.client = gear.Client()
+        self.client.addServer(server, port)
+        self.hosts = host_vars
+        self.zuul = list(host_vars.values())[0]['zuul']
+        self.success = success
+        self.log_url = log_url
+        self.matchers = [
+            FileMatcher(f['name'], f.get('tags', []))
+            for f in config['files']
+        ]
+
+    def findFiles(self, path):
+        """Walk ``path`` and return a ``File`` for every matching file.
+
+        Names are matched relative to ``path``.  Matchers are tried in
+        order and the first one that matches wins, so a file is returned
+        at most once.
+        """
+        results = []
+        for (dirpath, dirnames, filenames) in os.walk(path):
+            for filename in filenames:
+                fn = os.path.join(dirpath, filename)
+                # relpath, rather than slicing off len(path)+1 characters,
+                # stays correct when path ends with a separator.
+                partial_name = os.path.relpath(fn, path)
+                for matcher in self.matchers:
+                    if matcher.matches(partial_name):
+                        results.append(File(partial_name, matcher.tags))
+                        break
+        return results
+
+    def submitJobs(self, jobname, files):
+        """Submit one background ``jobname`` job per entry in ``files``."""
+        self.client.waitForServer()
+        for f in files:
+            output = self.makeOutput(f)
+            job = gear.TextJob(jobname,
+                               json.dumps(output).encode('utf8'))
+            self.client.submitJob(job, background=True)
+
+    def makeOutput(self, file_object):
+        """Build the job payload (retry flag, event, source URL) for a file."""
+        event = self.makeEvent(file_object)
+        return {
+            'retry': False,
+            'event': event,
+            'source_url': event['fields']['log_url'],
+        }
+
+    def makeEvent(self, file_object):
+        """Build the event: metadata fields plus basename and file tags."""
+        out_event = {}
+        out_event["fields"] = self.makeFields(file_object.name)
+        out_event["tags"] = [os.path.basename(file_object.name)] + \
+            file_object.tags
+        return out_event
+
+    def makeFields(self, filename):
+        """Return the metadata fields describing the build and ``filename``."""
+        hosts = self.hosts
+        zuul = self.zuul
+        fields = {}
+        fields["filename"] = filename
+        fields["build_name"] = zuul['job']
+        fields["build_status"] = 'SUCCESS' if self.success else 'FAILURE'
+        # TODO: this is too simplistic for zuul v3 multinode jobs
+        node = list(hosts.values())[0]
+        fields["build_node"] = node['nodepool']['label']
+        # TODO: should be build_executor, or removed completely
+        fields["build_master"] = zuul['executor']['hostname']
+
+        fields["project"] = zuul['project']
+        # The voting value is "1" for voting, "0" for non-voting
+        fields["voting"] = int(zuul['voting'])
+        # TODO(clarkb) can we do better without duplicated data here?
+        fields["build_uuid"] = zuul['build']
+        fields["build_short_uuid"] = fields["build_uuid"][:7]
+        # TODO: this should be build_pipeline
+        fields["build_queue"] = zuul['pipeline']
+        # TODO: this is not interesting anymore
+        fields["build_ref"] = zuul['ref']
+        fields["build_branch"] = zuul.get('branch', 'UNKNOWN')
+        # TODO: remove
+        fields["build_zuul_url"] = "N/A"
+        if 'change' in zuul:
+            fields["build_change"] = zuul['change']
+            fields["build_patchset"] = zuul['patchset']
+        elif 'newrev' in zuul:
+            fields["build_newrev"] = zuul['newrev']
+        fields["node_provider"] = node['nodepool']['provider']
+        # NOTE(review): urljoin replaces the last path segment of log_url
+        # unless it ends with '/' -- confirm callers pass a trailing slash.
+        fields["log_url"] = urllib.parse.urljoin(self.log_url, filename)
+        return fields
+
+
+def main():
+    """Ansible entry point: find matching logs and queue a job for each.
+
+    Reads the module parameters, builds a ``LogMatcher``, collects the
+    files under ``path`` that match ``config`` and submits one ``job``
+    per file.  The matched files are reported in the ``files`` result
+    key on both success and failure.
+    """
+    module = AnsibleModule(
+        argument_spec=dict(
+            gearman_server=dict(type='str'),
+            gearman_port=dict(type='int', default=4730),
+            # TODO: add ssl support
+            host_vars=dict(type='dict'),
+            path=dict(type='path'),
+            config=dict(type='dict'),
+            success=dict(type='bool'),
+            log_url=dict(type='str'),
+            job=dict(type='str'),
+        ),
+    )
+
+    p = module.params
+    results = dict(files=[])
+    try:
+        matcher = LogMatcher(p.get('gearman_server'),
+                             p.get('gearman_port'),
+                             p.get('config'),
+                             p.get('success'),
+                             p.get('log_url'),
+                             p.get('host_vars'))
+        files = matcher.findFiles(p['path'])
+        for f in files:
+            results['files'].append(f.toDict())
+        matcher.submitJobs(p['job'], files)
+    except Exception as e:
+        # Bind the exception directly; the previous get_exception() helper
+        # was never imported, so the handler itself raised NameError.
+        module.fail_json(msg='Unknown error',
+                         details=repr(e),
+                         **results)
+    else:
+        module.exit_json(**results)
+
+
+# Only run main() when this file is executed directly, not when imported.
+if __name__ == '__main__':
+    main()
--- a/roles/submit-logstash-jobs/README.rst
+++ b/roles/submit-logstash-jobs/README.rst
@ -0,0 +1,44 @@
+Submit a log processing job to the logstash workers.
+
+This role examines all of the files in the log subdirectory of the job
+work dir and any matching filenames are submitted to the gearman queue
+for the logstash log processor, along with any tags configured for
+those filenames.
+
+**Role Variables**
+
+.. zuul:rolevar:: logstash_gearman_server
+   :default: logstash.openstack.org
+
+   The gearman server to use.
+
+.. zuul:rolevar:: logstash_processor_config
+   :type: dict
+
+   The default file configuration for the logstash parser.
+
+   This is a dictionary that contains a single entry:
+
+   .. zuul:rolevar:: files
+      :type: list
+
+      A list of files to search for in the ``work/logs/`` directory on
+      the executor.  Each file will be compared to the entries in this
+      list, and if it matches, a processing job will be submitted to
+      the logstash processing queue, along with the tags for the
+      matching entry.  Order is important: the first matching entry is
+      used.  This field is a list of dictionaries, as follows:
+
+      .. zuul:rolevar:: name
+
+         The name of the file to process.  This is treated as an
+         unanchored regular expression.  To match the full path
+         (underneath ``work/logs``) start and end the string with
+         ``^`` and ``$`` respectively.
+
+      .. zuul:rolevar:: tags
+         :type: list
+
+         A list of strings indicating the logstash processing tags
+         associated with this file.  These may be used to indicate the
+         file format to the parser.
--- a/roles/submit-logstash-jobs/defaults/main.yaml
+++ b/roles/submit-logstash-jobs/defaults/main.yaml
@ -0,0 +1,80 @@
+logstash_gearman_server: logstash.openstack.org
+# For every file found in the logs directory (and its subdirs), the
+# module will attempt to match the filenames below.  If there is a
+# match, the file is submitted to the logstash processing queue, along
+# with the tags for that match.  The first match wins, so be sure to
+# list more specific names first.  The names are un-anchored regular
+# expressions (so if you need to match the root (i.e., the work/logs/
+# directory), be sure to anchor them with ^).
+logstash_processor_config:
+  files:
+    - name: console\.html
+      tags:
+        - console
+    - name: grenade\.sh\.txt
+      tags:
+        - console
+        - console.html
+    - name: devstacklog\.txt
+      tags:
+        - console
+        - console.html
+    - name: apache/keystone\.txt
+      tags:
+        - screen
+        - oslofmt
+    - name: apache/horizon_error\.txt
+      tags:
+        - apacheerror
+    # TODO(clarkb) Add swift proxy logs here.
+    - name: syslog\.txt
+      tags:
+        - syslog
+    - name: libvirtd\.txt
+      tags:
+        - libvirt
+    - name: tempest\.txt
+      tags:
+        - screen
+        - oslofmt
+    - name: javelin\.txt
+      tags:
+        - screen
+        - oslofmt
+    # Neutron index log files (files with messages from all test cases)
+    - name: dsvm-functional-index\.txt
+      tags:
+        - oslofmt
+    - name: dsvm-fullstack-index\.txt
+      tags:
+        - oslofmt
+    - name: screen-s-account\.txt
+      tags:
+        - screen
+        - apachecombined
+    - name: screen-s-container\.txt
+      tags:
+        - screen
+        - apachecombined
+    - name: screen-s-object\.txt
+      tags:
+        - screen
+        - apachecombined
+    # tripleo logs
+    - name: postci\.txt
+      tags:
+        - console
+        - postci
+    - name: var/log/extra/logstash\.txt
+      tags:
+        - console
+        - postci
+    # wildcard logs
+    - name: devstack-gate-.*\.txt
+      tags:
+        - console
+        - console.html
+    - name: screen-.*\.txt
+      tags:
+        - screen
+        - oslofmt
--- a/roles/submit-logstash-jobs/meta/main.yaml
+++ b/roles/submit-logstash-jobs/meta/main.yaml
@ -0,0 +1,2 @@
+dependencies:
+  - role: submit-log-processor-jobs
--- a/roles/submit-logstash-jobs/tasks/main.yaml
+++ b/roles/submit-logstash-jobs/tasks/main.yaml
@ -0,0 +1,9 @@
+- name: Submit logstash processing jobs to log processors
+  submit_log_processor_jobs:
+    gearman_server: "{{ logstash_gearman_server }}"
+    job: "push-log"
+    config: "{{ logstash_processor_config }}"
+    success: "{{ success }}"
+    host_vars: "{{ hostvars }}"
+    path: "{{ zuul.executor.log_root }}"
+    log_url: "{{ (lookup('file', zuul.executor.result_data_file) | from_json).get('zuul').get('log_url') }}"
--- a/roles/submit-subunit-jobs/README.rst
+++ b/roles/submit-subunit-jobs/README.rst
@ -0,0 +1,36 @@
+Submit a log processing job to the subunit workers.
+
+This role examines all of the files in the log subdirectory of the job
+work dir and any matching filenames are submitted to the gearman queue
+for the subunit log processor.
+
+**Role Variables**
+
+.. zuul:rolevar:: subunit_gearman_server
+   :default: logstash.openstack.org
+
+   The gearman server to use.
+
+.. zuul:rolevar:: subunit_processor_config
+   :type: dict
+
+   The default file configuration for the subunit parser.
+
+   This is a dictionary that contains a single entry:
+
+   .. zuul:rolevar:: files
+      :type: list
+
+      A list of files to search for in the ``work/logs/`` directory on
+      the executor.  Each file will be compared to the entries in this
+      list, and if it matches, a processing job will be submitted to
+      the subunit processing queue, along with the tags for the
+      matching entry.  Order is important: the first matching entry is
+      used.  This field is a list of dictionaries, as follows:
+
+      .. zuul:rolevar:: name
+
+         The name of the file to process.  This is treated as an
+         unanchored regular expression.  To match the full path
+         (underneath ``work/logs``) start and end the string with
+         ``^`` and ``$`` respectively.
--- a/roles/submit-subunit-jobs/defaults/main.yaml
+++ b/roles/submit-subunit-jobs/defaults/main.yaml
@ -0,0 +1,12 @@
+subunit_gearman_server: logstash.openstack.org
+# For every file found in the logs directory (and its subdirs), the
+# module will attempt to match the filenames below.  If there is a
+# match, the file is submitted to the subunit processing queue, along
+# with the tags for that match.  The first match wins, so be sure to
+# list more specific names first.  The names are un-anchored regular
+# expressions (so if you need to match the root (i.e., the work/logs/
+# directory), be sure to anchor them with ^).
+subunit_processor_config:
+  files:
+    - name: testrepository.subunit
+    - name: karma.subunit
--- a/roles/submit-subunit-jobs/meta/main.yaml
+++ b/roles/submit-subunit-jobs/meta/main.yaml
@ -0,0 +1,2 @@
+dependencies:
+  - role: submit-log-processor-jobs
--- a/roles/submit-subunit-jobs/tasks/main.yaml
+++ b/roles/submit-subunit-jobs/tasks/main.yaml
@ -0,0 +1,10 @@
+- name: Submit subunit processing jobs to log processors
+  when: zuul.pipeline in ['gate', 'periodic', 'post']
+  submit_log_processor_jobs:
+    gearman_server: "{{ subunit_gearman_server }}"
+    job: "push-subunit"
+    config: "{{ subunit_processor_config }}"
+    success: "{{ success }}"
+    host_vars: "{{ hostvars }}"
+    path: "{{ zuul.executor.log_root }}"
+    log_url: "{{ (lookup('file', zuul.executor.result_data_file) | from_json).get('zuul').get('log_url') }}"