Add log processing roles

These roles replace the jenkins-log-client portion of the
logstash/subunit queue processing system.  Much of the code is
derived from the jenkins-log-client.

Rather than subscribing to ZMQ events emitted from jenkins (and
later zuul v2.5), these roles are intended to be run in a late base
post playbook (right after log uploading).  They examine the logs
directory, which contains the files that were just uploaded, and
submit a job to the gearman queue for each matching file.  Currently
the module attempts to maintain compatibility as much as possible,
but eventually we should represent some of the data in a more v3-like
manner, as well as consider adding additional data.  Some suggestions
are noted in comments.

Change-Id: I10ea613d9278465b90d891371d2626b9e99f8f31
This commit is contained in:
James E. Blair 2017-09-09 15:59:08 -07:00
parent 4b4f0ed9e2
commit 47ae93c061
10 changed files with 371 additions and 0 deletions

View File

@ -0,0 +1,6 @@
A module to submit a log processing job.
This role is a container for an Ansible module which processes a log
directory and submits jobs to a log processing gearman queue. The
role itself performs no actions, and is intended only to be used by
other roles as a dependency to supply the module.

View File

@ -0,0 +1,170 @@
# Copyright 2013 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import os
import re

from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.six.moves import urllib

import gear
import yaml
class FileMatcher(object):
    """Pair a filename regular expression with the tags used on a match."""

    def __init__(self, name, tags):
        """Compile the pattern and remember its tags.

        :param name: Regular expression source; it is searched for
            anywhere in the candidate string (i.e. it is unanchored).
        :param tags: Tags associated with files matching ``name``.
        """
        # Keep the raw pattern text alongside the compiled form.
        self._name = name
        self.name = re.compile(name)
        self.tags = tags

    def matches(self, s):
        """Return True if the pattern occurs anywhere in ``s``, else False."""
        # Always return a real bool rather than True / implicit None.
        return self.name.search(s) is not None
class File(object):
    """A file name together with the list of tags attached to it."""

    def __init__(self, name, tags):
        """Store the file ``name`` and its ``tags`` unchanged."""
        self.name, self.tags = name, tags

    def toDict(self):
        """Return the file as a plain ``{'name': ..., 'tags': ...}`` dict."""
        return {'name': self.name, 'tags': self.tags}
class LogMatcher(object):
    """Find log files matching configured patterns and submit one gearman
    job per matching file."""

    def __init__(self, server, port, config, success, log_url, host_vars):
        """Connect a gearman client and build the list of file matchers.

        :param server: Gearman server host name.
        :param port: Gearman server port.
        :param config: Dict with a ``files`` list; each entry has a
            ``name`` (regular expression) and optional ``tags``.
        :param success: Truthy if the build succeeded.
        :param log_url: Base URL that file names are joined onto.
        :param host_vars: Dict of per-host variable dicts.
        """
        self.client = gear.Client()
        self.client.addServer(server, port)
        self.hosts = host_vars
        # The zuul variables are taken from the first host only.
        self.zuul = list(host_vars.values())[0]['zuul']
        self.success = success
        self.log_url = log_url
        self.matchers = [FileMatcher(f['name'], f.get('tags', []))
                         for f in config['files']]

    def findFiles(self, path):
        """Walk ``path`` and return a File for every file that matches.

        Names are relative to ``path``.  Matchers are tried in order and
        only the first match for a given file is used.
        """
        results = []
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                fn = os.path.join(dirpath, filename)
                # Use relpath rather than slicing off len(path) + 1
                # characters: slicing loses the first character of the
                # name whenever ``path`` ends with a path separator.
                partial_name = os.path.relpath(fn, path)
                for matcher in self.matchers:
                    if matcher.matches(partial_name):
                        results.append(File(partial_name, matcher.tags))
                        break
        return results

    def submitJobs(self, jobname, files):
        """Submit one background gearman job named ``jobname`` per file."""
        self.client.waitForServer()
        for f in files:
            output = self.makeOutput(f)
            job = gear.TextJob(jobname,
                               json.dumps(output).encode('utf8'))
            self.client.submitJob(job, background=True)

    def makeOutput(self, file_object):
        """Return the job payload dict for ``file_object``."""
        output = {}
        output['retry'] = False
        output['event'] = self.makeEvent(file_object)
        output['source_url'] = output['event']['fields']['log_url']
        return output

    def makeEvent(self, file_object):
        """Return the event dict (``fields`` and ``tags``) for a file.

        The tags are the file's base name followed by its configured tags.
        """
        out_event = {}
        out_event["fields"] = self.makeFields(file_object.name)
        out_event["tags"] = ([os.path.basename(file_object.name)] +
                             file_object.tags)
        return out_event

    def makeFields(self, filename):
        """Return the dict of build metadata fields for ``filename``."""
        hosts = self.hosts
        zuul = self.zuul
        fields = {}
        fields["filename"] = filename
        fields["build_name"] = zuul['job']
        fields["build_status"] = 'SUCCESS' if self.success else 'FAILURE'
        # TODO: this is too simplistic for zuul v3 multinode jobs
        node = list(hosts.values())[0]
        fields["build_node"] = node['nodepool']['label']
        # TODO: should be build_executor, or removed completely
        fields["build_master"] = zuul['executor']['hostname']

        fields["project"] = zuul['project']
        # The voting value is "1" for voting, "0" for non-voting
        fields["voting"] = int(zuul['voting'])
        # TODO(clarkb) can we do better without duplicated data here?
        fields["build_uuid"] = zuul['build']
        fields["build_short_uuid"] = fields["build_uuid"][:7]
        # TODO: this should be build_pipeline
        fields["build_queue"] = zuul['pipeline']
        # TODO: this is not interesting anymore
        fields["build_ref"] = zuul['ref']
        fields["build_branch"] = zuul.get('branch', 'UNKNOWN')
        # TODO: remove
        fields["build_zuul_url"] = "N/A"
        if 'change' in zuul:
            fields["build_change"] = zuul['change']
            fields["build_patchset"] = zuul['patchset']
        elif 'newrev' in zuul:
            fields["build_newrev"] = zuul['newrev']
        fields["node_provider"] = node['nodepool']['provider']
        log_url = urllib.parse.urljoin(self.log_url, filename)
        fields["log_url"] = log_url
        return fields
def main():
    """Ansible module entry point.

    Finds the files under ``path`` that match ``config``, submits a
    gearman job named ``job`` for each of them, and reports the matched
    files in the module result under ``files``.  Any exception is
    reported through ``fail_json`` together with the files found so far.
    """
    module = AnsibleModule(
        argument_spec=dict(
            gearman_server=dict(type='str'),
            gearman_port=dict(type='int', default=4730),
            # TODO: add ssl support
            host_vars=dict(type='dict'),
            path=dict(type='path'),
            config=dict(type='dict'),
            success=dict(type='bool'),
            log_url=dict(type='str'),
            job=dict(type='str'),
        ),
    )

    p = module.params
    results = dict(files=[])
    try:
        matcher = LogMatcher(p.get('gearman_server'),
                             p.get('gearman_port'),
                             p.get('config'),
                             p.get('success'),
                             p.get('log_url'),
                             p.get('host_vars'))
        files = matcher.findFiles(p['path'])
        results['files'].extend(f.toDict() for f in files)
        matcher.submitJobs(p['job'], files)
        module.exit_json(**results)
    except Exception as e:
        # Bind the exception directly; get_exception() was never imported
        # here, so calling it would raise NameError and hide the real error.
        module.fail_json(msg='Unknown error',
                         details=repr(e),
                         **results)


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,44 @@
Submit a log processing job to the logstash workers.
This role examines all of the files in the log subdirectory of the job
work dir and any matching filenames are submitted to the gearman queue
for the logstash log processor, along with any tags configured for
those filenames.
**Role Variables**
.. zuul:rolevar:: logstash_gearman_server
:default: logstash.openstack.org
The gearman server to use.
.. zuul:rolevar:: logstash_processor_config
:type: dict
The default file configuration for the logstash parser.
This is a dictionary that contains a single entry:
.. zuul:rolevar:: files
:type: list
A list of files to search for in the ``work/logs/`` directory on
the executor. Each file will be compared to the entries in this
list, and if it matches, a processing job will be submitted to
the logstash processing queue, along with the tags for the
matching entry. Order is important: the first matching entry is used.
This field is a list of dictionaries, as follows:
.. zuul:rolevar:: name
The name of the file to process. This is treated as an
unanchored regular expression. To match the full path
(underneath ``work/logs``) start and end the string with
``^`` and ``$`` respectively.
.. zuul:rolevar:: tags
:type: list
A list of strings indicating the logstash processing tags
associated with this file. These may be used to indicate the
file format to the parser.

View File

@ -0,0 +1,80 @@
logstash_gearman_server: logstash.openstack.org
# For every file found in the logs directory (and its subdirs), the
# module will attempt to match the filenames below. If there is a
# match, the file is submitted to the logstash processing queue, along
# with the tags for that match. The first match wins, so be sure to
# list more specific names first. The names are un-anchored regular
# expressions, so if you need to match from the root (i.e., the
# work/logs/ directory), be sure to anchor them with ^.
logstash_processor_config:
files:
- name: console\.html
tags:
- console
- name: grenade\.sh\.txt
tags:
- console
- console.html
- name: devstacklog\.txt
tags:
- console
- console.html
- name: apache/keystone\.txt
tags:
- screen
- oslofmt
- name: apache/horizon_error\.txt
tags:
- apacheerror
# TODO(clarkb) Add swift proxy logs here.
- name: syslog\.txt
tags:
- syslog
- name: libvirtd\.txt
tags:
- libvirt
- name: tempest\.txt
tags:
- screen
- oslofmt
- name: javelin\.txt
tags:
- screen
- oslofmt
# Neutron index log files (files with messages from all test cases)
- name: dsvm-functional-index\.txt
tags:
- oslofmt
- name: dsvm-fullstack-index\.txt
tags:
- oslofmt
- name: screen-s-account\.txt
tags:
- screen
- apachecombined
- name: screen-s-container\.txt
tags:
- screen
- apachecombined
- name: screen-s-object\.txt
tags:
- screen
- apachecombined
# tripleo logs
- name: postci\.txt
tags:
- console
- postci
- name: var/log/extra/logstash\.txt
tags:
- console
- postci
# wildcard logs
- name: devstack-gate-.*\.txt
tags:
- console
- console.html
- name: screen-.*\.txt
tags:
- screen
- oslofmt

View File

@ -0,0 +1,2 @@
dependencies:
- role: submit-log-processor-jobs

View File

@ -0,0 +1,9 @@
- name: Submit logstash processing jobs to log processors
submit_log_processor_jobs:
gearman_server: "{{ logstash_gearman_server }}"
job: "push-log"
config: "{{ logstash_processor_config }}"
success: "{{ success }}"
host_vars: "{{ hostvars }}"
path: "{{ zuul.executor.log_root }}"
log_url: "{{ (lookup('file', zuul.executor.result_data_file) | from_json).get('zuul').get('log_url') }}"

View File

@ -0,0 +1,36 @@
Submit a log processing job to the subunit workers.
This role examines all of the files in the log subdirectory of the job
work dir and any matching filenames are submitted to the gearman queue
for the subunit log processor.
**Role Variables**
.. zuul:rolevar:: subunit_gearman_server
:default: logstash.openstack.org
The gearman server to use.
.. zuul:rolevar:: subunit_processor_config
:type: dict
The default file configuration for the subunit parser.
This is a dictionary that contains a single entry:
.. zuul:rolevar:: files
:type: list
A list of files to search for in the ``work/logs/`` directory on
the executor. Each file will be compared to the entries in this
list, and if it matches, a processing job will be submitted to
the subunit processing queue, along with the tags for the
matching entry. Order is important: the first matching entry is used.
This field is a list of dictionaries, as follows:
.. zuul:rolevar:: name
The name of the file to process. This is treated as an
unanchored regular expression. To match the full path
(underneath ``work/logs``) start and end the string with
``^`` and ``$`` respectively.

View File

@ -0,0 +1,12 @@
subunit_gearman_server: logstash.openstack.org
# For every file found in the logs directory (and its subdirs), the
# module will attempt to match the filenames below. If there is a
# match, the file is submitted to the subunit processing queue, along
# with the tags for that match. The first match wins, so be sure to
# list more specific names first. The names are un-anchored regular
# expressions, so if you need to match from the root (i.e., the
# work/logs/ directory), be sure to anchor them with ^.
subunit_processor_config:
  files:
    # Names are regular expressions, so the dot is escaped to match only
    # a literal "." (as the logstash processor config does).
    - name: testrepository\.subunit
    - name: karma\.subunit

View File

@ -0,0 +1,2 @@
dependencies:
- role: submit-log-processor-jobs

View File

@ -0,0 +1,10 @@
- name: Submit subunit processing jobs to log processors
when: zuul.pipeline in ['gate', 'periodic', 'post']
submit_log_processor_jobs:
gearman_server: "{{ subunit_gearman_server }}"
job: "push-subunit"
config: "{{ subunit_processor_config }}"
success: "{{ success }}"
host_vars: "{{ hostvars }}"
path: "{{ zuul.executor.log_root }}"
log_url: "{{ (lookup('file', zuul.executor.result_data_file) | from_json).get('zuul').get('log_url') }}"