From 42e3402806acfb57191d84c2f90df4ba15e7df38 Mon Sep 17 00:00:00 2001 From: Sean Dague Date: Sun, 20 Oct 2013 07:34:20 -0400 Subject: [PATCH] refactor templates into query_builder as part of trying to simplify the core elasticRecheck, refactor the query creation into a separate set of query_builder routines. This takes away some of the duplication between the queries, and attempts to add documentation to the uses for each of them. add elasticRecheck fake pyelasticsearch testing build basic fixtures for unit testing that let us fake out the interaction to pyelasticsearch. This uses the json samples added for previous testing as the return results should an inbound query match one of the queries we know about. If the query is unknown to us, return an empty result set. Unit testing for both cases included going all the way from the top level Classifier class. Change-Id: I0d23b649274b31e8f281aaac588c4c6113a11a47 --- elastic_recheck/elasticRecheck.py | 68 ++----------- elastic_recheck/query_builder.py | 98 +++++++++++++++++++ elastic_recheck/tests/unit/__init__.py | 56 +++++++++++ elastic_recheck/tests/unit/queries.yaml | 83 ++++++++++++++++ .../tests/unit/samples/no-results.json | 14 +++ .../tests/unit/test_elastic_recheck.py | 34 +++++++ 6 files changed, 291 insertions(+), 62 deletions(-) create mode 100644 elastic_recheck/query_builder.py create mode 100644 elastic_recheck/tests/unit/queries.yaml create mode 100644 elastic_recheck/tests/unit/samples/no-results.json create mode 100644 elastic_recheck/tests/unit/test_elastic_recheck.py diff --git a/elastic_recheck/elasticRecheck.py b/elastic_recheck/elasticRecheck.py index f9a52f52..380a7457 100755 --- a/elastic_recheck/elasticRecheck.py +++ b/elastic_recheck/elasticRecheck.py @@ -20,13 +20,13 @@ import pyelasticsearch import urllib2 import ConfigParser -import copy import logging import os import sys import time import yaml +import elastic_recheck.query_builder as qb from elastic_recheck import results logging.basicConfig() @@ -103,54 +103,6 @@ class Classifier(): """ log = logging.getLogger("recheckwatchbot") ES_URL = "http://logstash.openstack.org/elasticsearch" - targeted_template = { - "sort": { - "@timestamp": {"order": "desc"} - }, - "query": { - "query_string": { - "query": '%s AND build_change:"%s" AND build_patchset:"%s"' - } - } - } - files_ready_template = { - "sort": { - "@timestamp": {"order": "desc"} - }, - "query": { - "query_string": { - "query": 'build_status:"FAILURE" AND build_change:"%s" AND build_patchset:"%s"' - } - }, - "facets": { - "tag": { - "terms": { - "field": "filename", - "size": 80 - } - } - } - } - ready_template = { - "sort": { - "@timestamp": {"order": "desc"} - }, - "query": { - "query_string": { - "query": 'filename:"console.html" AND (@message:"Finished: FAILURE" OR message:"Finished: FAILURE") AND build_change:"%s" AND build_patchset:"%s"' - } - } - } - general_template = { - "sort": { - "@timestamp": {"order": "desc"} - }, - "query": { - "query_string": { - "query": '%s' - } - } - } queries = None @@ -159,13 +111,8 @@ class Classifier(): self.queries = yaml.load(open(queries).read()) self.queries_filename = queries - def _apply_template(self, template, values): - query = copy.deepcopy(template) - query['query']['query_string']['query'] = query['query']['query_string']['query'] % values - return query - - def hits_by_query(self, query, size=100): - es_query = self._apply_template(self.general_template, query) + def hits_by_query(self, query, facet=None, size=100): + es_query = qb.generic(query, facet=facet) return self.es.search(es_query, size=size) def classify(self, change_number, patch_number, comment): @@ -183,8 +130,7 @@ class Classifier(): bug_matches = [] for x in self.queries: self.log.debug("Looking for bug: https://bugs.launchpad.net/bugs/%s" % x['bug']) - query = self._apply_template(self.targeted_template, (x['query'], - change_number, patch_number)) + query = qb.single_patch(x['query'], change_number, patch_number) results = self.es.search(query, size='10') if self._urls_match(comment, results): bug_matches.append(x['bug']) @@ -194,8 +140,7 @@ class Classifier(): """Wait till ElasticSearch is ready, but return False if timeout.""" NUMBER_OF_RETRIES = 20 SLEEP_TIME = 40 - query = self._apply_template(self.ready_template, (change_number, - patch_number)) + query = qb.result_ready(change_number, patch_number) for i in range(NUMBER_OF_RETRIES): try: results = self.es.search(query, size='10') @@ -212,8 +157,7 @@ class Classifier(): if i == NUMBER_OF_RETRIES - 1: return False self.log.debug("Found hits for change_number: %s, patch_number: %s" % (change_number, patch_number)) - query = self._apply_template(self.files_ready_template, (change_number, - patch_number)) + query = qb.files_ready(change_number, patch_number) for i in range(NUMBER_OF_RETRIES): results = self.es.search(query, size='80') files = [x['term'] for x in results.terms] diff --git a/elastic_recheck/query_builder.py b/elastic_recheck/query_builder.py new file mode 100644 index 00000000..1cde8cc1 --- /dev/null +++ b/elastic_recheck/query_builder.py @@ -0,0 +1,98 @@ +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Query builder for pyelasticsearch + +A serious of utility methods to build the kinds of queries that are needed +by elastic recheck to talk with elastic search. +""" + + +def generic(raw_query, facet=None): + """Base query builder + + Takes a raw_query string for elastic search. This is typically the same + content that you've typed into logstash to get to a unique result. + + Optionally supports a facet, which is required for certain opperations, + like ensuring that all the expected log files for a job have been + uploaded. + """ + + # they pyelasticsearch inputs are incredibly structured dictionaries + # so be it + query = { + "sort": { + "@timestamp": {"order": "desc"} + }, + "query": { + "query_string": { + "query": raw_query + } + } + } + # if we have a facet, the query gets expanded + if facet: + data = dict(field=facet, size=200) + # yes, elasticsearch is odd, and the way to do multiple facets + # is to specify the plural key value + if type(facet) == list: + data = dict(fields=facet, size=200) + + query['facets'] = { + "tag": { + "terms": data + } + } + + return query + + +def result_ready(review=None, patch=None): + """A query to determine if we have a failure for a particular patch. + + This is looking for a particular FAILURE line in the console log, which + lets us know that we've got results waiting that we need to process. + """ + return generic('filename:"console.html" AND ' + '(@message:"Finished: FAILURE" OR message:"Finished: FAILURE") ' + 'AND build_change:"%s" ' + 'AND build_patchset:"%s"' % + (review, patch)) + + +def files_ready(review, patch): + """A facetted query to ensure all the required files exist. + + When changes are uploaded to elastic search there is a delay in + getting all the required log fixes into the system. This query returns + facets for the failure on the filename, which ensures that we've + found all the files in the system. + """ + return generic('build_status:"FAILURE" ' + 'AND build_change:"%s" ' + 'AND build_patchset:"%s"' % (review, patch), + facet='filename') + + +def single_patch(query, review, patch): + """A query for a single patch (review + revision). + + This is used to narrow down a particular kind of failure found in a + particular patch iteration. + """ + return generic('%s ' + 'AND build_change:"%s" ' + 'AND build_patchset:"%s"' % + (query, review, patch)) diff --git a/elastic_recheck/tests/unit/__init__.py b/elastic_recheck/tests/unit/__init__.py index e69de29b..d8bfc47c 100644 --- a/elastic_recheck/tests/unit/__init__.py +++ b/elastic_recheck/tests/unit/__init__.py @@ -0,0 +1,56 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import fixtures +import json +import yaml + +import elastic_recheck.tests + + +def load_empty(): + with open("elastic_recheck/tests/unit/samples/no-results.json") as f: + return json.load(f) + + +def load_by_bug(bug): + with open("elastic_recheck/tests/unit/samples/bug-%s.json" % bug) as f: + return json.load(f) + + +class FakeES(object): + """A fake elastic search interface. + + This provides a stub of the elastic search interface, so we can return + fake results based on the samples we've already collected to use for + other unit tests. It does this by buiding a reverse mapping from our + queries.yaml file, and grabbing the results we'd find for known bugs. + """ + def __init__(self, url): + self._yaml = yaml.load(open('elastic_recheck/tests/unit/queries.yaml').read()) + self._queries = {} + for item in self._yaml: + self._queries[item['query'].rstrip()] = item['bug'] + + def search(self, query, **kwargs): + qstring = query['query']['query_string']['query'] + if qstring in self._queries: + return load_by_bug(self._queries[qstring]) + return load_empty() + + +class UnitTestCase(elastic_recheck.tests.TestCase): + def setUp(self): + super(UnitTestCase, self).setUp() + + self.useFixture(fixtures.MonkeyPatch('pyelasticsearch.ElasticSearch', + FakeES)) diff --git a/elastic_recheck/tests/unit/queries.yaml b/elastic_recheck/tests/unit/queries.yaml new file mode 100644 index 00000000..df05921f --- /dev/null +++ b/elastic_recheck/tests/unit/queries.yaml @@ -0,0 +1,83 @@ +- bug: 1226337 + query: > + ( @message:"NovaException: iSCSI device not found at" + OR message:"NovaException: iSCSI device not found at" ) + AND filename:"logs/screen-n-cpu.txt" +- bug: 1211915 + query: > + ( @message:"ConnectionFailed: Connection to neutron failed: Maximum attempts reached" + OR message:"ConnectionFailed: Connection to neutron failed: Maximum attempts reached" ) + AND filename:"console.html" +- bug: 1217734 + query: > + ( @message:"CalledProcessError: Command 'openssl' returned non-zero exit status" + OR message:"CalledProcessError: Command 'openssl' returned non-zero exit status" ) +- bug: 1191960 + query: > + (( @message:"Exit code: 5" + AND @message:" sudo cinder-rootwrap /etc/cinder/rootwrap.conf lvremove -f" ) + OR ( message:"Exit code: 5" + AND message:" sudo cinder-rootwrap /etc/cinder/rootwrap.conf lvremove -f" )) + AND filename:"logs/screen-c-vol.txt" +- bug: 1225664 + query: > + ( @message:"Details: Time Limit Exceeded! (400s)while waiting for active, but we got killed." + OR message:"Details: Time Limit Exceeded! (400s)while waiting for active, but we got killed." ) + AND filename:"console.html" +- bug: 1218391 + query: > + ( @message:"Cannot 'createImage'" + OR message:"Cannot 'createImage'" ) + AND filename:"console.html" +- bug: 1229475 + query: > + ( @message:"Second simultaneous read on fileno" + OR message:"Second simultaneous read on fileno" ) +- bug: 1230407 + query: > + ( @message:"Lock wait timeout exceeded; try restarting transaction" + OR message:"Lock wait timeout exceeded; try restarting transaction" ) + AND filename:"logs/screen-q-svc.txt" +- bug: 1224001 + query: > + ( @message:"tempest.scenario.test_network_basic_ops AssertionError: Timed out waiting for" + OR message:"tempest.scenario.test_network_basic_ops AssertionError: Timed out waiting for" ) + AND filename:"console.html" +- bug: 1235486 + query: > + ( @message:"update or delete on table \"networks\" violates foreign key constraint" + OR message:"update or delete on table \"networks\" violates foreign key constraint" ) + AND filename:"logs/screen-q-svc.txt" +- bug: 1232748 + query: > + ( @message:"OperationalError: (OperationalError) could not translate host name \"localhost\" to address" + OR message:"OperationalError: (OperationalError) could not translate host name \"localhost\" to address" ) + AND filename:"logs/screen-n-api.txt" +- bug: 1235435 + query: > + (( @message:"One or more ports have an IP allocation from this subnet" + AND @message:" SubnetInUse: Unable to complete operation on subnet" ) + OR ( message:"One or more ports have an IP allocation from this subnet" + AND message:" SubnetInUse: Unable to complete operation on subnet" )) + AND filename:"logs/screen-q-svc.txt" +- bug: 1235437 + query: > + ( @message:"failed to reach ACTIVE status within the required time (400 s). Current status: BUILD" + OR message:"failed to reach ACTIVE status within the required time (400 s). Current status: BUILD" ) + AND filename:"console.html" +- bug: 1239637 + query: > + ( @message:"DBError: (IntegrityError) null value in column \"network_id\" violates not-null constraint" + OR message:"DBError: (IntegrityError) null value in column \"network_id\" violates not-null constraint" ) + AND filename:"logs/screen-q-svc.txt" +- bug: 1239856 + query: > + (( @message:"tempest/services" AND @message:"/images_client.py" AND @message:"wait_for_image_status" ) + OR (message:"tempest/services" AND message:"/images_client.py" AND message:"wait_for_image_status" )) + AND filename:"console.html" +- bug: 1240256 + query: > + ( @message:" 503" + OR message:" 503" ) + AND filename:"logs/syslog.txt" + AND syslog_program:"proxy-server" diff --git a/elastic_recheck/tests/unit/samples/no-results.json b/elastic_recheck/tests/unit/samples/no-results.json new file mode 100644 index 00000000..e287d96e --- /dev/null +++ b/elastic_recheck/tests/unit/samples/no-results.json @@ -0,0 +1,14 @@ +{ + "hits": { + "hits": [], + "total": 0, + "max_score": null + }, + "_shards": { + "successful": 75, + "failed": 0, + "total": 75 + }, + "took": 53, + "timed_out": false +} \ No newline at end of file diff --git a/elastic_recheck/tests/unit/test_elastic_recheck.py b/elastic_recheck/tests/unit/test_elastic_recheck.py new file mode 100644 index 00000000..589647af --- /dev/null +++ b/elastic_recheck/tests/unit/test_elastic_recheck.py @@ -0,0 +1,34 @@ +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from elastic_recheck import elasticRecheck as er +from elastic_recheck.tests import unit + + +class TestElasticRecheck(unit.UnitTestCase): + def test_hits_by_query_no_results(self): + c = er.Classifier("queries.yaml") + results = c.hits_by_query("this should find no bugs") + self.assertEqual(len(results), 0) + self.assertEqual(results.took, 53) + self.assertEqual(results.timed_out, False) + + def test_hits_by_query(self): + c = er.Classifier("queries.yaml") + q = ('''( @message:"Cannot 'createImage'" OR message:"''' + '''Cannot 'createImage'" ) AND filename:"console.html"''') + results = c.hits_by_query(q) + self.assertEqual(len(results), 20) + self.assertEqual(results.took, 46) + self.assertEqual(results.timed_out, False)