
The exception in [1] occurs, for example with the query in [2], when
Elasticsearch returns hits where zuul_executor is sometimes a single
hostname and sometimes a list of hostnames.

This is the kind of data that line 58 (the sorted() call in
analyze_attributes()) handles fine:

  [(12.5, '5'), (12.5, '4'), (12.5, '3'), (25.0, '6'), (18.75, '2'),
   (6.25, '13'), (12.5, '1')]

and this is the data on which it throws the exception, because the last
entry's value is a list while all the others are strings:

  [(8.13953488372093, 'ze06.opendev.org'),
   (12.790697674418604, 'ze10.opendev.org'),
   (5.813953488372093, 'ze05.opendev.org'),
   (8.13953488372093, 'ze01.opendev.org'),
   (16.27906976744186, 'ze04.opendev.org'),
   (4.651162790697675, 'ze03.opendev.org'),
   (3.488372093023256, 'ze02.opendev.org'),
   (4.651162790697675, 'ze08.opendev.org'),
   (12.790697674418604, 'ze09.opendev.org'),
   (20.930232558139537, 'ze12.opendev.org'),
   (1.1627906976744187, 'ze11.opendev.org'),
   (1.1627906976744187, ['ze12.opendev.org', 'ze11.opendev.org'])]

[1] https://0050cb9fd8118437e3e0-3c2a18acb5109e625907972e3aa6a592.ssl.cf5.rackcdn.com/790065/7/check/openstack-tox-py38/4968a73/tox/test_results/1449136.yaml.log
[2] https://review.opendev.org/c/openstack/tripleo-ci-health-queries/+/787569/6/output/elastic-recheck/1449136.yaml

Change-Id: Ie559d5764d9f68420119a7f9608389f0745a9c02
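For context, the underlying failure is that Python 3 refuses to order tuples whose second elements mix str and list. A minimal repro sketch, using two of the failing values above and the same sort-key shape as analyze_attributes() in the file below, with and without the str() cast:

    results = [
        (1.1627906976744187, 'ze11.opendev.org'),
        (1.1627906976744187, ['ze12.opendev.org', 'ze11.opendev.org']),
    ]

    # Comparing the raw values blows up once the percentages tie:
    #   sorted(results, key=lambda x: (1 - x[0], x[1]))
    #   TypeError: '<' not supported between instances of 'list' and 'str'

    # Casting the value to str keeps the ordering total across mixed types:
    print(sorted(results, key=lambda x: (1 - x[0], str(x[1]))))
    # [(1.1627906976744187, ['ze12.opendev.org', 'ze11.opendev.org']),
    #  (1.1627906976744187, 'ze11.opendev.org')]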
#!/usr/bin/env python

# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import argparse
import itertools
import json
import yaml

import elastic_recheck.config as er_conf
import elastic_recheck.log as logging
import elastic_recheck.results as er_results

LOG = logging.getLogger('erquery')

DEFAULT_NUMBER_OF_DAYS = 10
DEFAULT_MAX_QUANTITY = 5
IGNORED_ATTRIBUTES = [
    'build_master',
    'build_patchset',
    'build_ref',
    'build_short_uuid',
    'build_uuid',
    'error_pr',
    'host',
    'received_at',
    'type',
]


def analyze_attributes(attributes):
    analysis = {}
    for attribute, values in attributes.items():
        if attribute[0] == '@' or attribute == 'message':
            # skip meta attributes and raw messages
            continue

        analysis[attribute] = []

        total_hits = sum(values.values())
        for value_hash, hits in values.items():
            value = json.loads(value_hash)
            analysis[attribute].append((100.0 * hits / total_hits, value))

        # sort by hit percentage descending, and then by value ascending;
        # the str() cast keeps mixed value types (e.g. a hostname string
        # alongside a list of hostnames) comparable under Python 3
        analysis[attribute] = sorted(
            analysis[attribute],
            key=lambda x: (1 - x[0], str(x[1])))

    return analysis


def query(query_file_name, config=None, days=DEFAULT_NUMBER_OF_DAYS,
          quantity=DEFAULT_MAX_QUANTITY, verbose=False):
    _config = config or er_conf.Config()
    es = er_results.SearchEngine(url=_config.es_url,
                                 indexfmt=_config.es_index_format)

    with open(query_file_name) as f:
        query_file = yaml.safe_load(f.read())
        query = query_file['query']

    r = es.search(query, days=days)
    print('total hits: %s' % r.hits['total'])

    attributes = {}

    for hit in r.hits['hits']:
        for key, value in hit['_source'].items():
            value_hash = json.dumps(value)
            attributes.setdefault(key, {}).setdefault(value_hash, 0)
            attributes[key][value_hash] += 1

    analysis = analyze_attributes(attributes)
    for attribute, results in sorted(analysis.items()):
        if not verbose and attribute in IGNORED_ATTRIBUTES:
            # skip less-than-useful attributes to reduce noise in the report
            continue

        print(attribute)
        for percentage, value in itertools.islice(results, quantity):
            if isinstance(value, list):
                value = ' '.join(str(x) for x in value)
            print(' %d%% %s' % (percentage, value))


def main():
    parser = argparse.ArgumentParser(
        description='Execute elastic-recheck query files and analyze the '
                    'results.')
    parser.add_argument(
        'query_file', type=argparse.FileType('r'),
        help='Path to an elastic-recheck YAML query file.')
    parser.add_argument(
        '--quantity', '-q', type=int, default=DEFAULT_MAX_QUANTITY,
        help='Maximum quantity of values to show for each attribute.')
    parser.add_argument(
        '--days', '-d', type=int, default=DEFAULT_NUMBER_OF_DAYS,
        help='Timespan to query, in days.')
    parser.add_argument(
        '--verbose', '-v', action='store_true', default=False,
        help='Report on additional query metadata.')
    parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database uri.")
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)

    query(args.query_file.name, config=config, days=args.days,
          quantity=args.quantity, verbose=args.verbose)


if __name__ == "__main__":
    main()
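As a complementary sketch, the snippet below hand-builds the attributes mapping the way query() accumulates it (JSON-encoded values as keys) for the mixed zuul_executor case from the commit message and feeds it to analyze_attributes(). It is illustrative only: it assumes analyze_attributes() from this file is already in scope (e.g. in a REPL after importing the module), and the counts are made up to mirror the bug rather than taken from the linked logs.

    # Two hits for zuul_executor: one value is a single hostname, the other a
    # list of hostnames, keyed by their JSON-encoded form as in query().
    attributes = {
        'zuul_executor': {
            '"ze11.opendev.org"': 1,
            '["ze12.opendev.org", "ze11.opendev.org"]': 1,
        },
    }

    analysis = analyze_attributes(attributes)
    print(analysis['zuul_executor'])
    # [(50.0, ['ze12.opendev.org', 'ze11.opendev.org']), (50.0, 'ze11.opendev.org')]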