openstack-helm-images/nagios/plugins/query_elasticsearch.py
Gage Hugo e9b2ff0c74 Remove OSH Authors copyright
The current copyright refers to a non-existent group
"openstack helm authors" with often out-of-date references that
are confusing when adding a new file to the repo.

This change removes all references to this copyright by the
non-existent group and any blank lines underneath.

Change-Id: Ic78d29883364378cc14b11402f16d99dcec1fc96
2020-05-07 02:11:23 +00:00

243 lines
9.7 KiB
Python
Executable File

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Query elasticsearch using a combination of simple query pattern,
field matches, and/or query clause and evaluate the results against
the alert threshold"""
from __future__ import print_function
import sys
import argparse
import datetime
import json
from pprint import pprint
import requests
from nagiosutil import NagiosUtil
def check_range(value):
    """Argparse type callback for the search time range.

    Converts *value* to an integer number of minutes and accepts it only
    when it lies between 1 and 1440 (one day) inclusive. Any other input
    raises ``argparse.ArgumentTypeError`` so argparse can report it.
    """
    try:
        span = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            '%s is an invalid data type, Integer value expected.' % value)

    if not 1 <= span <= 1440:
        raise argparse.ArgumentTypeError(
            '%s is an invalid search time range.'
            ' Valid values are between 1 and 1440'
            '(1 day) minutes.' % value)
    return span
def check_threshold(value):
    """Argparse type callback for the critical threshold.

    Converts *value* to an integer and accepts it only when it is
    strictly positive; anything else raises
    ``argparse.ArgumentTypeError`` so argparse can report it.
    """
    try:
        limit = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            '%s is an invalid data type, Integer value expected.' % value)

    if limit >= 1:
        return limit
    raise argparse.ArgumentTypeError(
        '%s is an invalid threshold'
        ' Valid threshold is > 0' % value)
def check_match(value):
    """Argparse type callback for the ``--match`` option.

    Parses a comma separated list of ``field:value`` pairs into a dict
    mapping each field name to its value.

    Returns:
        ``None`` when *value* is empty (an empty match clause is
        tolerated), otherwise the dict of field name -> value.

    Raises:
        argparse.ArgumentTypeError: when any item is not a
            ``field:value`` pair.
    """
    if not value:  # tolerate empty match clause
        return None
    try:
        # Split on the first ':' only, so a value may itself contain
        # colons (timestamps, urls, ...). An item without any ':' yields a
        # one-element list, which dict() rejects with ValueError.
        return dict(item.split(':', 1) for item in value.split(','))
    except ValueError:
        # Only the malformed-pair error is translated; catching
        # BaseException here would also hide KeyboardInterrupt/SystemExit.
        raise argparse.ArgumentTypeError(
            '%s is an invalid match clause(s) list.'
            ' Valid format is a comma separated'
            ' list of field name and value pairs.'
            ' ex. field1:value1,field2:value2,...'
            % value)
def setup_argparse(parser):
    """Register all positional and optional arguments of this check on
    *parser*, together with their help texts, validators and defaults."""
    # Positional arguments that only carry a help text, in command line order.
    plain_positionals = (
        ('endpoint', 'elasticsearch API service endpoint url'),
        ('index', 'elasticsearch index'),
        ('index_type', 'elasticsearch index type'),
        ('ok_msg', 'OK status display message'),
        ('critical_msg', 'Critical status display message'),
    )
    for arg_name, arg_help in plain_positionals:
        parser.add_argument(arg_name, help=arg_help)
    parser.add_argument(
        'critical_threshold',
        type=check_threshold,
        help='Status is Critical if the number of hits >= the threshold')

    # Optional arguments that only carry a help text.
    plain_options = (
        ('--query_file', 'elasticsearch query file name'),
        ('--query_clause', 'elasticsearch query clause name'),
        ('--simple_query', 'elasticsearch simple query str'),
        ('--simple_query_fields',
         'fields to perform the parsed simple query against'),
    )
    for flag, flag_help in plain_options:
        parser.add_argument(flag, help=flag_help)

    # Optional arguments with validation callbacks and/or defaults.
    parser.add_argument(
        '--match',
        type=check_match,
        help=('elasticsearch match clause(s), in the format of'
              ' comma separated list of field name and value pairs.'
              ' ex. field1:value1,field2:value2,...'))
    parser.add_argument(
        '--range',
        type=check_range,
        default=5,
        help=('relative time range between now and x minutes ago.'
              ' must be between 1 <= x <=1440 minutes, default is 5'))
    parser.add_argument(
        '--timeout',
        metavar='timeout',
        type=int,
        default=30,
        required=False,
        help='Number of seconds to wait for response.')

    # Credentials and the debug switch have no help text.
    for credential_flag in ('--usr', '--pwd'):
        parser.add_argument(credential_flag)
    parser.add_argument('--debug', action='store_true')
def get_index_name(args, lt_time, gte_time):
    """Return the daily index name(s) covering the search window.

    Each name is ``<args.index>-<YYYY.MM.DD>``. When both ends of the
    window fall on the same day a single name is returned; otherwise the
    name for *lt_time* comes first, followed by a comma and the name for
    *gte_time*.
    """
    day_format = '%Y.%m.%d'
    upper_index = '-'.join([args.index, lt_time.strftime(day_format)])
    lower_index = '-'.join([args.index, gte_time.strftime(day_format)])

    index_names = [upper_index]
    if lower_index != upper_index:
        index_names.append(lower_index)
    return ','.join(index_names)
def evaluate_results(response, args):
    """Evaluate the search response against the critical threshold and
    report the resulting service status through ``NagiosUtil``.

    Args:
        response: the ``requests`` response of the search request, or
            ``None`` when no response is available.
        args: parsed command line arguments; ``critical_threshold``,
            ``range``, ``debug`` and ``simple_query_fields`` are read.

    Reported status:
        unknown  - no response, an HTTP status outside 200-399, a body
                   that is not a non-empty JSON document, or a document
                   without a usable ``hits.total.value``.
        critical - number of hits >= ``args.critical_threshold``.
        ok       - otherwise.

    The explicit ``return`` after each unknown report keeps the flow
    correct whether or not the ``NagiosUtil`` helper terminates the
    process (it is defined outside this file).
    """
    unexpected = 'Unexpected results found. '

    if (not response or not hasattr(response, 'status_code')
            or response.status_code < 200 or response.status_code >= 400):
        # response may be None here, so do not assume it has a text body.
        NagiosUtil.service_unknown(
            unexpected + (getattr(response, 'text', None) or ''))
        return

    try:
        results = response.json()
    except ValueError:
        # A successful status with a non-JSON body is still unusable.
        results = None
    if not results:
        NagiosUtil.service_unknown(unexpected + response.text)
        return

    try:
        hits_section = results['hits']
        hits = int(hits_section['total']['value']) if hits_section else -1
    except (KeyError, TypeError, ValueError):
        # The document does not have the expected hits.total.value shape.
        hits = -1
    if hits < 0:
        NagiosUtil.service_unknown(unexpected + str(results))
        return

    if args.debug:
        pprint(results)

    crit_message = ('Found %s >= %s(threshold) occurrences within the last %s minute(s). %s')
    message = ('Found %s >= %s(threshold) occurrences within the last %s minute(s).')
    ok_message = ('Found %s [threshold: %s] occurrences within the last %s minute(s).')

    summary = (str(hits), args.critical_threshold, args.range)
    # The help text and the status messages both define critical as
    # "hits >= threshold"; with a minimum threshold of 1 this is also the
    # only way to alert on a single occurrence.
    if hits >= args.critical_threshold:
        field = args.simple_query_fields
        if field:
            # Hits lacking the requested field are skipped, and values are
            # rendered with '%s' so non-string values do not break join().
            found = []
            for hit in results['hits'].get('hits') or []:
                source = hit.get('_source') or {}
                if field in source:
                    found.append('%s' % (source[field],))
            allmsgs = ', '.join(found)
            NagiosUtil.service_critical(crit_message % (summary + (allmsgs,)))
        else:
            NagiosUtil.service_critical(message % summary)
    else:
        NagiosUtil.service_ok(ok_message % summary)
def main():
    """Query elasticsearch using a combination of simple query pattern,
    field matches, and/or query clause, then evaluate the results against
    the alert threshold, and finally return a status to nagios.

    The request is a search template whose ``bool.must`` list always holds
    the ``@timestamp`` range clause and, depending on the command line,
    the clause loaded from ``--query_file``/``--query_clause``, one
    ``match`` clause per ``--match`` pair and a ``simple_query_string``
    clause. Failures to load the query file or to reach elasticsearch are
    reported through ``NagiosUtil.service_unknown``; the explicit
    ``return`` after each report keeps the flow correct whether or not
    that helper terminates the process (it is defined outside this file).
    """
    desc = ('Elasticsearch query using a combination of simple query pattern,'
            ' field matches, and/or query clause. Evaluate the results'
            ' against the alert threshold, and return a status to'
            ' nagios.'
            ' ex. \"query_elasticsearch.py endpoint index index_type'
            ' ok_msg critical_msg critical_threshold'
            ' --query_file query_file_name'
            ' --simple_query simple_query'
            ' --simple_query_fields fields'
            ' --match f1:v1,f2:v2 --range 5 --debug\"')
    parser = argparse.ArgumentParser(description=desc)
    setup_argparse(parser)
    args = parser.parse_args()

    # Search window: [now - range minutes, now), both in naive UTC.
    lt_time = datetime.datetime.utcnow()
    gte_time = lt_time - datetime.timedelta(minutes=int(args.range))

    must_clauses = [
        {
            "range": {
                "@timestamp": {
                    "lt": "{{lt_timestamp}}",
                    "gte": "{{gte_timestamp}}"
                }
            }
        }
    ]
    params = {
        'lt_timestamp': lt_time.isoformat(),
        'gte_timestamp': gte_time.isoformat(),
    }

    if args.query_file and args.query_clause:
        try:
            with open(args.query_file, 'r') as queryfile:
                clause = json.load(queryfile)[args.query_clause]
        except (IOError, OSError, ValueError, KeyError, TypeError) as file_ex:
            # Unreadable file, invalid JSON or missing clause name: report
            # it as a check problem instead of dying with a traceback.
            NagiosUtil.service_unknown(
                'Unable to load query clause. ' + str(file_ex))
            return
        must_clauses.append(clause)

    if args.match:
        for key in args.match:
            must_clauses.append({'match': {key: args.match[key]}})

    if args.simple_query and args.simple_query_fields:
        must_clauses.append({
            "simple_query_string": {
                "fields": ["{{fields}}"],
                "query": "{{query}}"
            }
        })
        params['fields'] = args.simple_query_fields
        params['query'] = args.simple_query

    data = {
        "inline": {
            "query": {
                "bool": {
                    "must": must_clauses
                }
            },
            "size": 10
        },
        "params": params
    }

    url = (args.endpoint + '/' + get_index_name(args, lt_time, gte_time) + '/'
           + args.index_type + '/' + '_search/template/?ignore_unavailable=true')

    if args.debug:
        print('query url:\n' + url)
        print('query data:')
        pprint(data)

    # Credentials are only sent when both user and password are supplied;
    # auth=None is the requests default and means "no authentication".
    auth = (args.usr, args.pwd) if args.usr and args.pwd else None
    try:
        response = requests.post(url, data=json.dumps(data), timeout=args.timeout,
                                 headers={"Content-Type": "application/json"},
                                 auth=auth)
    except requests.exceptions.Timeout as con_ex:
        NagiosUtil.service_unknown('Elasticsearch connection timed out ' + str(con_ex))
        return
    except requests.exceptions.RequestException as req_ex:
        NagiosUtil.service_unknown('Unexpected Error Occurred. ' + str(req_ex))
        return

    evaluate_results(response, args)
if __name__ == '__main__':
    # Run the check when executed as a script; whatever main() returns is
    # handed to sys.exit() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)