Helena McGough 4180fb5a76 Updated _get_alarm_state function
- Get the state of an alarm based on the severity of the collectd
   notification that is sent and not the message.
 - Added a bug fix reno for this.
 - Reduced the probability that the state will return "insufficient
   data".

Change-Id: Ic993337aa49aed1d189708f5d8cd3935e6d1bbe1
Closes-bug: #1681392
Closes-bug: #1672301
2017-06-26 17:46:37 +00:00

299 lines
11 KiB
Python

# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Aodh collectd plugin implementation."""
from __future__ import division
from __future__ import unicode_literals
import collectd_ceilometer
from collectd_ceilometer.common.keystone_light import ClientV3
from collectd_ceilometer.common.keystone_light import KeystoneException
from collectd_ceilometer.common.settings import Config
import json
import logging
import requests
from requests.exceptions import RequestException
import six
import threading
LOGGER = logging.getLogger(__name__)
ROOT_LOGGER = logging.getLogger(collectd_ceilometer.__name__)
# HTTP status codes
HTTP_CREATED = 201
HTTP_UNAUTHORIZED = 401
HTTP_NOT_FOUND = 404
class Sender(object):
"""Sends the JSON serialized data to Aodh."""
def __init__(self):
"""Create the Sender instance.
The cofiguration must be initialized before the object is created.
"""
self._url_base = None
self._keystone = None
self._auth_token = None
self._auth_lock = threading.Lock()
self._failed_auth = False
self._alarm_ids = {}
def _authenticate(self):
"""Authenticate and renew the authentication token."""
# if auth_token is available, just return it
if self._auth_token is not None:
return self._auth_token
# aquire the authentication lock
with self._auth_lock:
# re-check the auth_token as another thread could set it
if self._auth_token is not None:
return self._auth_token
LOGGER.debug('Authenticating request')
# pylint: disable=broad-except
try:
# create a keystone client if it doesn't exist
if self._keystone is None:
cfg = Config.instance()
self._keystone = ClientV3(
auth_url=cfg.OS_AUTH_URL,
username=cfg.OS_USERNAME,
password=cfg.OS_PASSWORD,
tenant_name=cfg.OS_TENANT_NAME
)
# store the authentication token
self._auth_token = self._keystone.auth_token
# get the uri of service endpoint
endpoint = self._get_endpoint("aodh")
self._url_base = "{}/v2/alarms/%s/state".format(endpoint)
LOGGER.info('Authenticating request - success')
self._failed_auth = False
except KeystoneException as exc:
log_level = logging.DEBUG
if not self._failed_auth:
log_level = logging.ERROR
LOGGER.error(
'Suspending error logs until successful auth'
)
LOGGER.log(log_level, 'Authentication error: %s',
six.text_type(exc),
exc_info=0)
if exc.response:
LOGGER.debug('Response: %s', exc.response)
self._auth_token = None
self._failed_auth = True
return self._auth_token
def send(self, metername, severity, resource_id):
"""Send the payload to Aodh."""
# get the auth_token
auth_token = self._authenticate()
LOGGER.info('Auth_token: %s',
auth_token,
)
# if auth_token is not set, there is nothing to do
if auth_token is None:
LOGGER.debug('Unable to send data. Not authenticated')
return
if self._url_base is None:
LOGGER.debug(
'Unable to send data. Missing endpoint from ident server')
return
# Create alarm name
alarm_name = self._get_alarm_name(metername, resource_id)
# Update or create this alarm
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
if result is None:
return
LOGGER.info('Result: %s %s',
six.text_type(result.status_code),
result.text)
# if the request failed due to an auth error
if result.status_code == HTTP_UNAUTHORIZED:
# reset the auth token in order to force the subsequent
# _authenticate() call to renew it
# Here, it can happen that the token is reset right after
# another thread has finished the authentication and thus
# the authentication may be performed twice
self._auth_token = None
# renew the authentication token
auth_token = self._authenticate()
if auth_token is not None:
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
if result.status_code == HTTP_NOT_FOUND:
LOGGER.debug("Received 404 error when submitting %s notification, \
creating a new alarm",
alarm_name)
# check for and/or get alarm_id
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
if result.status_code == HTTP_CREATED:
LOGGER.debug('Result: %s', HTTP_CREATED)
else:
LOGGER.info('Result: %s %s',
result.status_code,
result.text)
def _get_endpoint(self, service):
"""Get the uri of service endpoint."""
endpoint = self._keystone.get_service_endpoint(
service,
Config.instance().CEILOMETER_URL_TYPE)
return endpoint
def _update_or_create_alarm(self, alarm_name, auth_token,
severity, metername):
# check for an alarm and update
try:
alarm_id = self._get_alarm_id(alarm_name)
result = self._update_alarm(alarm_id, severity, auth_token)
# or create a new alarm
except KeyError as ke:
LOGGER.warn(ke)
endpoint = self._get_endpoint("aodh")
LOGGER.warn('No known ID for %s', alarm_name)
result, self._alarm_ids[alarm_name] = \
self._create_alarm(endpoint, severity, metername, alarm_name)
return result
def _create_alarm(self, endpoint, severity, metername, alarm_name):
"""Create a new alarm with a new alarm_id."""
url = "{}/v2/alarms/".format(endpoint)
rule = {'event_type': metername, }
payload = json.dumps({'state': self._get_alarm_state(severity),
'name': alarm_name,
'severity': severity,
'type': "event",
'event_rule': rule,
})
result = self._perform_post_request(url, payload, self._auth_token)
alarm_id = json.loads(result.text)['alarm_id']
LOGGER.debug("alarm_id=%s", alarm_id)
return result, alarm_id
def _get_alarm_id(self, alarm_name):
"""Try and return an alarm_id for an collectd notification"""
return self._alarm_ids[alarm_name]
def _get_alarm_state(self, severity):
"""Get the state of the alarm."""
if severity in ('critical', 'moderate'):
alarm_state = "alarm"
elif severity == 'low':
alarm_state = "ok"
else:
alarm_state = "insufficient data"
return alarm_state
def _get_alarm_name(self, metername, resource_id):
"""Get the alarm name."""
alarm_name = metername + "(" + resource_id + ")"
return alarm_name
def _update_alarm(self, alarm_id, severity, auth_token):
"""Perform the alarm update."""
url = self._url_base % (alarm_id)
# create the payload and update the state of the alarm
payload = json.dumps(self._get_alarm_state(severity))
return self._perform_update_request(url, auth_token, payload)
@classmethod
def _perform_post_request(cls, url, payload, auth_token):
"""Perform the POST request."""
LOGGER.debug('Performing request to %s', url)
# request headers
headers = {'X-Auth-Token': auth_token,
'Content-type': 'application/json'}
# perform request and return its result
response = None
try:
LOGGER.debug(
"Performing request to: %s with data=%s and headers=%s",
url, payload, headers)
response = requests.post(
url, data=payload, headers=headers,
timeout=(Config.instance().CEILOMETER_TIMEOUT / 1000.))
LOGGER.info('Response: %s: %s',
response.status_code, response.text
)
except RequestException as exc:
LOGGER.error('aodh request error: %s', six.text_type(exc))
finally:
LOGGER.debug(
"Returning response from _perform_post_request(): %s",
response.status_code)
return response
@classmethod
def _perform_update_request(cls, url, auth_token, payload):
"""Perform the PUT/update request."""
LOGGER.debug('Performing request to %s', url)
# request headers
headers = {'X-Auth-Token': auth_token,
'Content-type': 'application/json'}
# perform request and return its result
response = None
try:
LOGGER.debug(
"Performing request to: %s with data=%s and headers=%s",
url, payload, headers)
response = requests.put(
url, data=payload, headers=headers,
timeout=(Config.instance().CEILOMETER_TIMEOUT / 1000.))
LOGGER.info('Response: %s: %s',
response.status_code, response.text
)
except RequestException as exc:
LOGGER.error('aodh request error: %s', six.text_type(exc))
finally:
LOGGER.debug(
'Returning response from _perform_update_request(): %s',
response.status_code)
return response