217723ce6c
This patch enables the event-alarm evaluator to cache alarms per project when it gets those alarms from the DB, so the evaluator won't access the DB while the cache exists and has not expired. Cached alarms are updated when an alarm is fired, for consistency. 'event_alarm_cache_ttl' is added to config. DocImpact Change-Id: I4e6aa14947cf060c44e82c5f43dc02be586eb855 Implements: blueprint event-alarm-evaluator
214 lines
7.4 KiB
Python
214 lines
7.4 KiB
Python
#
|
|
# Copyright 2015 NEC Corporation.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import copy
|
|
import fnmatch
|
|
import operator
|
|
|
|
from oslo_config import cfg
|
|
from oslo_log import log
|
|
from oslo_utils import timeutils
|
|
|
|
from aodh import evaluator
|
|
from aodh.i18n import _, _LE
|
|
|
|
LOG = log.getLogger(__name__)
|
|
|
|
# Maps the 'op' name used in an alarm query condition to the function of the
# same name in the ``operator`` module.
COMPARATORS = {
    op_name: getattr(operator, op_name)
    for op_name in ('gt', 'lt', 'ge', 'le', 'eq', 'ne')
}
|
|
|
|
# Configuration options declared by this module.
OPTS = [
    cfg.IntOpt(
        'event_alarm_cache_ttl',
        default=60,
        help=('TTL of event alarm caches, in seconds. '
              'Set to 0 to disable caching.'),
    ),
]
|
|
|
|
|
|
class EventAlarmEvaluator(evaluator.Evaluator):
    """Evaluate 'event' type alarms against received events.

    Alarms read from storage are cached per project in ``self.caches`` and
    reused until the entry is older than ``conf.event_alarm_cache_ttl``
    seconds. A TTL of 0 disables the cache entirely.
    """

    def __init__(self, conf, notifier):
        super(EventAlarmEvaluator, self).__init__(conf, notifier)
        # project ID -> {'alarms': [alarm, ...], 'updated': <utcnow() value>}
        self.caches = {}

    def evaluate_events(self, events):
        """Evaluate the events by referring related alarms.

        :param events: a single event or a list of events. Anything that is
                       not a list is treated as one event.
        """

        if not isinstance(events, list):
            events = [events]

        LOG.debug('Starting event alarm evaluation: #events = %d',
                  len(events))
        for event in events:
            LOG.debug('Evaluating event: event = %s', event)

            if not self._validate(event):
                LOG.debug('Aborting evaluation of the event.')
                continue

            project = self._get_project(event)
            alarms = self._get_project_alarms(project)
            LOG.debug('Found %(num)d alarms related to the event '
                      '(message_id=%(id)s)',
                      {'num': len(alarms), 'id': event['message_id']})

            for alarm in alarms:
                # A failure on one alarm is logged and must not prevent the
                # remaining alarms from being evaluated.
                try:
                    self._evaluate_alarm(alarm, event)
                except Exception:
                    LOG.exception(_LE('Failed to evaluate alarm (id=%(a)s) '
                                      'triggered by event = %(e)s.'),
                                  {'a': alarm.alarm_id, 'e': event})

        LOG.debug('Finished event alarm evaluation.')

    @staticmethod
    def _validate(event):
        """Validate received event has mandatory parameters.

        :param event: the received event (expected to offer ``get``).
        :returns: True when the event is non-empty and carries both
                  'event_type' and 'message_id'; False otherwise (an error
                  is logged in that case).
        """

        if not event:
            LOG.error(_LE('Received invalid event (empty or None)'))
            return False

        if not event.get('event_type'):
            LOG.error(_LE('Failed to extract event_type from event = %s'),
                      event)
            return False

        if not event.get('message_id'):
            LOG.error(_LE('Failed to extract message_id from event = %s'),
                      event)
            return False

        return True

    @staticmethod
    def _get_project(event):
        """Extract project ID from the event.

        Each trait is indexed positionally: element 0 is compared against
        the trait names and element 2 is returned as the value.

        :returns: the value of the first 'tenant_id' or 'project_id' trait,
                  or '' when the event has no such trait.
        """
        for trait in event.get('traits') or []:
            if trait[0] in (u'tenant_id', u'project_id'):
                return trait[2]
        return ''

    def _get_project_alarms(self, project):
        """Return the enabled event alarms of the project.

        A cached list is returned while caching is enabled and the cache
        entry is not older than the TTL; otherwise the alarms are read from
        storage and, if caching is enabled, the cache entry is replaced.

        :param project: project ID used both as storage filter and cache key.
        :returns: list of alarms.
        """
        if self.conf.event_alarm_cache_ttl and project in self.caches:
            if timeutils.is_older_than(self.caches[project]['updated'],
                                       self.conf.event_alarm_cache_ttl):
                # Expired: drop the entry and fall through to storage.
                del self.caches[project]
            else:
                return self.caches[project]['alarms']

        # Materialize the result: callers take len() of it and _refresh()
        # assigns into the cached sequence by index, so a lazy iterable
        # returned by the storage driver must not be cached or returned as
        # is. (A list result is merely shallow-copied.)
        alarms = list(self._storage_conn.get_alarms(enabled=True,
                                                    alarm_type='event',
                                                    project=project))

        if self.conf.event_alarm_cache_ttl:
            self.caches[project] = {
                'alarms': alarms,
                'updated': timeutils.utcnow()
            }

        return alarms

    @staticmethod
    def _sanitize(event):
        """Change traits format to dict.

        :param event: the received event; it is not modified.
        :returns: a deep copy of the event whose 'traits' entry is a dict
                  built from element 0 (key) and element 2 (value) of each
                  original trait.
        """
        e = copy.deepcopy(event)
        # 'traits' may be present but None; treat that like "no traits",
        # the same way _get_project() does.
        e['traits'] = {t[0]: t[2] for t in event.get('traits') or []}
        return e

    def _evaluate_alarm(self, alarm, event):
        """Evaluate the alarm by referring the received event.

        This function compares each condition of the alarm on the assumption
        that all conditions are combined by AND operator.
        When the received event met conditions defined in alarm 'event_type'
        and 'query', the alarm will be fired and updated to state='alarm'
        (alarmed).
        Note: by this evaluator, the alarm won't be changed to state='ok'
        nor state='insufficient data'.
        """

        LOG.debug('Evaluating alarm (id=%(a)s) triggered by event '
                  '(message_id=%(e)s).',
                  {'a': alarm.alarm_id, 'e': event['message_id']})

        if not alarm.repeat_actions and alarm.state == evaluator.ALARM:
            LOG.debug('Skip evaluation of the alarm id=%s which have already '
                      'fired.', alarm.alarm_id)
            return

        # The alarm's event_type is a shell-style wildcard pattern.
        event_pattern = alarm.rule['event_type']
        if not fnmatch.fnmatch(event['event_type'], event_pattern):
            LOG.debug('Aborting evaluation of the alarm (id=%s) due to '
                      'uninterested event_type.', alarm.alarm_id)
            return

        value = self._sanitize(event)

        def _compare(condition):
            # 'op' defaults to equality when the condition does not name one.
            op = COMPARATORS[condition.get('op', 'eq')]
            v = value
            # Walk the dotted field path (e.g. 'traits.<name>') through
            # nested mappings; stop descending as soon as the current value
            # has no 'get' to look the next component up with.
            for f in condition['field'].split('.'):
                if hasattr(v, 'get'):
                    v = v.get(f)
                else:
                    break
            LOG.debug('Comparing value=%(v)s against condition=%(c)s .',
                      {'v': v, 'c': condition})
            return op(v, condition['value'])

        for condition in alarm.rule['query']:
            if not _compare(condition):
                LOG.debug('Aborting evaluation of the alarm due to '
                          'unmet condition=%s .', condition)
                return

        self._fire_alarm(alarm, event)

    def _fire_alarm(self, alarm, event):
        """Update alarm state and fire alarm via alarm notifier."""

        state = evaluator.ALARM
        reason = (_('Event (message_id=%(message)s) hit the query of alarm '
                    '(id=%(alarm)s)') %
                  {'message': event['message_id'], 'alarm': alarm.alarm_id})
        reason_data = {'type': 'event', 'event': event}
        self._refresh(alarm, state, reason, reason_data)

    def _refresh(self, alarm, state, reason, reason_data):
        """Refresh the alarm via the base class and sync the cached copy.

        After delegating to the base class, the entry with the same alarm_id
        in this project's cache (if any) is replaced by the given alarm with
        its state set to ``state``.
        """
        super(EventAlarmEvaluator, self)._refresh(alarm, state,
                                                  reason, reason_data)

        project = alarm.project_id
        if self.conf.event_alarm_cache_ttl and project in self.caches:
            for index, a in enumerate(self.caches[project]['alarms']):
                if a.alarm_id == alarm.alarm_id:
                    alarm.state = state
                    self.caches[project]['alarms'][index] = alarm
                    break

    # NOTE(r-mibu): This method won't be used, but we have to define here in
    # order to overwrite the abstract method in the super class.
    # TODO(r-mibu): Change the base (common) class design for evaluators.
    def evaluate(self, alarm):
        pass
|