-- fuel-plugin-lma-collector/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua
--
-- 217 lines
-- 6.0 KiB
-- Lua

-- Copyright 2015 Mirantis, Inc.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
local assert = assert
local ipairs = ipairs
local pairs = pairs
local string = string
local setmetatable = setmetatable
-- LMA libs
local utils = require 'lma_utils'
local table_utils = require 'table_utils'
local consts = require 'gse_constants'
local afd = require 'afd'
local Rule = require 'afd_rule'
-- Lookup table from lower-case status names to the matching gse_constants
-- values. Alarm.new() indexes it with the lower-cased 'severity' and
-- 'no_data_policy' settings of an alarm definition.
local SEVERITIES = {
warning = consts.WARN,
critical = consts.CRIT,
down = consts.DOWN,
unknown = consts.UNKW,
okay = consts.OKAY,
}
-- Alarm is both the table holding the methods and the metatable given to
-- every instance by Alarm.new(): a method lookup that misses on an instance
-- falls back to Alarm through __index.
local Alarm = {}
Alarm.__index = Alarm
-- From here on, every non-local name used in this chunk (and in the functions
-- defined below) is resolved in Alarm instead of the global table. Anything
-- the methods need must therefore be captured in a local above this line.
setfenv(1, Alarm) -- Remove external access to contain everything in the module
-- Build an Alarm instance from an alarm definition table.
--
-- Fields read from `alarm`:
--   name, description         copied as-is
--   severity                  string; its lower-case form must be a key of
--                             SEVERITIES
--   no_data_policy            optional string; 'skip' (case-insensitive) or
--                             a key of SEVERITIES. When absent, the no-data
--                             state is consts.UNKW
--   trigger.logical_operator  optional string, stored lower-cased;
--                             defaults to 'or'
--   trigger.rules             list of rule definitions, each one passed to
--                             Rule.new()
--
-- Raises an error when the severity or the no-data policy is not recognised.
function Alarm.new(alarm)
    local a = setmetatable({}, Alarm)
    a._metrics_list = nil -- cache filled on first call to get_metrics()
    a.name = alarm.name
    a.description = alarm.description

    if alarm.trigger.logical_operator then
        a.logical_operator = string.lower(alarm.trigger.logical_operator)
    else
        a.logical_operator = 'or'
    end

    a.severity_str = string.upper(alarm.severity)
    a.severity = SEVERITIES[string.lower(alarm.severity)]
    assert(a.severity ~= nil, 'unknown alarm severity')

    a.skip_when_no_data = false
    if alarm.no_data_policy then
        if string.lower(alarm.no_data_policy) == 'skip' then
            a.skip_when_no_data = true
        else
            a.no_data_severity = SEVERITIES[string.lower(alarm.no_data_policy)]
        end
    else
        a.no_data_severity = consts.UNKW
    end
    assert(a.skip_when_no_data or a.no_data_severity ~= nil,
           'unknown alarm no_data_policy')

    a.rules = {}
    -- Longest (window * periods) among the rules, in nanoseconds; compared
    -- with a nanosecond delta by is_evaluation_time().
    a.initial_wait = 0
    for _, rule in ipairs(alarm.trigger.rules) do
        local r = Rule.new(rule)
        a.rules[#a.rules+1] = r
        -- Convert before comparing: initial_wait is kept in nanoseconds, so
        -- comparing the unscaled product against it would stop any rule
        -- after the first one from ever raising the maximum.
        -- NOTE(review): assumes r.window is expressed in seconds -- confirm
        -- against afd_rule.
        local wait_ns = r.window * r.periods * 1e9
        if wait_ns > a.initial_wait then
            a.initial_wait = wait_ns
        end
    end
    a.start_time_ns = 0

    return a
end
-- Return the list of distinct metric names used by the rules of the alarm.
--
-- The list is computed on the first call and cached in self._metrics_list;
-- later calls return the same table.
function Alarm:get_metrics()
    if not self._metrics_list then
        local metrics = {}
        for _, rule in ipairs(self.rules) do
            -- Look the name up in the list being built so that a metric
            -- shared by several rules is recorded only once.
            if not table_utils.item_find(rule.metric, metrics) then
                metrics[#metrics+1] = rule.metric
            end
        end
        self._metrics_list = metrics
    end
    return self._metrics_list
end
-- Collect the field names referenced by every rule bound to `metric_name`:
-- the keys of the rule's `fields` table followed by the entries of its
-- `group_by` list. No de-duplication is done, so a name may appear more
-- than once. Returns an empty list when no rule uses the metric.
function Alarm:get_metric_fields(metric_name)
    local names = {}
    local count = 0
    for _, rule in ipairs(self.rules) do
        if rule.metric == metric_name then
            for field_name in pairs(rule.fields) do
                count = count + 1
                names[count] = field_name
            end
            for _, group_field in ipairs(rule.group_by) do
                count = count + 1
                names[count] = group_field
            end
        end
    end
    return names
end
-- Tell whether `metric` is one of the metrics used by the alarm's rules.
-- Returns whatever table_utils.item_find() returns for the lookup.
function Alarm:has_metric(metric)
    local known_metrics = self:get_metrics()
    return table_utils.item_find(metric, known_metrics)
end
-- Dispatch a datapoint to the datastore of every rule watching `metric`.
--
-- ts, value and fields are forwarded untouched to rule:add_value(); rules
-- bound to another metric are left alone. Returns nothing.
function Alarm:add_value(ts, metric, value, fields)
    for _, rule in pairs(self.rules) do
        if rule.metric == metric then
            rule:add_value(ts, value, fields)
        end
    end
end
-- Evaluate every rule of the alarm at time `ns` and combine the results
-- according to the alarm's logical operator.
--
-- Returns two values: the state of the alarm and a list of alert details.
--
-- State:
--   * self.severity when the alarm fires ('and': all rules match and none
--     lacks data; 'or': at least one rule matches),
--   * self.no_data_severity when a rule reported afd.MISSING_DATA or
--     afd.NO_DATA and that outcome decides the result, or nil instead when
--     the alarm was configured to skip on missing data,
--   * consts.OKAY otherwise (including an operator other than 'and'/'or').
--
-- Alert list (always empty when the state is nil or consts.OKAY), one entry
-- per context returned by the rules:
-- {
--   {
--     severity = <alarm severity, upper-case string>,
--     ['function'] = <rule.fct>,
--     metric = <rule.metric>,
--     operator = <rule.relational_operator>,
--     threshold = <rule.threshold>,
--     window = <rule.window>,
--     periods = <rule.periods>,
--     value = <current value>,
--     fields = <metric fields table>,
--     message = <string>,
--   },
-- }
function Alarm:evaluate(ns)
    local alerts = {}
    local match_count = 0
    local has_unknown = false
    -- Declared outside the loop on purpose: a rule whose result is none of
    -- MATCH / MISSING_DATA / NO_DATA keeps the message of the previous rule.
    -- NOTE(review): confirm such a rule never returns contexts.
    local message

    -- State to report when data is missing, honouring the 'skip' policy.
    local function no_data_state()
        if self.skip_when_no_data then
            return nil
        end
        return self.no_data_severity
    end

    for _, rule in ipairs(self.rules) do
        local result, contexts = rule:evaluate(ns)
        if result == afd.MATCH then
            match_count = match_count + 1
            message = self.description
        elseif result == afd.MISSING_DATA then
            message = 'No datapoint have been received over the last ' .. rule.observation_window .. ' seconds'
            has_unknown = true
        elseif result == afd.NO_DATA then
            message = 'No datapoint have been received ever'
            has_unknown = true
        end

        for _, context in ipairs(contexts) do
            alerts[#alerts+1] = {
                severity = self.severity_str,
                ['function'] = rule.fct,
                metric = rule.metric,
                operator = rule.relational_operator,
                threshold = rule.threshold,
                window = rule.window,
                periods = rule.periods,
                value = context.value,
                fields = context.fields,
                message = message,
            }
        end
    end

    local state = consts.OKAY
    local operator = self.logical_operator
    if operator == 'and' then
        -- Missing data takes precedence over matches.
        if has_unknown then
            state = no_data_state()
        elseif #self.rules == match_count then
            state = self.severity
        end
    elseif operator == 'or' then
        -- A single match is enough, even when other rules lack data.
        if match_count > 0 then
            state = self.severity
        elseif has_unknown then
            state = no_data_state()
        end
    end

    if state == nil or state == consts.OKAY then
        return state, {}
    end
    return state, alerts
end
-- Record `ns` as the reference time from which is_evaluation_time()
-- measures the initial waiting period.
function Alarm:set_start_time(ns)
    self.start_time_ns = ns
end
-- Return true once at least self.initial_wait has elapsed between the
-- recorded start time and `ns`, false before that.
function Alarm:is_evaluation_time(ns)
    local elapsed = ns - self.start_time_ns
    return elapsed >= self.initial_wait
end
return Alarm