diff --git a/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb b/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb index 8607d8d3b..ce75b1961 100644 --- a/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb +++ b/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb @@ -223,7 +223,7 @@ lma_collector: severity: 'critical' # If the local RabbitMQ instance is down, it will be caught by the # rabbitmq-check alarm - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -239,7 +239,7 @@ lma_collector: severity: 'warning' # If the local RabbitMQ instance is down, it will be caught by the # rabbitmq-check alarm - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -255,7 +255,7 @@ lma_collector: severity: 'critical' # If the local RabbitMQ instance is down, it will be caught by the # rabbitmq-check alarm - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -271,7 +271,7 @@ lma_collector: severity: 'warning' # If the local RabbitMQ instance is down, it will be caught by the # rabbitmq-check alarm - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -287,7 +287,7 @@ lma_collector: severity: 'warning' # If the local RabbitMQ instance is down, it will be caught by the # rabbitmq-check alarm - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -301,7 +301,6 @@ lma_collector: - name: 'rabbitmq-pacemaker-down' description: 'The RabbitMQ cluster is down' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the DC node enabled: 'true' trigger: logical_operator: 'and' @@ -318,7 +317,6 @@ lma_collector: - name: 'rabbitmq-pacemaker-critical' description: 'The RabbitMQ cluster is critical because less than half of the nodes are up' severity: 'critical' - no_data_policy: 'skip' # the metric is only collected from the DC node enabled: 'true' trigger: logical_operator: 'and' @@ -335,7 +333,6 @@ lma_collector: - name: 'rabbitmq-pacemaker-warning' description: 'The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing' severity: 'warning' - no_data_policy: 'skip' # the metric is only collected from the DC node enabled: 'true' trigger: logical_operator: 'and' @@ -490,7 +487,7 @@ lma_collector: description: "The filesystem's free space is low" severity: 'warning' enabled: 'true' - no_data_policy: 'okay' + no_data_severity: okay trigger: rules: - metric: fs_space_percent_free @@ -506,7 +503,7 @@ lma_collector: description: "The filesystem's free space is too low" severity: 'warning' enabled: 'true' - no_data_policy: 'okay' + no_data_severity: okay trigger: rules: - metric: fs_space_percent_free @@ -552,7 +549,7 @@ lma_collector: - name: 'nova-logs-error' description: 'Too many errors have been detected in Nova logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -584,7 +581,7 @@ lma_collector: - name: 'heat-logs-error' description: 'Too many errors have been detected in Heat logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -616,7 +613,7 @@ lma_collector: - name: 'swift-logs-error' description: 'Too many errors have been detected in Swift logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -648,7 +645,7 @@ lma_collector: - name: 'cinder-logs-error' description: 'Too many errors have been detected in Cinder logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -680,7 +677,7 @@ lma_collector: - name: 'glance-logs-error' description: 'Too many errors have been detected in Glance logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -712,7 +709,7 @@ lma_collector: - name: 'neutron-logs-error' description: 'Too many errors have been detected in Neutron logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -729,7 +726,7 @@ lma_collector: - name: 'keystone-response-time-duration' description: 'Keystone API is too slow' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -792,7 +789,7 @@ lma_collector: - name: 'keystone-logs-error' description: 'Too many errors have been detected in Keystone logs' severity: 'warning' - no_data_policy: 'okay' + no_data_severity: okay enabled: 'true' trigger: logical_operator: 'or' @@ -1128,7 +1125,7 @@ lma_collector: - name: 'instance-creation-time-warning' description: "Instance creation takes too much time" severity: 'warning' - no_data_policy: 'okay' # This is a sporadic metric + no_data_severity: okay # This is a sporadic metric enabled: 'true' trigger: rules: @@ -1142,7 +1139,7 @@ lma_collector: description: 'Errors on hard drive(s) have been detected' severity: 'critical' enabled: 'true' - no_data_policy: okay + no_data_severity: okay trigger: rules: - metric: hdd_errors_rate @@ -1156,7 +1153,6 @@ lma_collector: description: 'There is none VCPU available for new instances' severity: 'warning' enabled: 'true' - no_data_policy: skip # the metric is only collected from the aggregator node trigger: rules: - metric: openstack_nova_total_free_vcpus @@ -1169,7 +1165,6 @@ lma_collector: description: 'There is none memory available for new instances' severity: 'warning' enabled: 'true' - no_data_policy: skip # the metric is only collected from the aggregator node trigger: rules: - metric: openstack_nova_total_free_ram @@ -1298,7 +1293,6 @@ lma_collector: - name: 'influxdb-api-check-failed' description: 'Endpoint check for InfluxDB is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1313,7 +1307,6 @@ lma_collector: - name: 'nova-api-check-failed' description: 'Endpoint check for nova-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1328,7 +1321,6 @@ lma_collector: - name: 'neutron-api-check-failed' description: 'Endpoint check for neutron-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1343,7 +1335,6 @@ lma_collector: - name: 'cinder-api-check-failed' description: 'Endpoint check for cinder-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1358,7 +1349,6 @@ lma_collector: - name: 'cinder-v2-api-check-failed' description: 'Endpoint check for cinder-v2-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1373,7 +1363,6 @@ lma_collector: - name: 'glance-api-check-failed' description: 'Endpoint check for glance-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1388,7 +1377,6 @@ lma_collector: - name: 'heat-api-check-failed' description: 'Endpoint check for heat-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1403,7 +1391,6 @@ lma_collector: - name: 'heat-cfn-api-check-failed' description: 'Endpoint check for heat-cfn-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1418,7 +1405,6 @@ lma_collector: - name: 'swift-api-check-failed' description: 'Endpoint check for swift-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1433,7 +1419,6 @@ lma_collector: - name: 'swift-s3-api-check-failed' description: 'Endpoint check for swift-s3-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1448,7 +1433,6 @@ lma_collector: - name: 'keystone-public-api-check-failed' description: 'Endpoint check for keystone-public-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: @@ -1463,7 +1447,6 @@ lma_collector: - name: 'ceilometer-api-check-failed' description: 'Endpoint check for ceilometer-api is failed' severity: 'down' - no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP enabled: 'true' trigger: rules: diff --git a/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua b/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua index 4fd660f0a..5d5fa2b4b 100644 --- a/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua +++ b/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua @@ -52,18 +52,12 @@ function Alarm.new(alarm) a.severity_str = string.upper(alarm.severity) a.severity = SEVERITIES[string.lower(alarm.severity)] assert(a.severity ~= nil) - - a.skip_when_no_data = false - if alarm.no_data_policy then - if string.lower(alarm.no_data_policy) == 'skip' then - a.skip_when_no_data = true - else - a.no_data_severity = SEVERITIES[string.lower(alarm.no_data_policy)] - end - else + if alarm.no_data_severity then + a.no_data_severity = SEVERITIES[string.lower(alarm.no_data_severity)] + end + if not a.no_data_severity then a.no_data_severity = consts.UNKW end - assert(a.skip_when_no_data or a.no_data_severity ~= nil) a.rules = {} a.initial_wait = 0 @@ -175,11 +169,7 @@ function Alarm:evaluate(ns) if self.logical_operator == 'and' then if one_unknown then - if self.skip_when_no_data then - state = nil - else - state = self.no_data_severity - end + state = self.no_data_severity elseif #self.rules == matches then state = self.severity end @@ -187,11 +177,7 @@ function Alarm:evaluate(ns) if matches > 0 then state = self.severity elseif one_unknown then - if self.skip_when_no_data then - state = nil - else - state = self.no_data_severity - end + state = self.no_data_severity end end diff --git a/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb b/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb index 6ee5b3fd3..cf968d98c 100644 --- a/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb +++ b/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb @@ -10,8 +10,8 @@ local alarms = { ['name'] = '<%= alarm_name %>', ['description'] = '<%= alarm["description"].to_s().gsub("'"){"\\'"} %>', ['severity'] = '<%= alarm["severity"] %>', -<%- if alarm.key?("no_data_policy") -%> - ['no_data_policy'] = '<%= alarm["no_data_policy"] %>', +<%- if alarm.key?("no_data_severity") -%> + ['no_data_severity'] = '<%= alarm["no_data_severity"] %>', <%- end -%> ['trigger'] = { ['logical_operator'] = '<%= alarm["trigger"]["logical_operator"] || 'or' %>', diff --git a/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua b/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua index 55588f615..38be4dfd2 100644 --- a/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua +++ b/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua @@ -980,7 +980,7 @@ function TestLMAAlarm:test_group_by_missing_field_is_unknown() assertEquals(state, consts.UNKW) end -function TestLMAAlarm:test_no_data_policy_okay() +function TestLMAAlarm:test_no_data_severity_okay() local alarm = { name = 'foo-alarm', description = 'foo description', @@ -1000,7 +1000,7 @@ function TestLMAAlarm:test_no_data_policy_okay() }, }, severity = 'warning', - no_data_policy = 'okay', + no_data_severity = 'okay', } lma_alarm.load_alarm(alarm) lma_alarm.set_start_time(current_time) @@ -1012,7 +1012,7 @@ function TestLMAAlarm:test_no_data_policy_okay() assertEquals(state, consts.OKAY) end -function TestLMAAlarm:test_no_data_policy_critical() +function TestLMAAlarm:test_no_data_severity_critical() local alarm = { name = 'foo-alarm', description = 'foo description', @@ -1032,7 +1032,7 @@ function TestLMAAlarm:test_no_data_policy_critical() }, }, severity = 'critical', - no_data_policy = 'critical', + no_data_severity = 'critical', } lma_alarm.load_alarm(alarm) lma_alarm.set_start_time(current_time) @@ -1044,37 +1044,6 @@ function TestLMAAlarm:test_no_data_policy_critical() assertEquals(state, consts.CRIT) end -function TestLMAAlarm:test_no_data_policy_skip() - local alarm = { - name = 'foo-alarm', - description = 'foo description', - enabled = true, - trigger = { - rules = { - { - metric = 'foo_metric_name', - window = 30, - periods = 1, - ['function'] = 'avg', - fields = { foo = 'bar', bar = 'foo' }, - group_by = {'fs'}, - relational_operator = '<=', - threshold = 5, - }, - }, - }, - severity = 'critical', - no_data_policy = 'skip', - } - lma_alarm.load_alarm(alarm) - lma_alarm.set_start_time(current_time) - - lma_alarm.add_value(next_time(100), 'another_metric', 5) - - local state, result = lma_alarm.evaluate(next_time()) - assertEquals(state, nil) -end - lu = LuaUnit lu:setVerbosity( 1 ) os.exit( lu:run() )