From 8794ee5b3bf9ff368e3eea1b56c9cec089159610 Mon Sep 17 00:00:00 2001
From: Swann Croiset
Date: Tue, 11 Oct 2016 09:32:18 +0200
Subject: [PATCH] Revert "Remove the no_data_policy=skip for AFD"

This reverts commit 1612638e62aee7b547271dea4b1c4126dfa97394.

Change-Id: I9ed3f4c48835e799a08442b5ba8470ca6f676922
---
 .../templates/alarming.yaml.erb             | 51 ++++++++++++-------
 .../files/plugins/common/afd_alarm.lua      | 26 +++++++---
 .../templates/lma_alarms.lua.erb            |  4 +-
 .../tests/lua/test_afd_alarm.lua            | 39 ++++++++++++--
 4 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb b/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb
index 8e1bdcafb..d9b31f413 100644
--- a/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb
+++ b/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb
@@ -223,7 +223,7 @@ lma_collector:
       severity: 'critical'
       # If the local RabbitMQ instance is down, it will be caught by the
       # rabbitmq-check alarm
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -239,7 +239,7 @@ lma_collector:
       severity: 'warning'
       # If the local RabbitMQ instance is down, it will be caught by the
       # rabbitmq-check alarm
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -255,7 +255,7 @@ lma_collector:
       severity: 'critical'
       # If the local RabbitMQ instance is down, it will be caught by the
       # rabbitmq-check alarm
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -271,7 +271,7 @@ lma_collector:
       severity: 'warning'
       # If the local RabbitMQ instance is down, it will be caught by the
       # rabbitmq-check alarm
-      no_data_severity: okay
+      no_data_policy: 'okay'
      enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -287,7 +287,7 @@ lma_collector:
       severity: 'warning'
       # If the local RabbitMQ instance is down, it will be caught by the
       # rabbitmq-check alarm
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -301,6 +301,7 @@ lma_collector:
     - name: 'rabbitmq-pacemaker-down'
       description: 'The RabbitMQ cluster is down'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the DC node
       enabled: 'true'
       trigger:
         logical_operator: 'and'
@@ -317,6 +318,7 @@ lma_collector:
     - name: 'rabbitmq-pacemaker-critical'
       description: 'The RabbitMQ cluster is critical because less than half of the nodes are up'
       severity: 'critical'
+      no_data_policy: 'skip' # the metric is only collected from the DC node
       enabled: 'true'
       trigger:
         logical_operator: 'and'
@@ -333,6 +335,7 @@ lma_collector:
     - name: 'rabbitmq-pacemaker-warning'
      description: 'The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing'
       severity: 'warning'
+      no_data_policy: 'skip' # the metric is only collected from the DC node
       enabled: 'true'
       trigger:
         logical_operator: 'and'
@@ -487,7 +490,7 @@ lma_collector:
       description: "The filesystem's free space is low"
       severity: 'warning'
       enabled: 'true'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       trigger:
         rules:
           - metric: fs_space_percent_free
@@ -503,7 +506,7 @@ lma_collector:
       description: "The filesystem's free space is too low"
       severity: 'critical'
       enabled: 'true'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       trigger:
         rules:
           - metric: fs_space_percent_free
@@ -549,7 +552,7 @@ lma_collector:
     - name: 'nova-logs-error'
       description: 'Too many errors have been detected in Nova logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -581,7 +584,7 @@ lma_collector:
     - name: 'heat-logs-error'
       description: 'Too many errors have been detected in Heat logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -613,7 +616,7 @@ lma_collector:
     - name: 'swift-logs-error'
       description: 'Too many errors have been detected in Swift logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -645,7 +648,7 @@ lma_collector:
     - name: 'cinder-logs-error'
       description: 'Too many errors have been detected in Cinder logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -677,7 +680,7 @@ lma_collector:
     - name: 'glance-logs-error'
       description: 'Too many errors have been detected in Glance logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -709,7 +712,7 @@ lma_collector:
     - name: 'neutron-logs-error'
       description: 'Too many errors have been detected in Neutron logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -726,7 +729,7 @@ lma_collector:
     - name: 'keystone-response-time-duration'
       description: 'Keystone API is too slow'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -789,7 +792,7 @@ lma_collector:
     - name: 'keystone-logs-error'
       description: 'Too many errors have been detected in Keystone logs'
       severity: 'warning'
-      no_data_severity: okay
+      no_data_policy: 'okay'
       enabled: 'true'
       trigger:
         logical_operator: 'or'
@@ -1125,7 +1128,7 @@ lma_collector:
     - name: 'instance-creation-time-warning'
       description: "Instance creation takes too much time"
       severity: 'warning'
-      no_data_severity: okay # This is a sporadic metric
+      no_data_policy: 'okay' # This is a sporadic metric
       enabled: 'true'
       trigger:
         rules:
@@ -1139,7 +1142,7 @@ lma_collector:
       description: 'Errors on hard drive(s) have been detected'
       severity: 'critical'
       enabled: 'true'
-      no_data_severity: okay
+      no_data_policy: okay
       trigger:
         rules:
           - metric: hdd_errors_rate
@@ -1153,6 +1156,7 @@ lma_collector:
       description: 'There is none VCPU available for new instances'
       severity: 'warning'
       enabled: 'true'
+      no_data_policy: skip # the metric is only collected from the aggregator node
       trigger:
         rules:
           - metric: openstack_nova_total_free_vcpus
@@ -1165,6+1169,7 @@ lma_collector:
       description: 'There is none memory available for new instances'
       severity: 'warning'
       enabled: 'true'
+      no_data_policy: skip # the metric is only collected from the aggregator node
       trigger:
         rules:
           - metric: openstack_nova_total_free_ram
@@ -1293,6 +1298,7 @@ lma_collector:
     - name: 'influxdb-api-check-failed'
       description: 'Endpoint check for InfluxDB is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1307,6 +1313,7 @@ lma_collector:
     - name: 'nova-api-check-failed'
       description: 'Endpoint check for nova-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1321,6 +1328,7 @@ lma_collector:
     - name: 'neutron-api-check-failed'
       description: 'Endpoint check for neutron-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1335,6 +1343,7 @@ lma_collector:
     - name: 'cinder-api-check-failed'
       description: 'Endpoint check for cinder-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1349,6 +1358,7 @@ lma_collector:
     - name: 'cinder-v2-api-check-failed'
       description: 'Endpoint check for cinder-v2-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1363,6 +1373,7 @@ lma_collector:
     - name: 'glance-api-check-failed'
       description: 'Endpoint check for glance-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1377,6 +1388,7 @@ lma_collector:
     - name: 'heat-api-check-failed'
       description: 'Endpoint check for heat-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1391,6 +1403,7 @@ lma_collector:
     - name: 'heat-cfn-api-check-failed'
       description: 'Endpoint check for heat-cfn-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1405,6 +1418,7 @@ lma_collector:
     - name: 'swift-api-check-failed'
       description: 'Endpoint check for swift-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1419,6 +1433,7 @@ lma_collector:
     - name: 'swift-s3-api-check-failed'
       description: 'Endpoint check for swift-s3-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1433,6 +1448,7 @@ lma_collector:
     - name: 'keystone-public-api-check-failed'
       description: 'Endpoint check for keystone-public-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
@@ -1447,6 +1463,7 @@ lma_collector:
     - name: 'ceilometer-api-check-failed'
       description: 'Endpoint check for ceilometer-api is failed'
       severity: 'down'
+      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
       enabled: 'true'
       trigger:
         rules:
diff --git a/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua b/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua
index 5d5fa2b4b..4fd660f0a 100644
--- a/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua
+++ b/deployment_scripts/puppet/modules/lma_collector/files/plugins/common/afd_alarm.lua
@@ -52,12 +52,18 @@ function Alarm.new(alarm)
     a.severity_str = string.upper(alarm.severity)
     a.severity = SEVERITIES[string.lower(alarm.severity)]
     assert(a.severity ~= nil)
-    if alarm.no_data_severity then
-        a.no_data_severity = SEVERITIES[string.lower(alarm.no_data_severity)]
-    end
-    if not a.no_data_severity then
+
+    a.skip_when_no_data = false
+    if alarm.no_data_policy then
+        if string.lower(alarm.no_data_policy) == 'skip' then
+            a.skip_when_no_data = true
+        else
+            a.no_data_severity = SEVERITIES[string.lower(alarm.no_data_policy)]
+        end
+    else
         a.no_data_severity = consts.UNKW
     end
+    assert(a.skip_when_no_data or a.no_data_severity ~= nil)
 
     a.rules = {}
     a.initial_wait = 0
@@ -169,7 +175,11 @@ function Alarm:evaluate(ns)
 
     if self.logical_operator == 'and' then
         if one_unknown then
-            state = self.no_data_severity
+            if self.skip_when_no_data then
+                state = nil
+            else
+                state = self.no_data_severity
+            end
         elseif #self.rules == matches then
             state = self.severity
         end
@@ -177,7 +187,11 @@
         if matches > 0 then
             state = self.severity
         elseif one_unknown then
-            state = self.no_data_severity
+            if self.skip_when_no_data then
+                state = nil
+            else
+                state = self.no_data_severity
+            end
         end
     end
 
diff --git a/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb b/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb
index cf968d98c..6ee5b3fd3 100644
--- a/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb
+++ b/deployment_scripts/puppet/modules/lma_collector/templates/lma_alarms.lua.erb
@@ -10,8 +10,8 @@ local alarms = {
     ['name'] = '<%= alarm_name %>',
     ['description'] = '<%= alarm["description"].to_s().gsub("'"){"\\'"} %>',
     ['severity'] = '<%= alarm["severity"] %>',
-<%- if alarm.key?("no_data_severity") -%>
-    ['no_data_severity'] = '<%= alarm["no_data_severity"] %>',
+<%- if alarm.key?("no_data_policy") -%>
+    ['no_data_policy'] = '<%= alarm["no_data_policy"] %>',
 <%- end -%>
     ['trigger'] = {
       ['logical_operator'] = '<%= alarm["trigger"]["logical_operator"] || 'or' %>',
diff --git a/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua b/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua
index 38be4dfd2..55588f615 100644
--- a/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua
+++ b/deployment_scripts/puppet/modules/lma_collector/tests/lua/test_afd_alarm.lua
@@ -980,7 +980,7 @@ function TestLMAAlarm:test_group_by_missing_field_is_unknown()
     assertEquals(state, consts.UNKW)
 end
 
-function TestLMAAlarm:test_no_data_severity_okay()
+function TestLMAAlarm:test_no_data_policy_okay()
     local alarm = {
         name = 'foo-alarm',
         description = 'foo description',
@@ -1000,7 +1000,7 @@
             },
         },
         severity = 'warning',
-        no_data_severity = 'okay',
+        no_data_policy = 'okay',
     }
     lma_alarm.load_alarm(alarm)
     lma_alarm.set_start_time(current_time)
@@ -1012,7 +1012,7 @@
     assertEquals(state, consts.OKAY)
 end
 
-function TestLMAAlarm:test_no_data_severity_critical()
+function TestLMAAlarm:test_no_data_policy_critical()
     local alarm = {
         name = 'foo-alarm',
         description = 'foo description',
@@ -1032,7 +1032,7 @@
             },
         },
         severity = 'critical',
-        no_data_severity = 'critical',
+        no_data_policy = 'critical',
     }
     lma_alarm.load_alarm(alarm)
     lma_alarm.set_start_time(current_time)
@@ -1044,6 +1044,37 @@
     assertEquals(state, consts.CRIT)
 end
 
+function TestLMAAlarm:test_no_data_policy_skip()
+    local alarm = {
+        name = 'foo-alarm',
+        description = 'foo description',
+        enabled = true,
+        trigger = {
+            rules = {
+                {
+                    metric = 'foo_metric_name',
+                    window = 30,
+                    periods = 1,
+                    ['function'] = 'avg',
+                    fields = { foo = 'bar', bar = 'foo' },
+                    group_by = {'fs'},
+                    relational_operator = '<=',
+                    threshold = 5,
+                },
+            },
+        },
+        severity = 'critical',
+        no_data_policy = 'skip',
+    }
+    lma_alarm.load_alarm(alarm)
+    lma_alarm.set_start_time(current_time)
+
+    lma_alarm.add_value(next_time(100), 'another_metric', 5)
+
+    local state, result = lma_alarm.evaluate(next_time())
+    assertEquals(state, nil)
+end
+
 lu = LuaUnit
 lu:setVerbosity( 1 )
 os.exit( lu:run() )
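
Note (outside the patch): a minimal standalone Lua sketch of the no_data_policy semantics this revert restores, for readers who do not want to trace afd_alarm.lua. 'skip' suppresses any alarm state when the metric has no data, any other value maps to that severity, and an absent policy defaults to UNKNOWN. The SEVERITIES table and helper name below are illustrative placeholders, not the plugin's actual API.

    -- Illustrative severity codes; the real plugin takes its constants from its own modules.
    local SEVERITIES = { okay = 0, unknown = 1, warning = 2, critical = 3, down = 4 }

    -- Return the state to report when an alarm's metrics have no data:
    -- nil means the alarm is skipped (nothing is emitted), otherwise a severity code.
    local function no_data_state(no_data_policy)
        if no_data_policy == nil then
            return SEVERITIES.unknown                      -- default when no policy is set
        elseif string.lower(no_data_policy) == 'skip' then
            return nil                                     -- 'skip': emit no state at all
        else
            return SEVERITIES[string.lower(no_data_policy)] -- e.g. 'okay' -> 0
        end
    end

    print(no_data_state(nil))      --> 1 (unknown)
    print(no_data_state('skip'))   --> nil
    print(no_data_state('okay'))   --> 0 (okay)

This mirrors the behaviour exercised by test_no_data_policy_skip above, where evaluate() returns nil instead of a severity when the policy is 'skip'.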