Generate alarms in Lua and Heka filters from YAML
This commit creates two new functions: - get_cluster_names() - get_afd_filters() Change-Id: Ifc660de86a952dbfeccafffd35491a7acccde16d
This commit is contained in:
parent
d49b5fb1c8
commit
216926e555
|
@ -0,0 +1,30 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Entry-point manifest: reads the 'lma_collector' hash from Hiera and declares
# the lma_collector::afds class with the cluster and alarm settings it holds.
$lma = hiera_hash('lma_collector', {})

# The alarm definitions are mandatory: abort the catalog compilation when the
# 'alarms' key is absent from the Hiera data.
$alarms_definitions = $lma['alarms']
if $alarms_definitions == undef {
  fail('Alarms definitions not found. Check files in /etc/hiera/override.')
}

class { 'lma_collector::afds':
  # Roles assigned to the node being configured.
  roles                  => hiera('roles'),
  # Alarm definitions shared by node and service filters.
  alarms                 => $alarms_definitions,
  # Node clusters: role membership and associated alarms.
  node_cluster_roles     => $lma['node_cluster_roles'],
  node_cluster_alarms    => $lma['node_cluster_alarms'],
  # Service clusters: role membership and associated alarms.
  service_cluster_roles  => $lma['service_cluster_roles'],
  service_cluster_alarms => $lma['service_cluster_alarms'],
}
|
|
@ -18,12 +18,20 @@ local afd = require 'afd'
|
||||||
|
|
||||||
-- node or service
|
-- node or service
|
||||||
local afd_type = read_config('afd_type') or error('afd_type must be specified!')
|
local afd_type = read_config('afd_type') or error('afd_type must be specified!')
|
||||||
local msg_type = string.format('afd_%s_metric', afd_type)
|
local msg_type
|
||||||
local msg_field_name = string.format('%s_status', afd_type)
|
local msg_field_name
|
||||||
|
local afd_entity
|
||||||
|
|
||||||
local afd_entity = 'role'
|
if afd_type == 'node' then
|
||||||
if afd_type == 'service' then
|
msg_type = 'afd_node_metric'
|
||||||
|
msg_field_name = 'node_status'
|
||||||
|
afd_entity = 'node_role'
|
||||||
|
elseif afd_type == 'service' then
|
||||||
|
msg_type = 'afd_service_metric'
|
||||||
|
msg_field_name = 'service_status'
|
||||||
afd_entity = 'service'
|
afd_entity = 'service'
|
||||||
|
else
|
||||||
|
error('invalid afd_type value')
|
||||||
end
|
end
|
||||||
|
|
||||||
-- ie: controller for node AFD / rabbitmq for service AFD
|
-- ie: controller for node AFD / rabbitmq for service AFD
|
||||||
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# This returns a hash that contains the filename of the alarm as key and
|
||||||
|
# list of alarms associated.
|
||||||
|
#
|
||||||
|
# ARG0: An array of hashes mapping each cluster name to its groups of alarms
|
||||||
|
# ARG1: An array of alarm definitions (ARG2 and ARG3 are described below)
|
||||||
|
#
|
||||||
|
# Ex:
|
||||||
|
#
|
||||||
|
# ARG0: cluster alarms
|
||||||
|
# [{"rabbitmq"=>[{"queue"=>["rabbitmq-queue-warning"]}]},
|
||||||
|
# {"apache"=>[{"worker"=>["apache-warning"]}]},
|
||||||
|
# {"memcached"=>[{"all"=>["memcached-warning"]}]},
|
||||||
|
# {"haproxy"=>[{"alive"=>["haproxy-warning"]}]}]
|
||||||
|
#
|
||||||
|
# ARG1: array of alarms
|
||||||
|
#
|
||||||
|
# [
|
||||||
|
# {"name"=>"rabbitmq-queue-warning",
|
||||||
|
# "description"=>"Number of message in queues too high",
|
||||||
|
# "severity"=>"warning",
|
||||||
|
# "trigger"=>
|
||||||
|
# {"logical_operator"=>"or",
|
||||||
|
# "rules"=>
|
||||||
|
# [{"metric"=>"rabbitmq_messages",
|
||||||
|
# "relational_operator"=>">=",
|
||||||
|
# "threshold"=>200,
|
||||||
|
# "window"=>120,
|
||||||
|
# "periods"=>0,
|
||||||
|
# "function"=>"avg"}]}},
|
||||||
|
# {"name"=>"apache-warning",
|
||||||
|
# "description"=>"",
|
||||||
|
# "severity"=>"warning",
|
||||||
|
# "trigger"=>
|
||||||
|
# {"logical_operator"=>"or",
|
||||||
|
# "rules"=>
|
||||||
|
# [{"metric"=>"apache_idle_workers",
|
||||||
|
# "relational_operator"=>"=",
|
||||||
|
# "threshold"=>0,
|
||||||
|
# "window"=>60,
|
||||||
|
# "periods"=>0,
|
||||||
|
# "function"=>"min"},
|
||||||
|
# {"metric"=>"apache_status",
|
||||||
|
# "relational_operator"=>"=",
|
||||||
|
# "threshold"=>0,
|
||||||
|
# "window"=>60,
|
||||||
|
# "periods"=>0,
|
||||||
|
# "function"=>"min"}]}}
|
||||||
|
# ]
|
||||||
|
#
|
||||||
|
# ARG2: ["rabbitmq", "apache"]
|
||||||
|
#
|
||||||
|
# ARG3: type (node|service)
|
||||||
|
#
|
||||||
|
# Results -> {
|
||||||
|
# 'rabbitmq_queue' => {
|
||||||
|
# 'type' => 'service',
|
||||||
|
# 'cluster_name' => 'rabbitmq',
|
||||||
|
# 'logical_name' => 'queue',
|
||||||
|
# 'alarms' => ['rabbitmq-queue-warning'],
|
||||||
|
# 'alarms_definitions' => {...},
|
||||||
|
# 'message_matcher' => "Fields[name] == 'rabbitmq_messages'"
|
||||||
|
# },
|
||||||
|
# 'apache_worker' => {
|
||||||
|
# 'type' => 'service',
|
||||||
|
# 'cluster_name' => 'apache',
|
||||||
|
# 'logical_name' => 'worker',
|
||||||
|
# 'alarms' => ['apache-warning'],
|
||||||
|
# 'alarms_definitions' => {...},
|
||||||
|
# 'message_matcher' => "Fields[name] == 'apache_idle_workers' || Fields[name] == 'apache_status'"
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
|
module Puppet::Parser::Functions
  # get_afd_filters(cluster_alarms, alarms_definitions, cluster_names, type)
  #
  # Builds the parameters of the AFD filters that apply to the given clusters.
  #
  # cluster_alarms     - Array of hashes, each mapping a cluster name to a list
  #                      of hashes {logical name => [alarm names]}.
  # alarms_definitions - Array of alarm definition hashes; each one has a
  #                      'name' and a 'trigger' holding a list of 'rules'.
  # cluster_names      - Array of the cluster names to generate filters for.
  # type               - Value stored as-is under the 'type' key of every
  #                      filter (the header example uses 'node' or 'service').
  #
  # Returns a hash keyed by "<cluster name>_<logical name>". Each value holds
  # the keys 'type', 'cluster_name', 'logical_name', 'alarms',
  # 'alarms_definitions' and 'message_matcher'.
  #
  # Raises Puppet::ParseError when fewer than four arguments are given or when
  # one of the first three arguments is not an array.
  newfunction(:get_afd_filters, :type => :rvalue) do |args|
    if args.size < 4
      raise Puppet::ParseError,
            "get_afd_filters(): wrong number of arguments (#{args.size} for 4)"
    end

    cluster_alarms, alarms_definitions, cluster_names, type = args

    [['cluster_alarms', cluster_alarms],
     ['alarms_definitions', alarms_definitions],
     ['cluster_names', cluster_names]].each do |label, value|
      unless value.is_a?(Array)
        raise Puppet::ParseError,
              "#{label} passed to get_afd_filters is not a list"
      end
    end

    # Metrics referenced by an alarm, memoized per alarm name so that the
    # definitions are scanned at most once for each distinct alarm. Only the
    # definitions of alarms that are actually referenced get inspected.
    metrics_of = Hash.new do |cache, alarm_name|
      matching = alarms_definitions.select { |d| d['name'] == alarm_name }
      cache[alarm_name] = matching.map do |definition|
        definition['trigger']['rules'].map { |rule| rule['metric'] }
      end.flatten
    end

    afd_filters = {}

    cluster_names.each do |cluster_name|
      # Find the alarms that belong to cluster_name.
      cluster_alarms.each do |cluster_alarm|
        entry = cluster_alarm.find { |name, _| name == cluster_name }
        next unless entry

        entry[1].each do |alarm_group|
          alarm_group.each do |logical_name, alarm_names|
            # Collect every metric used by the alarms of the group, without
            # duplicates and in order of first appearance, then OR them
            # together to build the message matcher.
            metrics = alarm_names.map { |a_name| metrics_of[a_name] }.flatten.uniq
            message_matcher = metrics.map { |m| "Fields[name] == '#{m}'" }.join(' || ')

            afd_filters["#{cluster_name}_#{logical_name}"] = {
              'type'               => type,
              'cluster_name'       => cluster_name,
              'logical_name'       => logical_name,
              'alarms'             => alarm_names,
              'alarms_definitions' => alarms_definitions,
              'message_matcher'    => message_matcher
            }
          end
        end
      end
    end

    afd_filters
  end
end
|
|
@ -0,0 +1,64 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# This returns an array that contains the list of services or nodes related
|
||||||
|
# to a role.
|
||||||
|
#
|
||||||
|
# ARG0: An array of hashes, each mapping a node/service cluster name to its
|
||||||
|
# list of roles.
|
||||||
|
# ARG1: An array of roles
|
||||||
|
#
|
||||||
|
# Ex:
|
||||||
|
#
|
||||||
|
# ARG0:
|
||||||
|
# [{"controller"=>["primary-controller", "controller"]},
|
||||||
|
# {"compute"=>["compute"]},
|
||||||
|
# {"storage"=>["cinder", "ceph-osd"]},
|
||||||
|
# {"influxdb"=>["influxdb-grafana"]}]
|
||||||
|
#
|
||||||
|
# ARG1: ['primary-controller']
|
||||||
|
#
|
||||||
|
# Results -> ['controller']
|
||||||
|
#
|
||||||
|
|
||||||
|
module Puppet::Parser::Functions
  # get_cluster_names(data, roles)
  #
  # Returns the names of the clusters that the given roles belong to.
  #
  # data  - Array of hashes, each mapping a cluster name to the list of roles
  #         that are members of that cluster.
  # roles - Array of roles to look up.
  #
  # The result lists each matching cluster name once, in order of first match.
  # When no cluster matches any of the roles and data contains an entry named
  # 'default', the result is ['default'].
  #
  # Raises Puppet::ParseError when data or roles is not an array.
  newfunction(:get_cluster_names, :type => :rvalue) do |args|
    data = args[0]
    roles = args[1]

    raise Puppet::ParseError, "data passed to get_cluster_names is not a list" unless data.is_a?(Array)
    raise Puppet::ParseError, "roles passed to get_cluster_names is not a list" unless roles.is_a?(Array)

    cluster_names = []
    roles.each do |role|
      data.each do |mapping|
        mapping.each do |name, members|
          cluster_names << name if members.include?(role)
        end
      end
    end
    cluster_names = cluster_names.uniq

    # Fall back to the 'default' cluster only once every role has been
    # checked, and regardless of where the 'default' entry sits in data.
    if cluster_names.empty?
      has_default = data.any? do |mapping|
        mapping.any? { |name, _| name == 'default' }
      end
      cluster_names << 'default' if has_default
    end

    cluster_names
  end
end
|
|
@ -0,0 +1,56 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Defined type lma_collector::afd_filter
#
# Writes the Lua module describing a group of alarms and declares the Heka
# sandbox filter that loads it.
#
# Parameters:
#   $type               - passed to the filter as 'afd_type' and used in the
#                         filter title.
#   $cluster_name       - passed to the filter as 'afd_cluster_name'.
#   $logical_name       - passed to the filter as 'afd_logical_name'.
#   $alarms             - not referenced in this body; presumably consumed by
#                         the lma_alarms.erb template (verify there).
#   $alarms_definitions - not referenced in this body; presumably consumed by
#                         the lma_alarms.erb template (verify there).
#   $message_matcher    - matcher expression AND-ed with the metric type
#                         check of the filter.
define lma_collector::afd_filter (
  $type,
  $cluster_name,
  $logical_name,
  $alarms,
  $alarms_definitions,
  $message_matcher,
) {
  include lma_collector::params
  include lma_collector::service
  include heka::params

  # Name of the generated Lua module (without extension) and its full path.
  $lua_module      = "lma_alarms_${name}"
  $lua_module_path = "${heka::params::lua_modules_dir}/${lua_module}.lua"

  # Create the Lua structures that describe alarms
  file { $lua_module_path:
    ensure  => present,
    content => template('lma_collector/lma_alarms.erb'),
  }

  # Create the configuration file for Heka
  heka::filter::sandbox { "afd_${type}_${cluster_name}_${logical_name}":
    config_dir      => $lma_collector::params::config_dir,
    filename        => "${lma_collector::params::plugins_dir}/filters/afd.lua",
    message_matcher => "(Type == \'metric\' || Type == \'heka.sandbox.metric\') && (${message_matcher})",
    ticker_interval => 10,
    config          => {
      hostname         => $::hostname,
      afd_type         => $type,
      afd_file         => $lua_module,
      afd_cluster_name => $cluster_name,
      afd_logical_name => $logical_name,
    },
    # The Lua module must exist before the filter is configured, and the
    # collector service is notified of any change to the filter.
    require         => File[$lua_module_path],
    notify          => Class['lma_collector::service'],
  }
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Class lma_collector::afds
#
# Declares one lma_collector::afd_filter resource for every group of alarms
# that applies to the node and service clusters matching the given roles.
#
# Parameters (all of them must be arrays):
#   $roles                  - roles to resolve into cluster names.
#   $node_cluster_roles     - node cluster to roles mapping.
#   $service_cluster_roles  - service cluster to roles mapping.
#   $node_cluster_alarms    - alarms associated with each node cluster.
#   $service_cluster_alarms - alarms associated with each service cluster.
#   $alarms                 - alarm definitions.
class lma_collector::afds (
  $roles                  = undef,
  $node_cluster_roles     = undef,
  $service_cluster_roles  = undef,
  $node_cluster_alarms    = undef,
  $service_cluster_alarms = undef,
  $alarms                 = undef,
){

  validate_array($roles)
  validate_array($node_cluster_roles)
  validate_array($service_cluster_roles)
  validate_array($node_cluster_alarms)
  validate_array($service_cluster_alarms)
  validate_array($alarms)

  # Node AFD filters: resolve the node clusters for the roles, then build and
  # declare the filters.
  $node_cluster_names = get_cluster_names($node_cluster_roles, $roles)
  $node_afd_filters = get_afd_filters($node_cluster_alarms, $alarms, $node_cluster_names, 'node')
  create_resources(lma_collector::afd_filter, $node_afd_filters)

  # Service AFD filters: same steps with the service clusters.
  $service_cluster_names = get_cluster_names($service_cluster_roles, $roles)
  $service_afd_filters = get_afd_filters($service_cluster_alarms, $alarms, $service_cluster_names, 'service')
  create_resources(lma_collector::afd_filter, $service_afd_filters)
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright 2015 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
require 'spec_helper'
|
||||||
|
|
||||||
|
describe 'lma_collector::afds' do
  # Builds a fake alarm definition with a single rule on the given metric.
  def fake_alarm(alarm_name, metric_name)
    rule = {
      'metric'              => metric_name,
      'relational_operator' => '>=',
      'threshold'           => 200,
      'window'              => 120,
      'periods'             => 0,
      'function'            => 'avg'
    }

    {
      'name'        => alarm_name,
      'description' => 'Fake alarm',
      'severity'    => 'warning',
      'trigger'     => { 'logical_operator' => 'or', 'rules' => [rule] }
    }
  end

  let(:facts) do
    {
      :kernel          => 'Linux',
      :operatingsystem => 'Ubuntu',
      :osfamily        => 'Debian'
    }
  end

  describe 'with defaults' do
    let(:params) do
      {
        :roles                  => ['primary-controller'],
        :node_cluster_roles     => [{ 'controller' => ['primary-controller'] }],
        :service_cluster_roles  => [{ 'mysql' => ['primary-controller'] }],
        :node_cluster_alarms    => [{ 'controller' => [{ 'cpu' => ['cpu_warning'] }] }],
        :service_cluster_alarms => [{ 'mysql' => [{ 'all' => ['db_warning'] }] }],
        :alarms                 => [
          fake_alarm('cpu_warning', 'fake_cpu'),
          fake_alarm('db_warning', 'db-warning')
        ]
      }
    end

    # One filter and one Lua module are expected for the node cluster ...
    it { is_expected.to contain_heka__filter__sandbox('afd_node_controller_cpu') }
    it { is_expected.to contain_file('/usr/share/heka/lua_modules/lma_alarms_controller_cpu.lua') }

    # ... and one of each for the service cluster.
    it { is_expected.to contain_heka__filter__sandbox('afd_service_mysql_all') }
    it { is_expected.to contain_file('/usr/share/heka/lua_modules/lma_alarms_mysql_all.lua') }
  end
end
|
|
@ -32,7 +32,7 @@ lma_collector:
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: avg
|
||||||
- metric: cpu_wait
|
- metric: cpu_wait
|
||||||
relational_operator: '<='
|
relational_operator: '>='
|
||||||
threshold: 25
|
threshold: 25
|
||||||
window: 120
|
window: 120
|
||||||
periods: 0
|
periods: 0
|
||||||
|
@ -46,11 +46,11 @@ lma_collector:
|
||||||
- metric: cpu_wait
|
- metric: cpu_wait
|
||||||
relational_operator: '>='
|
relational_operator: '>='
|
||||||
threshold: 30
|
threshold: 30
|
||||||
window: 300
|
window: 120
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: avg
|
||||||
- name: 'cpu-warning-compute'
|
- name: 'cpu-warning-compute'
|
||||||
description: 'CPU critical on compute'
|
description: 'CPU warning on compute'
|
||||||
severity: 'warning'
|
severity: 'warning'
|
||||||
trigger:
|
trigger:
|
||||||
logical_operator: 'or'
|
logical_operator: 'or'
|
||||||
|
@ -164,7 +164,7 @@ lma_collector:
|
||||||
periods: 0
|
periods: 0
|
||||||
function: min
|
function: min
|
||||||
- name: 'rabbitmq-queue-warning'
|
- name: 'rabbitmq-queue-warning'
|
||||||
description: 'Number of message in queues too high'
|
description: 'Number of messages sitting in queues is too high'
|
||||||
severity: 'warning'
|
severity: 'warning'
|
||||||
trigger:
|
trigger:
|
||||||
logical_operator: 'or'
|
logical_operator: 'or'
|
||||||
|
@ -224,7 +224,7 @@ lma_collector:
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: avg
|
||||||
- name: 'fs-warning'
|
- name: 'fs-warning'
|
||||||
description: 'Filesystem usage'
|
description: 'Filesystem free space is low'
|
||||||
severity: 'warning'
|
severity: 'warning'
|
||||||
trigger:
|
trigger:
|
||||||
rules:
|
rules:
|
||||||
|
@ -235,9 +235,9 @@ lma_collector:
|
||||||
threshold: 5
|
threshold: 5
|
||||||
window: 60
|
window: 60
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: min
|
||||||
- name: 'fs-warning-storage'
|
- name: 'fs-warning-storage'
|
||||||
description: 'Filesystem usage'
|
description: 'Filesystem free space is low'
|
||||||
severity: 'warning'
|
severity: 'warning'
|
||||||
trigger:
|
trigger:
|
||||||
rules:
|
rules:
|
||||||
|
@ -248,9 +248,9 @@ lma_collector:
|
||||||
threshold: 10
|
threshold: 10
|
||||||
window: 60
|
window: 60
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: min
|
||||||
- name: 'fs-critical-storage'
|
- name: 'fs-critical-storage'
|
||||||
description: 'Filesystem usage'
|
description: 'Filesystem free space is low'
|
||||||
severity: 'critical'
|
severity: 'critical'
|
||||||
trigger:
|
trigger:
|
||||||
rules:
|
rules:
|
||||||
|
@ -261,9 +261,9 @@ lma_collector:
|
||||||
threshold: 5
|
threshold: 5
|
||||||
window: 60
|
window: 60
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: min
|
||||||
- name: 'fs-critical-compute'
|
- name: 'fs-critical-compute'
|
||||||
description: 'Filesystem usage'
|
description: 'Filesystem free space is low'
|
||||||
severity: 'critical'
|
severity: 'critical'
|
||||||
trigger:
|
trigger:
|
||||||
rules:
|
rules:
|
||||||
|
@ -274,9 +274,9 @@ lma_collector:
|
||||||
threshold: 7
|
threshold: 7
|
||||||
window: 60
|
window: 60
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: min
|
||||||
- name: 'fs-critical'
|
- name: 'fs-critical'
|
||||||
description: 'Filesystem usage'
|
description: 'Filesystem free space is low'
|
||||||
severity: 'critical'
|
severity: 'critical'
|
||||||
trigger:
|
trigger:
|
||||||
rules:
|
rules:
|
||||||
|
@ -285,9 +285,9 @@ lma_collector:
|
||||||
fs: '*'
|
fs: '*'
|
||||||
relational_operator: '<'
|
relational_operator: '<'
|
||||||
threshold: 2
|
threshold: 2
|
||||||
window: 30
|
window: 60
|
||||||
periods: 0
|
periods: 0
|
||||||
function: avg
|
function: min
|
||||||
node_cluster_roles:
|
node_cluster_roles:
|
||||||
- controller: ['primary-controller', 'controller']
|
- controller: ['primary-controller', 'controller']
|
||||||
- compute: ['compute']
|
- compute: ['compute']
|
||||||
|
@ -302,16 +302,16 @@ lma_collector:
|
||||||
node_cluster_alarms:
|
node_cluster_alarms:
|
||||||
- controller:
|
- controller:
|
||||||
- system: ['cpu-critical-controller', 'cpu-warning-controller']
|
- system: ['cpu-critical-controller', 'cpu-warning-controller']
|
||||||
- fs: ['fs-warning', 'fs-critical']
|
- fs: ['fs-critical', 'fs-warning']
|
||||||
- compute:
|
- compute:
|
||||||
- system: ['cpu-critical-compute', 'cpu-warning-compute']
|
- system: ['cpu-critical-compute', 'cpu-warning-compute']
|
||||||
- fs: ['fs-warning', 'fs-critical-compute']
|
- fs: ['fs-critical', 'fs-critical-compute', 'fs-warning']
|
||||||
- storage:
|
- storage:
|
||||||
- system: ['cpu-critical-storage', 'cpu-warning-storage']
|
- system: ['cpu-critical-storage', 'cpu-warning-storage']
|
||||||
- fs: ['fs-warning-storage', 'fs-critical-storage']
|
- fs: ['fs-critical-storage', 'fs-warning-storage']
|
||||||
- _default:
|
- default:
|
||||||
- cpu: ['cpu-critical-default']
|
- cpu: ['cpu-critical-default']
|
||||||
- fs: ['fs-warning', 'fs-critical']
|
- fs: ['fs-critical', 'fs-warning']
|
||||||
service_cluster_alarms:
|
service_cluster_alarms:
|
||||||
- rabbitmq:
|
- rabbitmq:
|
||||||
- queue: ['rabbitmq-queue-warning']
|
- queue: ['rabbitmq-queue-warning']
|
||||||
|
|
|
@ -127,7 +127,7 @@ lma_collector:
|
||||||
- afd_node_metric
|
- afd_node_metric
|
||||||
aggregator_flag: true
|
aggregator_flag: true
|
||||||
# the field in the input messages to identify the cluster
|
# the field in the input messages to identify the cluster
|
||||||
cluster_field: hostname
|
cluster_field: node_role
|
||||||
# the field in the input messages to identify the cluster's member
|
# the field in the input messages to identify the cluster's member
|
||||||
member_field: source
|
member_field: source
|
||||||
output_message_type: gse_node_cluster_metric
|
output_message_type: gse_node_cluster_metric
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
local M = {}
setfenv(1, M) -- Remove external access to contain everything in the module

<%#
  Template of the Lua module that describes a group of alarms.
  For every name in @alarms, each entry of @alarms_definitions with the same
  name is emitted as a Lua table. The module returns the list of these tables.
-%>
<%
  # Escape a value for inclusion in a single-quoted Lua string literal, so that
  # a backslash, an apostrophe or a newline in the data cannot produce an
  # invalid Lua module.
  lua_str = lambda do |value|
    value.to_s.gsub(/[\\'\n]/) { |c| c == "\n" ? '\\n' : "\\#{c}" }
  end
-%>
-- List of the alarm definitions, generated from the Puppet data.
local alarms = {
<% @alarms.each do |alarm_name| -%>
<% @alarms_definitions.each do |alarm| -%>
<% if alarm_name == alarm["name"] -%>
  {
    ['name'] = '<%= lua_str.call(alarm_name) %>',
    ['description'] = '<%= lua_str.call(alarm["description"]) %>',
    ['severity'] = '<%= lua_str.call(alarm["severity"]) %>',
    ['trigger'] = {
      ['logical_operator'] = '<%= lua_str.call(alarm["trigger"]["logical_operator"] || 'or') %>',
      ['rules'] = {
<% alarm["trigger"]["rules"].each do |rule| -%>
        {
          ['metric'] = '<%= lua_str.call(rule["metric"]) %>',
          ['fields'] = {
<% (rule["fields"] || []).each do |k, v| -%>
            ['<%= lua_str.call(k) %>'] = '<%= lua_str.call(v) %>',
<% end -%>
          },
          ['relational_operator'] = '<%= lua_str.call(rule["relational_operator"]) %>',
          ['threshold'] = '<%= lua_str.call(rule["threshold"]) %>',
          ['window'] = '<%= lua_str.call(rule["window"]) %>',
          ['periods'] = '<%= lua_str.call(rule["periods"] || 0) %>',
          ['function'] = '<%= lua_str.call(rule["function"]) %>',
        },
<% end -%>
      },
    },
  },
<% end -%>
<% end -%>
<% end -%>
}

return alarms
|
|
@ -76,6 +76,14 @@
|
||||||
puppet_modules: puppet/modules
|
puppet_modules: puppet/modules
|
||||||
timeout: 600
|
timeout: 600
|
||||||
|
|
||||||
|
- role: '*'
|
||||||
|
stage: post_deployment/8200
|
||||||
|
type: puppet
|
||||||
|
parameters:
|
||||||
|
puppet_manifest: puppet/manifests/configure_afd_filters.pp
|
||||||
|
puppet_modules: puppet/modules
|
||||||
|
timeout: 600
|
||||||
|
|
||||||
- role: '*'
|
- role: '*'
|
||||||
stage: post_deployment/8200
|
stage: post_deployment/8200
|
||||||
type: puppet
|
type: puppet
|
||||||
|
|
Loading…
Reference in New Issue