Support alerting attribute per AFD
Change-Id: I29aba65d35a12cc56a91c10f893e38a35ea3abf9
This commit is contained in:
parent
84defe6131
commit
731265cdc8
|
@ -24,10 +24,10 @@
|
|||
# Ex:
|
||||
#
|
||||
# ARG0:
|
||||
# {"rabbitmq"=>{"apply_to_node" => "controller", "alarms" => {"queue"=>["rabbitmq-queue-warning"]}},
|
||||
# "apache"=>{"apply_to_node" => "controller", "alarms" => {"worker"=>["apache-warning"]}},
|
||||
# "memcached"=>{"apply_to_node"=>"controller", "alarms" => {"all"=>["memcached-warning"]}},
|
||||
# "haproxy"=>{"apply_to_node" => "controller", "alarms" => {"alive"=>["haproxy-warning"]}}}
|
||||
# {"rabbitmq"=>{"apply_to_node" => "controller", "members" => {"queue"=> {"alarms" => ["rabbitmq-queue-warning"]}}},
|
||||
# "apache"=>{"apply_to_node" => "controller", "members" => {"worker"=> {"alarms" => ["apache-warning"]}}},
|
||||
# "memcached"=>{"apply_to_node"=>"controller", "members" => {"all"=> {"alarms" => ["memcached-warning"]}}},
|
||||
# "haproxy"=>{"apply_to_node" => "controller", "members" => {"alive"=> {"alarms" => ["haproxy-warning"]}}}}
|
||||
#
|
||||
# ARG1:
|
||||
#
|
||||
|
@ -114,28 +114,30 @@ module Puppet::Parser::Functions
|
|||
default_profile = false
|
||||
end
|
||||
|
||||
activate_alerting=true
|
||||
enable_notification=false
|
||||
default_activate_alerting=true
|
||||
default_enable_notification=false
|
||||
if afds.has_key?('alerting')
|
||||
if afds['alerting'] == 'disabled'
|
||||
activate_alerting=false
|
||||
default_activate_alerting=false
|
||||
elsif afds['alerting'] == 'enabled_with_notification'
|
||||
enable_notification = true
|
||||
default_enable_notification = true
|
||||
end
|
||||
end
|
||||
afds['alarms'].each do |afd_name, alarms|
|
||||
afds['members'].each do |afd_name, alarms|
|
||||
metrics = Set.new([])
|
||||
matches = false
|
||||
alarms.each do |a_name|
|
||||
activate_alerting = default_activate_alerting
|
||||
enable_notification = default_enable_notification
|
||||
if alarms.has_key?('alerting')
|
||||
if alarms['alerting'] == 'disabled'
|
||||
activate_alerting=false
|
||||
elsif alarms['alerting'] == 'enabled_with_notification'
|
||||
enable_notification = true
|
||||
end
|
||||
end
|
||||
alarms['alarms'].each do |a_name|
|
||||
afd = alarm_definitions.select {|defi| defi['name'] == a_name}
|
||||
next if afd.empty? # user mention an unknown alarm for this AFD
|
||||
#if afd[0].has_key?('alerting')
|
||||
# if afd[0]['alerting'] == 'disabled'
|
||||
# activate_alerting=false
|
||||
# elsif afd[0]['alerting'] == 'enabled_with_notification'
|
||||
# enable_notification = true
|
||||
# end
|
||||
#end
|
||||
|
||||
afd[0]['trigger']['rules'].each do |r|
|
||||
if metric_defs.has_key?(r['metric']) and metric_defs[r['metric']].has_key?('collected_on') and afd_profiles.include? metric_defs[r['metric']]['collected_on']
|
||||
|
@ -154,7 +156,7 @@ module Puppet::Parser::Functions
|
|||
'type' => type,
|
||||
'cluster_name' => cluster_name,
|
||||
'logical_name' => afd_name,
|
||||
'alarms' => alarms,
|
||||
'alarms' => alarms['alarms'],
|
||||
'alarms_definitions' => alarm_definitions,
|
||||
'message_matcher' => message_matcher,
|
||||
'activate_alerting' => activate_alerting,
|
||||
|
|
|
@ -27,16 +27,20 @@ describe 'fuel_lma_collector::afds' do
|
|||
'controller' =>
|
||||
{
|
||||
'apply_to_node' => 'controller',
|
||||
'alarms' => {
|
||||
'cpu' => ['cpu_warning']
|
||||
'members' => {
|
||||
'cpu' => {
|
||||
"alarms" => ['cpu_warning']
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
:service_cluster_alarms => {
|
||||
:service_cluster_alarms=> {
|
||||
'mysql' => {
|
||||
'apply_to_node' => 'controller',
|
||||
'alarms' => {
|
||||
'all' => ['db_warning']
|
||||
'members' => {
|
||||
'all' => {
|
||||
"alarms" => ['db_warning']
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -81,8 +85,10 @@ describe 'fuel_lma_collector::afds' do
|
|||
:node_cluster_alarms => {
|
||||
'controller' => {
|
||||
'apply_to_node' => 'controller',
|
||||
'alarms' => {
|
||||
'cpu' => ['cpu_warning']
|
||||
'members' => {
|
||||
'cpu' => {
|
||||
"alarms" => ['cpu_warning']
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -115,8 +121,10 @@ describe 'fuel_lma_collector::afds' do
|
|||
'others' =>
|
||||
{
|
||||
'apply_to_node' => 'default',
|
||||
'alarms' => {
|
||||
'cpu' => ['cpu_warning']
|
||||
'members' => {
|
||||
'cpu' => {
|
||||
"alarms" => ['cpu_warning']
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
@ -104,17 +104,27 @@ describe 'get_afd_filters' do
|
|||
afds_nodes = {
|
||||
"controller" => {
|
||||
"apply_to_node" => "controller",
|
||||
"alerting" => 'enabled_with_notification',
|
||||
"alarms" => {
|
||||
"system" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"alerting" => 'enabled',
|
||||
"members" => {
|
||||
"system" => {
|
||||
"alerting" => 'enabled_with_notification',
|
||||
"alarms" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
},
|
||||
"foo" => {
|
||||
"alarms" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
}
|
||||
},
|
||||
},
|
||||
"compute" => {
|
||||
"apply_to_node" => "compute",
|
||||
"alerting" => 'enabled_with_notification',
|
||||
"alarms" => {
|
||||
"system" => ["cpu-critical-compute", "cpu-warning-compute"],
|
||||
"fs" => ["fs-critical"],
|
||||
"members" => {
|
||||
"system" => {
|
||||
"alarms" => ["cpu-critical-compute", "cpu-warning-compute"],
|
||||
},
|
||||
"fs" => {
|
||||
"alarms" => ["fs-critical"],
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -131,6 +141,16 @@ describe 'get_afd_filters' do
|
|||
"message_matcher"=>"Fields[name] == 'cpu_idle' || Fields[name] == 'cpu_wait'",
|
||||
"enable_notification" => true,
|
||||
"activate_alerting" => true,
|
||||
},
|
||||
"controller_foo"=>
|
||||
{"type"=>"node",
|
||||
"cluster_name"=>"controller",
|
||||
"logical_name"=>"foo",
|
||||
"alarms"=>["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"alarms_definitions"=> alarms_nodes,
|
||||
"message_matcher"=>"Fields[name] == 'cpu_idle' || Fields[name] == 'cpu_wait'",
|
||||
"enable_notification" => false,
|
||||
"activate_alerting" => true,
|
||||
}
|
||||
})
|
||||
|
||||
|
@ -194,6 +214,16 @@ describe 'get_afd_filters' do
|
|||
"message_matcher"=>"Fields[name] == 'cpu_idle' || Fields[name] == 'cpu_wait'",
|
||||
"activate_alerting" => true,
|
||||
"enable_notification" => true,
|
||||
},
|
||||
"controller_foo"=>
|
||||
{"type"=>"node",
|
||||
"cluster_name"=>"controller",
|
||||
"logical_name"=>"foo",
|
||||
"alarms"=>["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"alarms_definitions"=> alarms_nodes,
|
||||
"message_matcher"=>"Fields[name] == 'cpu_idle' || Fields[name] == 'cpu_wait'",
|
||||
"enable_notification" => false,
|
||||
"activate_alerting" => true,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -235,15 +265,19 @@ describe 'get_afd_filters' do
|
|||
"rabbitmq" => {
|
||||
"apply_to_node" => "controller",
|
||||
"alerting" => 'enabled',
|
||||
"alarms" => {
|
||||
"queue" => ["rabbitmq-queue-warning"]
|
||||
"members" => {
|
||||
"queue" => {
|
||||
"alarms" => ["rabbitmq-queue-warning"]
|
||||
}
|
||||
},
|
||||
},
|
||||
"apache" => {
|
||||
"apply_to_node" => "controller",
|
||||
"alerting" => 'enabled',
|
||||
"alarms" => {
|
||||
"worker" => ['apache-warning'],
|
||||
"members" => {
|
||||
"worker" => {
|
||||
"alarms" => ['apache-warning'],
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -339,21 +373,28 @@ describe 'get_afd_filters' do
|
|||
"nova-free-resources" => {
|
||||
"apply_to_node" => "compute",
|
||||
"alerting" => 'enabled',
|
||||
"alarms" => {
|
||||
"free-vcpu" => ['free_vcpu_warning'],
|
||||
"members" => {
|
||||
"free-vcpu" => {
|
||||
"alerting" => 'disabled',
|
||||
"alarms" => ['free_vcpu_warning'],
|
||||
}
|
||||
},
|
||||
},
|
||||
"nova-total-free-resources" => {
|
||||
"alerting" => 'enabled',
|
||||
"alarms" => {
|
||||
"total-free-vcpu" => ['total_free_vcpu_warning'],
|
||||
"members" => {
|
||||
"total-free-vcpu" => {
|
||||
"alarms" => ['total_free_vcpu_warning'],
|
||||
}
|
||||
},
|
||||
},
|
||||
"controller" => {
|
||||
"apply_to_node" => "controller",
|
||||
"alerting" => 'enabled_with_notification',
|
||||
"alarms" => {
|
||||
"system" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"members" => {
|
||||
"system" => {
|
||||
"alarms" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -376,7 +417,7 @@ describe 'get_afd_filters' do
|
|||
"alarms_definitions"=> alarms_services_o,
|
||||
"alarms"=>["free_vcpu_warning"],
|
||||
"message_matcher"=>"Fields[name] == 'free_vcpu'",
|
||||
"activate_alerting" => true,
|
||||
"activate_alerting" => false,
|
||||
"enable_notification" => false,
|
||||
},
|
||||
"nova-total-free-resources_total-free-vcpu"=>
|
||||
|
|
|
@ -2915,459 +2915,589 @@ lma_collector:
|
|||
controller:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-controller', 'cpu-warning-controller']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
log-fs: ['log-fs-critical', 'log-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-controller', 'cpu-warning-controller']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
log-fs:
|
||||
alarms: ['log-fs-critical', 'log-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
<% if @detach_rabbitmq_enabled -%>
|
||||
rabbitmq-nodes:
|
||||
apply_to_node: rabbitmq-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-rabbitmq', 'cpu-warning-rabbitmq']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-rabbitmq', 'cpu-warning-rabbitmq']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
<% end -%>
|
||||
mysql-nodes:
|
||||
apply_to_node: mysql-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
<% if @detach_database_enabled -%>
|
||||
cpu: ['cpu-critical-mysql', 'cpu-warning-mysql']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
cpu:
|
||||
alarms: ['cpu-critical-mysql', 'cpu-warning-mysql']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
<% end -%>
|
||||
mysql-fs: ['mysql-fs-critical', 'mysql-fs-warning']
|
||||
mysql-fs:
|
||||
alarms: ['mysql-fs-critical', 'mysql-fs-warning']
|
||||
compute:
|
||||
apply_to_node: compute
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-compute', 'cpu-warning-compute']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
nova-fs: ['nova-fs-critical', 'nova-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-compute', 'cpu-warning-compute']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
nova-fs:
|
||||
alarms: ['nova-fs-critical', 'nova-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
storage:
|
||||
apply_to_node: storage
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-storage', 'cpu-warning-storage']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-storage', 'cpu-warning-storage']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
<% if @storage_options["volumes_ceph"] then -%>
|
||||
osd-disk: ['osd-disk-critical']
|
||||
osd-disk:
|
||||
alarms: ['osd-disk-critical']
|
||||
<% end -%>
|
||||
elasticsearch-nodes:
|
||||
apply_to_node: elasticsearch-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-default']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
data-fs: ['elasticsearch-fs-critical', 'elasticsearch-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-default']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
data-fs:
|
||||
alarms: ['elasticsearch-fs-critical', 'elasticsearch-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
influxdb-nodes:
|
||||
apply_to_node: influxdb-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
cpu: ['cpu-critical-default']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
data-fs: ['influxdb-fs-critical', 'influxdb-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
# These are the default alarms configured for all nodes with unknown roles
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-default']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
data-fs:
|
||||
alarms: ['influxdb-fs-critical', 'influxdb-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
# These are the default members configured for all nodes with unknown roles
|
||||
default:
|
||||
apply_to_node: default
|
||||
# The operator wants to receive alert notifications for individual nodes
|
||||
alerting: enabled_with_notification
|
||||
alarms:
|
||||
cpu: ['cpu-critical-default']
|
||||
network-rx: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs: ['other-fs-critical', 'other-fs-warning']
|
||||
swap: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors: ['hdd-errors-critical']
|
||||
members:
|
||||
cpu:
|
||||
alarms: ['cpu-critical-default']
|
||||
network-rx:
|
||||
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
|
||||
network-tx:
|
||||
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
|
||||
root-fs:
|
||||
alarms: ['root-fs-critical', 'root-fs-warning']
|
||||
other-fs:
|
||||
alarms: ['other-fs-critical', 'other-fs-warning']
|
||||
swap:
|
||||
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
|
||||
hdd-errors:
|
||||
alarms: ['hdd-errors-critical']
|
||||
|
||||
# Definition of the AFD service filters
|
||||
service_cluster_alarms:
|
||||
rabbitmq-cluster:
|
||||
apply_to_node: rabbitmq-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
pacemaker: ['rabbitmq-pacemaker-down', 'rabbitmq-pacemaker-critical', 'rabbitmq-pacemaker-warning']
|
||||
queue: ['rabbitmq-queue-warning']
|
||||
memory: ['rabbitmq-memory-limit-critical', 'rabbitmq-memory-limit-warning']
|
||||
disk: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning']
|
||||
members:
|
||||
pacemaker:
|
||||
alarms: ['rabbitmq-pacemaker-down', 'rabbitmq-pacemaker-critical', 'rabbitmq-pacemaker-warning']
|
||||
queue:
|
||||
alarms: ['rabbitmq-queue-warning']
|
||||
memory:
|
||||
alarms: ['rabbitmq-memory-limit-critical', 'rabbitmq-memory-limit-warning']
|
||||
disk:
|
||||
alarms: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning']
|
||||
rabbitmq-service:
|
||||
apply_to_node: rabbitmq-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['rabbitmq-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['rabbitmq-check']
|
||||
mysql:
|
||||
apply_to_node: mysql-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
node-status: ['mysql-node-connected', 'mysql-node-ready']
|
||||
check: ['mysql-check']
|
||||
members:
|
||||
node-status:
|
||||
alarms: ['mysql-node-connected', 'mysql-node-ready']
|
||||
check:
|
||||
alarms: ['mysql-check']
|
||||
apache:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
worker: ['apache-warning']
|
||||
check: ['apache-check']
|
||||
members:
|
||||
worker:
|
||||
alarms: ['apache-warning']
|
||||
check:
|
||||
alarms: ['apache-check']
|
||||
nova-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['nova-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['nova-api-http-errors']
|
||||
backends:
|
||||
- 'nova-api-backends-all-down'
|
||||
- 'nova-api-backends-majority-down'
|
||||
- 'nova-api-backends-one-down'
|
||||
alarms:
|
||||
- 'nova-api-backends-all-down'
|
||||
- 'nova-api-backends-majority-down'
|
||||
- 'nova-api-backends-one-down'
|
||||
nova-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['nova-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['nova-api-check-failed']
|
||||
nova-metadata-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'nova-metadata-api-backends-all-down'
|
||||
- 'nova-metadata-api-backends-majority-down'
|
||||
- 'nova-metadata-api-backends-one-down'
|
||||
alarms:
|
||||
- 'nova-metadata-api-backends-all-down'
|
||||
- 'nova-metadata-api-backends-majority-down'
|
||||
- 'nova-metadata-api-backends-one-down'
|
||||
nova-novncproxy-websocket:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'nova-novncproxy-websocket-api-backends-all-down'
|
||||
- 'nova-novncproxy-websocket-api-backends-majority-down'
|
||||
- 'nova-novncproxy-websocket-api-backends-one-down'
|
||||
alarms:
|
||||
- 'nova-novncproxy-websocket-api-backends-all-down'
|
||||
- 'nova-novncproxy-websocket-api-backends-majority-down'
|
||||
- 'nova-novncproxy-websocket-api-backends-one-down'
|
||||
nova-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['nova-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['nova-api-local-endpoint']
|
||||
nova-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['nova-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['nova-logs-error']
|
||||
nova-logs-compute:
|
||||
apply_to_node: compute
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['nova-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['nova-logs-error']
|
||||
nova-cert:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'nova-cert-all-down'
|
||||
- 'nova-cert-majority-down'
|
||||
- 'nova-cert-one-down'
|
||||
alarms:
|
||||
- 'nova-cert-all-down'
|
||||
- 'nova-cert-majority-down'
|
||||
- 'nova-cert-one-down'
|
||||
nova-consoleauth:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'nova-consoleauth-all-down'
|
||||
- 'nova-consoleauth-majority-down'
|
||||
- 'nova-consoleauth-one-down'
|
||||
alarms:
|
||||
- 'nova-consoleauth-all-down'
|
||||
- 'nova-consoleauth-majority-down'
|
||||
- 'nova-consoleauth-one-down'
|
||||
nova-compute:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'nova-compute-all-down'
|
||||
- 'nova-compute-majority-down'
|
||||
- 'nova-compute-one-down'
|
||||
alarms:
|
||||
- 'nova-compute-all-down'
|
||||
- 'nova-compute-majority-down'
|
||||
- 'nova-compute-one-down'
|
||||
nova-conductor:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'nova-conductor-all-down'
|
||||
- 'nova-conductor-majority-down'
|
||||
- 'nova-conductor-one-down'
|
||||
alarms:
|
||||
- 'nova-conductor-all-down'
|
||||
- 'nova-conductor-majority-down'
|
||||
- 'nova-conductor-one-down'
|
||||
nova-scheduler:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'nova-scheduler-all-down'
|
||||
- 'nova-scheduler-majority-down'
|
||||
- 'nova-scheduler-one-down'
|
||||
alarms:
|
||||
- 'nova-scheduler-all-down'
|
||||
- 'nova-scheduler-majority-down'
|
||||
- 'nova-scheduler-one-down'
|
||||
heat-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['heat-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['heat-api-http-errors']
|
||||
backends:
|
||||
- 'heat-api-backends-all-down'
|
||||
- 'heat-api-backends-majority-down'
|
||||
- 'heat-api-backends-one-down'
|
||||
alarms:
|
||||
- 'heat-api-backends-all-down'
|
||||
- 'heat-api-backends-majority-down'
|
||||
- 'heat-api-backends-one-down'
|
||||
heat-cfn-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'heat-cfn-api-backends-all-down'
|
||||
- 'heat-cfn-api-backends-majority-down'
|
||||
- 'heat-cfn-api-backends-one-down'
|
||||
alarms:
|
||||
- 'heat-cfn-api-backends-all-down'
|
||||
- 'heat-cfn-api-backends-majority-down'
|
||||
- 'heat-cfn-api-backends-one-down'
|
||||
heat-cloudwatch-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'heat-cloudwatch-api-backends-all-down'
|
||||
- 'heat-cloudwatch-api-backends-majority-down'
|
||||
- 'heat-cloudwatch-api-backends-one-down'
|
||||
alarms:
|
||||
- 'heat-cloudwatch-api-backends-all-down'
|
||||
- 'heat-cloudwatch-api-backends-majority-down'
|
||||
- 'heat-cloudwatch-api-backends-one-down'
|
||||
heat-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['heat-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['heat-api-check-failed']
|
||||
heat-cfn-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['heat-cfn-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['heat-cfn-api-check-failed']
|
||||
heat-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['heat-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['heat-api-local-endpoint']
|
||||
heat-cfn-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['heat-cfn-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['heat-cfn-api-local-endpoint']
|
||||
heat-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['heat-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['heat-logs-error']
|
||||
<% if not @storage_options["objects_ceph"] then -%>
|
||||
swift-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['swift-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['swift-api-http-errors']
|
||||
backends:
|
||||
- 'swift-api-backends-all-down'
|
||||
- 'swift-api-backends-majority-down'
|
||||
- 'swift-api-backends-one-down'
|
||||
alarms:
|
||||
- 'swift-api-backends-all-down'
|
||||
- 'swift-api-backends-majority-down'
|
||||
- 'swift-api-backends-one-down'
|
||||
swift-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['swift-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['swift-api-check-failed']
|
||||
swift-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['swift-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['swift-api-local-endpoint']
|
||||
swift-s3-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['swift-s3-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['swift-s3-api-check-failed']
|
||||
swift-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['swift-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['swift-logs-error']
|
||||
<% end -%>
|
||||
cinder-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['cinder-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['cinder-api-http-errors']
|
||||
backends:
|
||||
- 'cinder-api-backends-all-down'
|
||||
- 'cinder-api-backends-majority-down'
|
||||
- 'cinder-api-backends-one-down'
|
||||
alarms:
|
||||
- 'cinder-api-backends-all-down'
|
||||
- 'cinder-api-backends-majority-down'
|
||||
- 'cinder-api-backends-one-down'
|
||||
cinder-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['cinder-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['cinder-api-check-failed']
|
||||
cinder-v2-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['cinder-v2-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['cinder-v2-api-check-failed']
|
||||
cinder-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['cinder-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['cinder-api-local-endpoint']
|
||||
cinder-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['cinder-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['cinder-logs-error']
|
||||
cinder-scheduler:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'cinder-scheduler-all-down'
|
||||
- 'cinder-scheduler-majority-down'
|
||||
- 'cinder-scheduler-one-down'
|
||||
alarms:
|
||||
- 'cinder-scheduler-all-down'
|
||||
- 'cinder-scheduler-majority-down'
|
||||
- 'cinder-scheduler-one-down'
|
||||
cinder-volume:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'cinder-volume-all-down'
|
||||
- 'cinder-volume-majority-down'
|
||||
- 'cinder-volume-one-down'
|
||||
alarms:
|
||||
- 'cinder-volume-all-down'
|
||||
- 'cinder-volume-majority-down'
|
||||
- 'cinder-volume-one-down'
|
||||
<% if not @storage_options["volumes_ceph"] then -%>
|
||||
cinder-volume-logs:
|
||||
apply_to_node: storage
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['cinder-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['cinder-logs-error']
|
||||
<% end -%>
|
||||
glance-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['glance-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['glance-api-http-errors']
|
||||
backends:
|
||||
- 'glance-api-backends-all-down'
|
||||
- 'glance-api-backends-majority-down'
|
||||
- 'glance-api-backends-one-down'
|
||||
alarms:
|
||||
- 'glance-api-backends-all-down'
|
||||
- 'glance-api-backends-majority-down'
|
||||
- 'glance-api-backends-one-down'
|
||||
glance-registry-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'glance-registry-api-backends-all-down'
|
||||
- 'glance-registry-api-backends-majority-down'
|
||||
- 'glance-registry-api-backends-one-down'
|
||||
alarms:
|
||||
- 'glance-registry-api-backends-all-down'
|
||||
- 'glance-registry-api-backends-majority-down'
|
||||
- 'glance-registry-api-backends-one-down'
|
||||
glance-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['glance-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['glance-api-check-failed']
|
||||
glance-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['glance-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['glance-api-local-endpoint']
|
||||
glance-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['glance-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['glance-logs-error']
|
||||
neutron-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['neutron-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['neutron-api-http-errors']
|
||||
backends:
|
||||
- 'neutron-api-backends-all-down'
|
||||
- 'neutron-api-backends-majority-down'
|
||||
- 'neutron-api-backends-one-down'
|
||||
alarms:
|
||||
- 'neutron-api-backends-all-down'
|
||||
- 'neutron-api-backends-majority-down'
|
||||
- 'neutron-api-backends-one-down'
|
||||
neutron-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['neutron-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['neutron-api-check-failed']
|
||||
neutron-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['neutron-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['neutron-api-local-endpoint']
|
||||
neutron-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['neutron-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['neutron-logs-error']
|
||||
neutron-l3:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'neutron-l3-all-down'
|
||||
- 'neutron-l3-majority-down'
|
||||
- 'neutron-l3-one-down'
|
||||
alarms:
|
||||
- 'neutron-l3-all-down'
|
||||
- 'neutron-l3-majority-down'
|
||||
- 'neutron-l3-one-down'
|
||||
neutron-dhcp:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'neutron-dhcp-all-down'
|
||||
- 'neutron-dhcp-majority-down'
|
||||
- 'neutron-dhcp-one-down'
|
||||
alarms:
|
||||
- 'neutron-dhcp-all-down'
|
||||
- 'neutron-dhcp-majority-down'
|
||||
- 'neutron-dhcp-one-down'
|
||||
neutron-metadata:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'neutron-metadata-all-down'
|
||||
- 'neutron-metadata-majority-down'
|
||||
- 'neutron-metadata-one-down'
|
||||
alarms:
|
||||
- 'neutron-metadata-all-down'
|
||||
- 'neutron-metadata-majority-down'
|
||||
- 'neutron-metadata-one-down'
|
||||
neutron-openvswitch:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
workers:
|
||||
- 'neutron-openvswitch-all-down'
|
||||
- 'neutron-openvswitch-majority-down'
|
||||
- 'neutron-openvswitch-one-down'
|
||||
alarms:
|
||||
- 'neutron-openvswitch-all-down'
|
||||
- 'neutron-openvswitch-majority-down'
|
||||
- 'neutron-openvswitch-one-down'
|
||||
neutron-logs-compute:
|
||||
apply_to_node: compute
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['neutron-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['neutron-logs-error']
|
||||
keystone-response-time:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
duration: ['keystone-response-time-duration']
|
||||
members:
|
||||
duration:
|
||||
alarms: ['keystone-response-time-duration']
|
||||
keystone-public-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['keystone-public-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['keystone-public-api-http-errors']
|
||||
backends:
|
||||
- 'keystone-public-api-backends-all-down'
|
||||
- 'keystone-public-api-backends-majority-down'
|
||||
- 'keystone-public-api-backends-one-down'
|
||||
alarms:
|
||||
- 'keystone-public-api-backends-all-down'
|
||||
- 'keystone-public-api-backends-majority-down'
|
||||
- 'keystone-public-api-backends-one-down'
|
||||
keystone-public-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['keystone-public-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['keystone-public-api-check-failed']
|
||||
keystone-public-api-endpoint:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
endpoint: ['keystone-public-api-local-endpoint']
|
||||
members:
|
||||
endpoint:
|
||||
alarms: ['keystone-public-api-local-endpoint']
|
||||
keystone-logs:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
error: ['keystone-logs-error']
|
||||
members:
|
||||
error:
|
||||
alarms: ['keystone-logs-error']
|
||||
keystone-admin-api:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['keystone-admin-api-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['keystone-admin-api-http-errors']
|
||||
backends:
|
||||
- 'keystone-admin-api-backends-all-down'
|
||||
- 'keystone-admin-api-backends-majority-down'
|
||||
- 'keystone-admin-api-backends-one-down'
|
||||
alarms:
|
||||
- 'keystone-admin-api-backends-all-down'
|
||||
- 'keystone-admin-api-backends-majority-down'
|
||||
- 'keystone-admin-api-backends-one-down'
|
||||
<% if @tls_enabled then -%>
|
||||
horizon-https:
|
||||
<% else -%>
|
||||
|
@ -3375,92 +3505,111 @@ lma_collector:
|
|||
<% end -%>
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
http_errors: ['horizon-web-http-errors']
|
||||
members:
|
||||
http_errors:
|
||||
alarms: ['horizon-web-http-errors']
|
||||
backends:
|
||||
- 'horizon-web-api-backends-all-down'
|
||||
- 'horizon-web-api-backends-majority-down'
|
||||
- 'horizon-web-api-backends-one-down'
|
||||
alarms:
|
||||
- 'horizon-web-api-backends-all-down'
|
||||
- 'horizon-web-api-backends-majority-down'
|
||||
- 'horizon-web-api-backends-one-down'
|
||||
nova-instances:
|
||||
#TODO(scroiset): apply on compute nodes
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
creation-time: ['instance-creation-time-warning']
|
||||
members:
|
||||
creation-time:
|
||||
alarms: ['instance-creation-time-warning']
|
||||
nova-free-vcpu:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
nova-free-vcpu: ['total-nova-free-vcpu-warning']
|
||||
members:
|
||||
nova-free-vcpu:
|
||||
alarms: ['total-nova-free-vcpu-warning']
|
||||
nova-free-memory:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
nova-free-memory: ['total-nova-free-memory-warning']
|
||||
members:
|
||||
nova-free-memory:
|
||||
alarms: ['total-nova-free-memory-warning']
|
||||
ceph-mon-cluster:
|
||||
apply_to_node: ceph-mon
|
||||
alerting: enabled
|
||||
alarms:
|
||||
health: ['ceph-health-critical', 'ceph-health-warning']
|
||||
capacity: ['ceph-capacity-critical', 'ceph-capacity-warning']
|
||||
members:
|
||||
health:
|
||||
alarms: ['ceph-health-critical', 'ceph-health-warning']
|
||||
capacity:
|
||||
alarms: ['ceph-capacity-critical', 'ceph-capacity-warning']
|
||||
ceph-mon-service:
|
||||
apply_to_node: ceph-mon
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['ceph-mon-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['ceph-mon-check']
|
||||
<% if @storage_options["volumes_ceph"] then -%>
|
||||
ceph-osd-service:
|
||||
apply_to_node: storage
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['ceph-osd-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['ceph-osd-check']
|
||||
<% end -%>
|
||||
elasticsearch-cluster:
|
||||
apply_to_node: elasticsearch-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
health: ['elasticsearch-health-critical', 'elasticsearch-health-warning']
|
||||
members:
|
||||
health:
|
||||
alarms: ['elasticsearch-health-critical', 'elasticsearch-health-warning']
|
||||
elasticsearch-service:
|
||||
apply_to_node: elasticsearch-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['elasticsearch-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['elasticsearch-check']
|
||||
influxdb-service:
|
||||
apply_to_node: influxdb-nodes
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['influxdb-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['influxdb-check']
|
||||
influxdb-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['influxdb-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['influxdb-api-check-failed']
|
||||
haproxy-openstack:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['haproxy-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['haproxy-check']
|
||||
pacemaker-service:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['pacemaker-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['pacemaker-check']
|
||||
libvirt-service:
|
||||
apply_to_node: compute
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['libvirt-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['libvirt-check']
|
||||
memcached-service:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
check: ['memcached-check']
|
||||
members:
|
||||
check:
|
||||
alarms: ['memcached-check']
|
||||
ceilometer-api-check:
|
||||
alerting: enabled
|
||||
alarms:
|
||||
vip: ['ceilometer-api-check-failed']
|
||||
members:
|
||||
vip:
|
||||
alarms: ['ceilometer-api-check-failed']
|
||||
mysqld-tcp:
|
||||
apply_to_node: controller
|
||||
alerting: enabled
|
||||
alarms:
|
||||
members:
|
||||
backends:
|
||||
- 'mysqld-tcp-api-backends-all-down'
|
||||
- 'mysqld-tcp-api-backends-majority-down'
|
||||
- 'mysqld-tcp-api-backends-one-down'
|
||||
alarms:
|
||||
- 'mysqld-tcp-api-backends-all-down'
|
||||
- 'mysqld-tcp-api-backends-majority-down'
|
||||
- 'mysqld-tcp-api-backends-one-down'
|
||||
|
|
Loading…
Reference in New Issue