Add Nagios support for OpenStack service status
implements blueprint alerting-lma-collector Change-Id: I722b7a83c5dd391a86423d6af526355bc2ed8bbc
This commit is contained in:
parent
6e914f0d1c
commit
929e15c324
|
@ -55,3 +55,22 @@ if $influxdb_mode == 'local' {
|
|||
fail("Could not find node '${influxdb_node_name}' in the environment")
|
||||
}
|
||||
}
|
||||
|
||||
# Validate the alerting settings when the collector is configured to use the
# Nagios server deployed in this environment ('local' mode).
$nagios_mode = $lma_collector['nagios_mode']
if $nagios_mode == 'local' {
  # Check that the LMA-Infrastructure-Alerting plugin is enabled for that
  # environment and that the node names match
  $infra_alerting = hiera('lma_infrastructure_alerting', false)
  if ! $infra_alerting {
    fail('Could not get the LMA Infrastructure Alerting parameters. The LMA-Infrastructure-Alerting plugin is probably not installed.')
  }
  elsif ! $infra_alerting['metadata']['enabled'] {
    fail('Could not get the LMA Infrastructure Alerting parameters. The LMA-Infrastructure-Alerting plugin is probably not enabled for this environment.')
  }

  # Check that the LMA-Infrastructure-Alerting node exists in the environment.
  # The node name must be read from the alerting plugin settings loaded above,
  # not from the InfluxDB-Grafana settings.
  $infra_alerting_node_name = $infra_alerting['node_name']
  $infra_alerting_nodes = filter_nodes(hiera('nodes'), 'user_node_name', $infra_alerting_node_name)
  if size($infra_alerting_nodes) < 1 {
    fail("Could not find node '${infra_alerting_node_name}' in the environment")
  }
}
|
||||
|
|
|
@ -159,3 +159,37 @@ if $lma_collector['influxdb_mode'] != 'disabled' {
|
|||
class { 'lma_collector::metrics::pacemaker_resources': }
|
||||
}
|
||||
}
|
||||
|
||||
# Configure the collector to send OpenStack service statuses to Nagios unless
# alerting is disabled. The Nagios endpoint is either given explicitly
# ('remote' mode) or derived from the LMA-Infrastructure-Alerting plugin
# settings of this environment ('local' mode).
$nagios_mode = $lma_collector['nagios_mode']
if $nagios_mode != 'disabled' {

  $deployment_id = hiera('deployment_id')
  if $nagios_mode == 'remote' {
    $nagios_url = $lma_collector['nagios_url']
    $nagios_user = $lma_collector['nagios_user']
    $nagios_password = $lma_collector['nagios_password']
  } elsif $nagios_mode == 'local' {
    $lma_infra_alerting = hiera('lma_infrastructure_alerting', false)
    # Fail with an explicit message instead of dereferencing 'false' below.
    if ! $lma_infra_alerting {
      fail('Could not get the LMA Infrastructure Alerting parameters. The LMA-Infrastructure-Alerting plugin is probably not installed.')
    }
    $nagios_node_name = $lma_infra_alerting['node_name']
    $nagios_nodes = filter_nodes(hiera('nodes'), 'user_node_name', $nagios_node_name)
    # Without a matching node there is no address to build the URL from.
    if size($nagios_nodes) < 1 {
      fail("Could not find node '${nagios_node_name}' in the environment")
    }
    $nagios_server = $nagios_nodes[0]['internal_address']
    $nagios_user = $lma_infra_alerting['nagios_user']
    $nagios_password = $lma_infra_alerting['nagios_password']

    # TODO: $http_port and $http_path must match automatically the
    # lma_infra_monitoring configuration.
    $http_port = $lma_collector::params::nagios_http_port
    $http_path = $lma_collector::params::nagios_http_path
    $nagios_url = "http://${nagios_server}:${http_port}/${http_path}"
  } else {
    fail("'${nagios_mode}' mode not supported for the infrastructure alerting service")
  }

  class { 'lma_collector::nagios':
    openstack_deployment_name => $deployment_id,
    url                       => $nagios_url,
    user                      => $nagios_user,
    password                  => $nagios_password,
  }
}
|
||||
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
-- Copyright 2015 Mirantis, Inc.
|
||||
--
|
||||
-- Licensed under the Apache License, Version 2.0 (the "License");
|
||||
-- you may not use this file except in compliance with the License.
|
||||
-- You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing, software
|
||||
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
-- See the License for the specific language governing permissions and
|
||||
-- limitations under the License.
|
||||
require 'table'
|
||||
require 'string'
|
||||
require 'cjson'
|
||||
local utils = require 'lma_utils'
|
||||
|
||||
-- Separator placed between the lines of the plugin output: a literal
-- backslash followed by 'n', not a newline character.
local nagios_break_line = "\\n"

-- Host name sent with every submission, taken from the encoder configuration.
local host = read_config("nagios_host")

-- Form fields sent with each submission. The 'service', 'plugin_state' and
-- 'plugin_output' entries start out absent and are filled in by
-- process_message() for every message.
local data = {
    -- NOTE(review): presumably the Nagios cmd.cgi command type and mode used
    -- to submit a passive service check result -- confirm against Nagios docs.
    cmd_typ = "30",
    cmd_mod = "2",
    host = host,
    performance_data = "",
}
|
||||
|
||||
-- Encode a value for use in a URL-encoded form body.
-- Every character other than alphanumerics, space, '-', '_', '.' and '~' is
-- replaced by '%' followed by its two-digit uppercase hex byte value, then
-- each space is replaced by '+'. A nil/false argument is returned unchanged.
function url_encode(str)
    if not str then
        return str
    end
    local function to_hex(ch)
        return string.format("%%%02X", string.byte(ch))
    end
    local escaped = string.gsub(str, "([^%w %-%_%.%~])", to_hex)
    -- Parentheses drop gsub's second result (the substitution count).
    return (string.gsub(escaped, " ", "+"))
end
|
||||
|
||||
-- Encode a service status message as a URL-encoded form body.
--
-- Reads 'Fields[service]', 'Fields[status]' and 'Payload' from the current
-- message, maps the service to its configured name through read_config(),
-- and injects a 'key=value&...' payload built from the module-level 'data'
-- table. The plugin output is a title line ("<service name> <status label>")
-- followed by the items decoded from the JSON payload, joined with
-- nagios_break_line.
--
-- Returns 0 on success, or -1 when the message has no service or status
-- field, or when the service has no configured name.
function process_message()
    local service = read_message('Fields[service]')
    -- Do not pass nil to read_config(): a message without the field cannot
    -- be mapped to a service name.
    if service == nil then
        return -1
    end
    local service_name = read_config(service)
    if not service_name then
        return -1
    end
    local status = read_message('Fields[status]')
    -- Without a status there is no plugin state to report.
    if status == nil then
        return -1
    end
    local payload = read_message('Payload')
    data['service'] = service_name
    data['plugin_state'] = status

    -- The payload may be empty, invalid JSON, or valid JSON that is not an
    -- array/object (e.g. null); fall back to a placeholder in those cases.
    local ok, details = pcall(cjson.decode, payload)
    if not ok or type(details) ~= 'table' then details = {'no detail'} end

    -- Fall back to the raw status value when it has no known label, so that
    -- string.format never receives nil.
    local label = utils.global_status_to_label_map[status] or tostring(status)
    local lines = {string.format('%s %s', service_name, label)}
    for _, detail in ipairs(details) do
        -- tostring() keeps table.concat from failing on non-string items.
        lines[#lines+1] = tostring(detail)
    end
    data['plugin_output'] = table.concat(lines, nagios_break_line)

    local params = {}
    for k, v in pairs(data) do
        params[#params+1] = string.format("%s=%s", k, url_encode(v))
    end
    inject_payload('txt', 'nagios', table.concat(params, '&'))

    return 0
end
|
|
@ -69,9 +69,9 @@ function process_message ()
|
|||
local prev = all_service_status[service_name].global_status or utils.global_status_map.UNKNOWN
|
||||
local updated
|
||||
updated = (prev ~= global_status or #events > 0)
|
||||
if updated then -- append not UP status elements in details
|
||||
for k, v in pairs(not_up_status) do events[#events+1] = v end
|
||||
end
|
||||
-- always append not UP status elements in details
|
||||
for k, v in pairs(not_up_status) do events[#events+1] = v end
|
||||
|
||||
local details = ''
|
||||
if #events > 0 then
|
||||
details = cjson.encode(events)
|
||||
|
|
|
@ -98,6 +98,14 @@ class lma_collector (
|
|||
require => File[$plugins_dir]
|
||||
}
|
||||
|
||||
file { "${plugins_dir}/encoders":
|
||||
ensure => directory,
|
||||
source => 'puppet:///modules/lma_collector/plugins/encoders',
|
||||
recurse => remote,
|
||||
notify => Class['lma_collector::service'],
|
||||
require => File[$plugins_dir]
|
||||
}
|
||||
|
||||
if size($lma_collector::params::additional_packages) > 0 {
|
||||
package { $lma_collector::params::additional_packages:
|
||||
ensure => present,
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright 2015 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# == Class: lma_collector::nagios
#
# Declares the Heka sandbox encoder and HTTP output used to send messages of
# type 'heka.sandbox.status' to a Nagios URL.
#
# === Parameters
#
# [*openstack_deployment_name*]
#   Suffix appended to the Nagios host name ("<host>-env<name>").
# [*url*]
#   URL the HTTP output posts to. Mandatory.
# [*user*]
#   User name for the HTTP output.
# [*password*]
#   Password for the HTTP output.
# [*ensure*]
#   Not used by the resources declared in this class.
#
class lma_collector::nagios (
  $openstack_deployment_name = '',
  $url = undef,
  $user = $lma_collector::params::nagios_user,
  $password = $lma_collector::params::nagios_password,
  $ensure = present,
) inherits lma_collector::params {
  include lma_collector::service

  if $url == undef {
    fail('url parameter is undef!')
  }
  validate_string($url)

  # This logic must be identical to the one in lma-infra-alerting-plugin
  $nagios_host = $lma_collector::params::nagios_hostname_service_status
  $_nagios_host = "${nagios_host}-env${openstack_deployment_name}"

  # Build a new hash rather than assigning a key on the params hash: the
  # encoder configuration is the service name map plus the Nagios host name.
  $config = merge(
    $lma_collector::params::nagios_event_status_name_to_service_name_map,
    { 'nagios_host' => $_nagios_host }
  )

  heka::encoder::sandbox { 'nagios':
    config_dir => $lma_collector::params::config_dir,
    filename   => "${lma_collector::params::plugins_dir}/encoders/event_nagios.lua",
    config     => $config,
    notify     => Class['lma_collector::service'],
  }

  heka::output::http { 'nagios':
    config_dir      => $lma_collector::params::config_dir,
    url             => $url,
    message_matcher => 'Type == \'heka.sandbox.status\'',
    username        => $user,
    password        => $password,
    encoder         => 'nagios',
    timeout         => $lma_collector::params::nagios_timeout,
    headers         => {'Content-Type' => 'application/x-www-form-urlencoded'},
    require         => Heka::Encoder::Sandbox['nagios'],
    notify          => Class['lma_collector::service'],
  }
}
|
|
@ -54,7 +54,7 @@ class lma_collector::params {
|
|||
$hekad_max_process_inject = 2
|
||||
|
||||
# We inject as many messages as the number of OpenStack services in the Heka
|
||||
# filter 'service_accumulator_states'. Currently 9 services.
|
||||
# filter 'service_accumulator_states'. Currently 10 services.
|
||||
# Hekad default is fine so far with 10 messages allowed from TimerEvent function
|
||||
$hekad_max_timer_inject = 10
|
||||
|
||||
|
@ -102,9 +102,14 @@ class lma_collector::params {
|
|||
$heartbeat_timeout = 30
|
||||
$service_status_timeout = 65
|
||||
$service_status_payload_name = 'service_status'
|
||||
|
||||
# Catch all metrics used to compute OpenStack service statuses
|
||||
$service_status_metrics_regexp = [
|
||||
'^openstack.(nova|cinder|neutron).(services|agents).*(up|down|disabled)$',
|
||||
'^haproxy.backend.*.servers.(down|up)$',
|
||||
# Exception for mysqld backend because the MySQL service status is
|
||||
# computed by a dedicated filter and this avoids sending an annoying
|
||||
# status Heka message.
|
||||
'^haproxy.backend.(horizon|nova|cinder|neutron|ceilometer|keystone|swift|heat|glance|radosgw)(-.+)?.servers.(down|up)$',
|
||||
'^pacemaker.resource.vip__public.active$',
|
||||
'^openstack.*check_api$'
|
||||
]
|
||||
|
@ -149,4 +154,29 @@ class lma_collector::params {
|
|||
'sahara' => 'sahara-api',
|
||||
'swift' => 'swift-api',
|
||||
}
|
||||
|
||||
# Nagios parameters
|
||||
#
|
||||
$nagios_server = 'localhost'
|
||||
$nagios_http_port = 80
|
||||
$nagios_http_path = 'nagios3/cgi-bin/cmd.cgi'
|
||||
$nagios_user = 'nagiosadmin'
|
||||
$nagios_password = ''
|
||||
$nagios_timeout = 2
|
||||
|
||||
# Following parameter must match the lma_infrastructure_alerting::params::nagios_openstack_dummy_hostname
|
||||
$nagios_hostname_service_status = '00-openstack-services'
|
||||
# Following parameter must match the lma_infrastructure_alerting::params::openstack_core_services
|
||||
$nagios_event_status_name_to_service_name_map = {
|
||||
'nova' => 'openstack.nova.status',
|
||||
'keystone' => 'openstack.keystone.status',
|
||||
'glance' => 'openstack.glance.status',
|
||||
'cinder' => 'openstack.cinder.status',
|
||||
'neutron' => 'openstack.neutron.status',
|
||||
'heat' => 'openstack.heat.status',
|
||||
'horizon' => 'openstack.horizon.status',
|
||||
'swift' => 'openstack.swift.status',
|
||||
'ceilometer' => 'openstack.ceilometer.status',
|
||||
'radosgw' => 'openstack.radosgw.status',
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@ attributes:
|
|||
type: "text"
|
||||
regex: *not_empty_parameter
|
||||
restrictions: *disable_influxdb_parameters
|
||||
|
||||
influxdb_password:
|
||||
value: 'lmapass'
|
||||
label: 'InfluxDB password'
|
||||
|
@ -104,3 +105,48 @@ attributes:
|
|||
restrictions:
|
||||
- condition: "true"
|
||||
action: "hide"
|
||||
|
||||
nagios_mode:
|
||||
type: "radio"
|
||||
weight: 90
|
||||
value: "local"
|
||||
label: "Alerting"
|
||||
values:
|
||||
- data: "disabled"
|
||||
label: "Disabled"
|
||||
- data: "local"
|
||||
label: "Local node"
|
||||
- data: "remote"
|
||||
label: "Remote server"
|
||||
restrictions: *all_disabled_msg
|
||||
|
||||
nagios_url:
|
||||
value: ''
|
||||
label: 'Nagios URL'
|
||||
description: 'ie: http://<server>/nagios3/cgi-bin/cmd.cgi'
|
||||
weight: 100
|
||||
type: "text"
|
||||
regex: &node_url_regex
|
||||
source: '^(http(s?):\/\/)?(((www\.)?+[a-zA-Z0-9\.\-\_]+(\.[a-zA-Z]{2,6}){0,})|(\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b))(\/[a-zA-Z0-9\_\-\s\.\/\?\%\#\&\=]*)?$'
|
||||
error: "Invalid URL"
|
||||
restrictions: &not_remote
|
||||
- condition: "settings:lma_collector.nagios_mode.value != 'remote'"
|
||||
action: "disable"
|
||||
|
||||
nagios_user:
|
||||
value: 'nagiosadmin'
|
||||
label: 'Nagios user'
|
||||
description: ''
|
||||
weight: 105
|
||||
type: "text"
|
||||
regex: *not_empty_parameter
|
||||
restrictions: *not_remote
|
||||
|
||||
nagios_password:
|
||||
value: ''
|
||||
label: 'Nagios password'
|
||||
description: ''
|
||||
weight: 110
|
||||
type: "password"
|
||||
regex: *not_empty_parameter
|
||||
restrictions: *not_remote
|
||||
|
|
Loading…
Reference in New Issue