Add CDM checks and alarms

This commit is contained in:
git-harry 2014-09-18 11:05:40 +01:00
parent 2d868acd34
commit badbea58f2
4 changed files with 124 additions and 0 deletions

View File

@ -0,0 +1,18 @@
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- hosts: hosts
roles:
- maas_cdm

View File

@ -323,3 +323,17 @@
user: root
roles:
- maas_local
- hosts: hosts
vars:
check_name: disk_utilisation
check_details: file={{ check_name }}.py
check_period: "{{ maas_check_period }}"
check_timeout: "{{ maas_check_timeout }}"
alarms:
- { 'name': 'percentage_disk_utilisation_sda', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sda"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sda >= 90%"); }' }
- { 'name': 'percentage_disk_utilisation_sdb', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdb"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdb >= 90%"); }' }
- { 'name': 'percentage_disk_utilisation_sdc', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdc"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdc >= 90%"); }' }
user: root
roles:
- maas_local

View File

@ -0,0 +1,45 @@
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- name: Get entity ID for physical_host
shell: raxmon-entities-list | grep "label={{ inventory_hostname|quote }}{{ maas_fqdn_extension|default('') }} " | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
register: entity_id
- name: Validate if check exists
shell: raxmon-checks-list --entity-id {{ entity_id.stdout|quote }} | grep "label={{ check_name|quote }}--{{ inventory_hostname|quote }}"
register: check_exists
ignore_errors: True
- name: Create check if it does not exist
command: raxmon-checks-create --entity-id {{ entity_id.stdout }} --type {{ agent_type }} --label {{ check_name }}--{{ inventory_hostname }} --details {{ check_details }} --period {{ check_period }} --timeout {{ check_timeout }}
when: check_exists|failed
- name: Get check ID for newly created check
shell: raxmon-checks-list --entity-id {{ entity_id.stdout|quote }} | grep "label={{ check_name|quote }}--{{ inventory_hostname|quote }}" | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
register: check_id
- name: Validate if alarm exists
shell: raxmon-alarms-list --entity-id {{ entity_id.stdout|quote }} | grep "label={{ item.name|quote }}--{{ inventory_hostname|quote }}"
register: alarm_exists
ignore_errors: True
when: alarms is defined
with_items: alarms
- name: Create alarm if it does not exist
shell: raxmon-alarms-create --entity-id {{ entity_id.stdout|quote }} --check-id {{ check_id.stdout|quote }} --notification-plan {{ maas_notification_plan }} --label {{ item[1].name|quote }}--{{ inventory_hostname|quote }} --criteria {{ item[1].criteria|quote }}
when: item[0]|failed and alarms is defined
with_together:
- alarm_exists.results
- alarms

View File

@ -0,0 +1,47 @@
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- include: cdm.yml
vars:
check_name: cpu
check_details: "{}"
check_period: "{{ maas_check_period }}"
check_timeout: "{{ maas_check_timeout }}"
agent_type: "agent.cpu"
alarms:
- { 'name': 'idle_percent_average', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["idle_percent_average"] <= 10.0) { return new AlarmStatus(WARNING, "CPU time spent idle has dropped to <= 10%"); }' }
user: root
- include: cdm.yml
vars:
check_name: filesystem
check_details: "target=/"
check_period: "{{ maas_check_period }}"
check_timeout: "{{ maas_check_timeout }}"
agent_type: "agent.filesystem"
alarms:
- { 'name': 'Disk space used on /', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Root filesystem is >= 95% full."); }' }
user: root
- include: cdm.yml
vars:
check_name: memory
check_details: "{}"
check_period: "{{ maas_check_period }}"
check_timeout: "{{ maas_check_timeout }}"
agent_type: "agent.memory"
alarms:
- { 'name': 'Memory used', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["actual_used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Memory is 95%+ in use."); }' }
user: root