Add CDM checks and alarms
This commit is contained in:
parent
2d868acd34
commit
badbea58f2
18
rpc_deployment/playbooks/monitoring/maas_cdm.yml
Normal file
18
rpc_deployment/playbooks/monitoring/maas_cdm.yml
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
# Copyright 2014, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Apply the maas_cdm role to every host in the "hosts" group.
- hosts: hosts
  roles:
    - role: maas_cdm
|
@ -323,3 +323,17 @@
|
||||
user: root
|
||||
roles:
|
||||
- maas_local
|
||||
|
||||
# Run the maas_local role as root on the "hosts" group for the
# disk_utilisation check. check_details resolves to
# "file=disk_utilisation.py"; each alarm returns WARNING when the matching
# disk_utilisation_sd{a,b,c} metric is >= 90.0.
- hosts: hosts
  user: root
  vars:
    check_name: disk_utilisation
    check_details: file={{ check_name }}.py
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: 'percentage_disk_utilisation_sda'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sda"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sda >= 90%"); }'
      - name: 'percentage_disk_utilisation_sdb'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdb"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdb >= 90%"); }'
      - name: 'percentage_disk_utilisation_sdc'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdc"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdc >= 90%"); }'
  roles:
    - maas_local
|
||||
|
45
rpc_deployment/roles/maas_cdm/tasks/cdm.yml
Normal file
45
rpc_deployment/roles/maas_cdm/tasks/cdm.yml
Normal file
@ -0,0 +1,45 @@
|
||||
---
|
||||
# Copyright 2014, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# List the monitoring entities, keep the line whose label is this host's
# inventory name (plus the optional maas_fqdn_extension) followed by a space,
# and reduce it to the entity ID, which is registered as entity_id.stdout.
#
# The grep pattern is assembled in Jinja and shell-quoted as ONE word.
# Applying |quote to a fragment inside a double-quoted shell string leaves
# literal single quotes in the pattern whenever the value needs quoting.
- name: Get entity ID for physical_host
  shell: raxmon-entities-list | grep {{ ('label=' ~ inventory_hostname ~ (maas_fqdn_extension|default('')) ~ ' ')|quote }} | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  register: entity_id
|
||||
|
||||
# Probe for an existing check labelled "<check_name>--<inventory_hostname>"
# on the entity. grep's non-zero exit status (no match) marks the task as
# failed; ignore_errors keeps the play going so the result can drive the
# "Create check" task.
#
# The whole pattern is quoted as one shell word; quoting fragments inside a
# double-quoted string would embed literal quote characters in the pattern.
- name: Validate if check exists
  shell: raxmon-checks-list --entity-id {{ entity_id.stdout|quote }} | grep {{ ('label=' ~ check_name ~ '--' ~ inventory_hostname)|quote }}
  register: check_exists
  ignore_errors: true
|
||||
|
||||
# Create the check only when the preceding existence probe failed.
#
# String-valued arguments are shell-quoted, matching the other tasks in this
# file: the command module splits its argument string on whitespace, so an
# empty or space-containing value would otherwise shift or split the
# arguments. check_period / check_timeout are left bare because they may be
# numeric values.
- name: Create check if it does not exist
  command: raxmon-checks-create --entity-id {{ entity_id.stdout|quote }} --type {{ agent_type|quote }} --label {{ (check_name ~ '--' ~ inventory_hostname)|quote }} --details {{ check_details|quote }} --period {{ check_period }} --timeout {{ check_timeout }}
  when: check_exists|failed
|
||||
|
||||
# Re-list the entity's checks, select the one labelled
# "<check_name>--<inventory_hostname>" and strip the line down to its ID,
# registered as check_id.stdout for the alarm tasks.
#
# The grep pattern is built and quoted as a single shell word so that values
# needing quoting do not leave literal quote characters in the pattern.
- name: Get check ID for newly created check
  shell: raxmon-checks-list --entity-id {{ entity_id.stdout|quote }} | grep {{ ('label=' ~ check_name ~ '--' ~ inventory_hostname)|quote }} | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  register: check_id
|
||||
|
||||
# For each entry in "alarms", probe for an existing alarm labelled
# "<alarm name>--<inventory_hostname>". A failed grep (no match) is tolerated
# via ignore_errors; the per-item results are registered in
# alarm_exists.results for the "Create alarm" task.
#
# The pattern is quoted as one shell word. Alarm names may contain spaces
# (e.g. 'Memory used'); quoting the name on its own inside a double-quoted
# string put literal single quotes into the pattern, so such alarms never
# matched and were treated as missing on every run.
- name: Validate if alarm exists
  shell: raxmon-alarms-list --entity-id {{ entity_id.stdout|quote }} | grep {{ ('label=' ~ item.name ~ '--' ~ inventory_hostname)|quote }}
  register: alarm_exists
  ignore_errors: true
  when: alarms is defined
  with_items: alarms
|
||||
|
||||
# Walk the existence-probe results and the alarm definitions in lockstep
# (item[0] = probe result, item[1] = alarm definition) and create every alarm
# whose probe failed.
#
# Every interpolated argument is shell-quoted, including the notification
# plan, so no value can be word-split or interpreted by the shell.
- name: Create alarm if it does not exist
  shell: raxmon-alarms-create --entity-id {{ entity_id.stdout|quote }} --check-id {{ check_id.stdout|quote }} --notification-plan {{ maas_notification_plan|quote }} --label {{ item[1].name|quote }}--{{ inventory_hostname|quote }} --criteria {{ item[1].criteria|quote }}
  when: item[0]|failed and alarms is defined
  with_together:
    - alarm_exists.results
    - alarms
|
47
rpc_deployment/roles/maas_cdm/tasks/main.yml
Normal file
47
rpc_deployment/roles/maas_cdm/tasks/main.yml
Normal file
@ -0,0 +1,47 @@
|
||||
---
|
||||
# Copyright 2014, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# CPU check: include cdm.yml with agent type agent.cpu and a single alarm
# that returns WARNING when idle_percent_average is <= 10.0.
- include: cdm.yml
  user: root
  vars:
    check_name: cpu
    agent_type: "agent.cpu"
    check_details: "{}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: 'idle_percent_average'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["idle_percent_average"] <= 10.0) { return new AlarmStatus(WARNING, "CPU time spent idle has dropped to <= 10%"); }'
|
||||
|
||||
# Filesystem check: include cdm.yml with agent type agent.filesystem
# targeting "/" and a single alarm that returns WARNING when used/total is
# >= 95.0 percent.
- include: cdm.yml
  user: root
  vars:
    check_name: filesystem
    agent_type: "agent.filesystem"
    check_details: "target=/"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: 'Disk space used on /'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Root filesystem is >= 95% full."); }'
|
||||
|
||||
# Memory check: include cdm.yml with agent type agent.memory and a single
# alarm that returns WARNING when actual_used/total is >= 95.0 percent.
- include: cdm.yml
  user: root
  vars:
    check_name: memory
    agent_type: "agent.memory"
    check_details: "{}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: 'Memory used'
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["actual_used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Memory is 95%+ in use."); }'
|
Loading…
Reference in New Issue
Block a user