---
lma_collector:
  # The GSE policies are applied to clusters and describe how the GSE
  # cluster filter computes the cluster's status. The LMA toolchain is
  # pre-configured with a few policies but everything can be customized:
  # thresholds can be modified, new policies can be defined, and so on.
  #
  # A policy consists of a list of rules which are evaluated against the
  # current status of the cluster's members. When one of the rules matches,
  # the cluster's status gets the value associated with the rule and the
  # evaluation stops there. The last rule of the list is usually a catch-all
  # rule that defines the default status in case no other rule matched.
  #
  # The declaration of a policy rule is similar to an alarm rule except that:
  # - there are no 'metric', 'window' and 'period' parameters.
  # - there are only 2 supported functions:
  #   - 'count' which returns the number of members that match the passed value(s)
  #   - 'percent' which returns the percentage of members that match the passed value(s)
  #
  # The following rule definition reads as "the cluster's status is critical
  # if more than 50% of its members are either down or critical":
  #
  # - status: critical
  #   trigger:
  #     logical_operator: or
  #     rules:
  #       - function: percent
  #         arguments: [ down, critical ]
  #         relational_operator: '>'
  #         threshold: 50
  #
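  # For illustration only, a hypothetical evaluation of the rule above (this
  # comment is explanatory, not an additional policy): with 5 members whose
  # statuses are okay, okay, critical, down and down, 'percent' with the
  # arguments [ down, critical ] returns 60; since 60 > 50 the rule matches
  # and the cluster's status is set to critical.
  #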
  gse_policies:
    # A policy defining that the cluster's status depends on the member with
    # the highest severity, typically used for a cluster of services.
    highest_severity:
      - status: down
        trigger:
          logical_operator: or
          rules:
            - function: count
              arguments: [ down ]
              relational_operator: '>'
              threshold: 0
      - status: critical
        trigger:
          logical_operator: or
          rules:
            - function: count
              arguments: [ critical ]
              relational_operator: '>'
              threshold: 0
      - status: warning
        trigger:
          logical_operator: or
          rules:
            - function: count
              arguments: [ warning ]
              relational_operator: '>'
              threshold: 0
      - status: okay
        trigger:
          logical_operator: or
          rules:
            - function: count
              arguments: [ okay ]
              relational_operator: '>'
              threshold: 0
      - status: unknown

    # A policy which is typically used for clusters managed by Pacemaker
    # with the no-quorum-policy set to 'stop'.
    majority_of_members:
      - status: down
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ down ]
              relational_operator: '>'
              threshold: 50
      - status: critical
        trigger:
          logical_operator: and
          rules:
            - function: percent
              arguments: [ down, critical ]
              relational_operator: '>'
              threshold: 20
            - function: percent
              arguments: [ okay ]
              relational_operator: '<'
              threshold: 50
      - status: warning
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ okay ]
              relational_operator: '<'
              threshold: 50
      - status: okay

    # A policy that is used to derive a cluster status based
    # on the okay or down status of its members.
    availability_of_members:
      - status: down
        trigger:
          logical_operator: or
          rules:
            - function: count
              arguments: [ okay ]
              relational_operator: '=='
              threshold: 0
      - status: critical
        trigger:
          logical_operator: and
          rules:
            - function: count
              arguments: [ okay ]
              relational_operator: '=='
              threshold: 1
            - function: count
              arguments: [ critical, down ]
              relational_operator: '>'
              threshold: 1
      - status: warning
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ okay ]
              relational_operator: '<'
              threshold: 100
      - status: okay
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ okay ]
              relational_operator: '=='
              threshold: 100
      - status: unknown

    # A policy that is used to derive a cluster status based
    # on the health status of its members.
    status_of_members:
      - status: down
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ down ]
              relational_operator: '=='
              threshold: 100
      - status: critical
        trigger:
          logical_operator: and
          rules:
            - function: count
              arguments: [ okay, warning ]
              relational_operator: '<='
              threshold: 1
            - function: count
              arguments: [ critical, down, unknown ]
              relational_operator: '>'
              threshold: 0
      - status: warning
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ okay ]
              relational_operator: '!='
              threshold: 100
      - status: okay
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ okay ]
              relational_operator: '=='
              threshold: 100
      - status: unknown

    # A policy that is typically used for storage or compute clusters.
    majority_of_node_members:
      - status: down
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ down ]
              relational_operator: '=='
              threshold: 100
      - status: critical
        trigger:
          logical_operator: and
          rules:
            - function: percent
              arguments: [ down, critical ]
              relational_operator: '>='
              threshold: 50
      - status: warning
        trigger:
          logical_operator: or
          rules:
            - function: percent
              arguments: [ down, critical, warning, unknown ]
              relational_operator: '>'
              threshold: 0
      - status: okay

  gse_cluster_service:
    input_message_types:
      - afd_service_metric
    aggregator_flag: true
    # the field in the input messages to identify the cluster
    cluster_field: service
    # the field in the input messages to identify the cluster's member
    member_field: source
    output_message_type: gse_service_cluster_metric
    output_metric_name: cluster_service_status
    interval: 10
    warm_up_period: 20
    alerting: enabled
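    # Illustrative note, not additional configuration: with the settings
    # above, an incoming 'afd_service_metric' message whose 'service' field
    # is 'nova-api' and whose 'source' field is 'backends' would (assuming
    # such a message is emitted) be attached to the 'nova-api' cluster below
    # as its 'backends' member.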
    clusters:
      nova-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      nova-logs-compute:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      heat-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      cinder-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      glance-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      neutron-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      neutron-logs-compute:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      keystone-logs:
        policy: highest_severity
        group_by: hostname
        members:
          - error
      mysqld-tcp:
        policy: highest_severity
        group_by: member
        members:
          - backends
      mysql:
        policy: majority_of_members
        group_by: hostname
        members:
          - node-status
          - check
      haproxy-openstack:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
      apache:
        policy: availability_of_members
        group_by: hostname
        members:
          - worker
          - check
      memcached-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
      rabbitmq-cluster:
        policy: highest_severity
        group_by: member
        members:
          - memory
          - disk
          - queue
          - pacemaker
      rabbitmq-service:
        # A check failure on a single node doesn't mean that the whole cluster
        # is down; this is why a 'hostname' group_by and the
        # 'majority_of_members' policy are used here.
        policy: majority_of_members
        group_by: hostname
        members:
          - check
      nova-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      nova-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      nova-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      nova-novncproxy-websocket:
        policy: highest_severity
        group_by: member
        members:
          - backends
      nova-metadata-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
      nova-scheduler:
        policy: highest_severity
        group_by: member
        members:
          - workers
      nova-cert:
        policy: highest_severity
        group_by: member
        members:
          - workers
      nova-consoleauth:
        policy: highest_severity
        group_by: member
        members:
          - workers
      nova-compute:
        policy: highest_severity
        group_by: member
        members:
          - workers
      nova-conductor:
        policy: highest_severity
        group_by: member
        members:
          - workers
      # The AFDs are emitted by all the controller nodes because all of them
      # collect the openstack_nova_instance_creation_time metric. Hence the
      # service is considered not okay when a majority of controllers report
      # not okay.
      nova-instances:
        policy: majority_of_members
        group_by: hostname
        members:
          - creation-time
      cinder-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      cinder-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      cinder-v2-api-check:
        policy: highest_severity
        group_by: member
        members:
          # Cinder V2 backends are in fact the same as the Cinder backends
          - vip
      cinder-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      cinder-scheduler:
        policy: highest_severity
        group_by: member
        members:
          - workers
      cinder-volume:
        policy: highest_severity
        group_by: member
        members:
          - workers
      neutron-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      neutron-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      neutron-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      neutron-l3:
        policy: highest_severity
        group_by: member
        members:
          - workers
      neutron-dhcp:
        policy: highest_severity
        group_by: member
        members:
          - workers
      neutron-metadata:
        policy: highest_severity
        group_by: member
        members:
          - workers
<% if not @contrail_plugin then -%>
      neutron-openvswitch:
        policy: highest_severity
        group_by: member
        members:
          - workers
<% end -%>
      keystone-response-time:
        policy: highest_severity
        group_by: member
        members:
          - duration
      keystone-public-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      keystone-public-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      keystone-public-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      keystone-admin-api:
        policy: highest_severity
        group_by: member
        members:
          # TODO(pasquier-s): add a metric reporting the status of the keystone-admin-api vip
          - backends
          - http_errors
      glance-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      glance-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      glance-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      glance-registry-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
      heat-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      heat-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      heat-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      heat-cfn-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
      heat-cfn-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      heat-cfn-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
      heat-cloudwatch-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
<% if @tls_enabled then -%>
      horizon-https:
<% else -%>
      horizon-web:
<% end -%>
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
<% if not @storage_options["objects_ceph"] then -%>
      swift-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
          - http_errors
      swift-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      swift-s3-api-check:
        policy: highest_severity
        group_by: member
        members:
          # Swift S3 backends are in fact the same as the Swift backends
          - vip
      swift-api-endpoint:
        policy: availability_of_members
        group_by: hostname
        members:
          - endpoint
<% end -%>
<% if @ceilometer_enabled -%>
      ceilometer-api:
        policy: highest_severity
        group_by: member
        members:
          - backends
      ceilometer-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
<% end -%>
<% if @storage_options["volumes_ceph"] then -%>
      ceph-mon-cluster:
        policy: highest_severity
        group_by: member
        members:
          - health
          - capacity
      ceph-mon-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
      ceph-osd-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
<% end -%>
<% if @monitor_elasticsearch -%>
      elasticsearch-cluster:
        policy: highest_severity
        group_by: member
        members:
          - health
      elasticsearch-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
<% end -%>
<% if @monitor_influxdb -%>
      influxdb-api-check:
        policy: highest_severity
        group_by: member
        members:
          - vip
      influxdb-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
<% end -%>
      pacemaker-service:
        policy: availability_of_members
        group_by: hostname
        members:
          - check
      libvirt-service:
        policy: majority_of_node_members
        group_by: hostname
        members:
          - check
      nova-free-vcpu:
        policy: highest_severity
        group_by: member
        members:
          - nova-free-vcpu
      nova-free-memory:
        policy: highest_severity
        group_by: member
        members:
          - nova-free-memory
          - nova-aggregates-free-memory

  gse_cluster_node:
    input_message_types:
      - afd_node_metric
    aggregator_flag: true
    # the field in the input messages to identify the cluster
    cluster_field: node_role
    # the field in the input messages to identify the cluster's member
    member_field: source
    output_message_type: gse_node_cluster_metric
    output_metric_name: cluster_node_status
    interval: 10
    warm_up_period: 80
    alerting: enabled_with_notification
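    # Illustrative note, not additional configuration: node clusters are keyed
    # by the 'node_role' field of incoming 'afd_node_metric' messages, so an
    # AFD reported with node_role 'compute' and source 'cpu' would (assuming
    # such a message is emitted) feed the 'cpu' member of the 'compute'
    # cluster below.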
    clusters:
      controller:
        policy: status_of_members
        group_by: hostname
        members:
          - cpu
          - network-rx
          - network-tx
          - root-fs
          - log-fs
          - other-fs
          - swap
          - hdd-errors
<% if @detach_rabbitmq_enabled -%>
      rabbitmq-nodes:
        policy: status_of_members
        group_by: hostname
        members:
          - cpu
          - network-rx
          - network-tx
          - root-fs
          - other-fs
          - swap
          - hdd-errors
<% end -%>
      mysql-nodes:
        policy: status_of_members
        group_by: hostname
        members:
<% if @detach_database_enabled -%>
          - cpu
          - network-rx
          - network-tx
          - root-fs
          - other-fs
          - swap
          - hdd-errors
<% end -%>
          - mysql-fs
      compute:
        policy: majority_of_node_members
        group_by: hostname
        members:
          - cpu
          - network-rx
          - network-tx
          - root-fs
          - nova-fs
          - other-fs
          - swap
          - hdd-errors
      storage:
        policy: majority_of_node_members
        group_by: hostname
        members:
          - cpu
          - network-rx
          - network-tx
          - root-fs
          - other-fs
          - swap
          - hdd-errors
<% if @storage_options["volumes_ceph"] then -%>
          - osd-disk
<% end -%>
<% if @monitor_elasticsearch -%>
      elasticsearch-nodes:
        policy: status_of_members
        group_by: hostname
        members:
          - data-fs
          - root-fs
          - cpu
          - network-rx
          - network-tx
          - swap
          - hdd-errors
<% end -%>
<% if @monitor_influxdb -%>
      influxdb-nodes:
        policy: status_of_members
        group_by: hostname
        members:
          - data-fs
          - root-fs
          - cpu
          - network-rx
          - network-tx
          - swap
          - hdd-errors
<% end -%>

  gse_cluster_global:
    input_message_types:
      - gse_service_cluster_metric
      - gse_node_cluster_metric
    aggregator_flag: false
    # the field in the input messages to identify the cluster's member
    member_field: cluster_name
    output_message_type: gse_cluster_metric
    output_metric_name: cluster_status
    interval: 10
    warm_up_period: 30
    alerting: enabled_with_notification
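    # Illustrative note, not additional configuration: since member_field is
    # 'cluster_name', the members listed below refer to the service and node
    # clusters defined above (for example 'mysqld-tcp' or 'rabbitmq-cluster').
    # The 'hints' lists name related clusters that can help point at a root
    # cause; as far as this file shows, they do not enter the status
    # computation itself.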
    clusters:
      mysql:
        policy: highest_severity
        group_by: member
        members:
          - mysqld-tcp
          - mysql
      haproxy-openstack:
        policy: highest_severity
        group_by: member
        members:
          - haproxy-openstack
      apache:
        policy: highest_severity
        group_by: member
        members:
          - apache
      memcached:
        policy: highest_severity
        group_by: member
        members:
          - memcached-service
      rabbitmq:
        policy: highest_severity
        group_by: member
        members:
          - rabbitmq-cluster
          - rabbitmq-service
      nova-control-plane:
        policy: highest_severity
        group_by: member
        members:
          - nova-logs
          - nova-api # backends and http errors
          - nova-api-check # vip check
          - nova-api-endpoint # local check
          - nova-metadata-api # worker
          - nova-scheduler # worker
          - nova-conductor # worker
          - nova-cert # worker
          - nova-consoleauth # worker
          - nova-novncproxy-websocket # worker
        hints:
          - cinder-control-plane
          - glance
          - keystone
          - neutron-control-plane
          - mysql
          - rabbitmq
      nova-data-plane:
        policy: highest_severity
        group_by: member
        members:
          - nova-logs-compute
          - nova-compute
          - nova-instances
          - libvirt-service
          - nova-free-vcpu
          - nova-free-memory
          - nova-aggregates-free-memory
        hints:
          - neutron-data-plane
      cinder-control-plane:
        policy: highest_severity
        group_by: member
        members:
          - cinder-logs
          - cinder-api
          - cinder-api-check
          - cinder-v2-api-check
          - cinder-api-endpoint
          - cinder-scheduler
        hints:
          - keystone
          - mysql
          - rabbitmq
<% if not @storage_options["volumes_ceph"] then -%>
      cinder-data-plane:
        policy: highest_severity
        group_by: member
        members:
          - cinder-volume-logs
          - cinder-volume
<% end -%>
      neutron-control-plane:
        policy: highest_severity
        group_by: member
        members:
          - neutron-logs
          - neutron-api
          - neutron-api-check
          - neutron-api-endpoint
<% if not @contrail_plugin then -%>
          - neutron-l3
          - neutron-dhcp
          - neutron-metadata
          - neutron-openvswitch
<% end -%>
        hints:
          - keystone
          - mysql
          - rabbitmq
      neutron-data-plane:
        policy: highest_severity
        group_by: member
        members:
          - neutron-logs-compute
<% if not @contrail_plugin then -%>
          - neutron-openvswitch
<% end -%>
      keystone:
        policy: highest_severity
        group_by: member
        members:
          - keystone-logs
          - keystone-response-time
          - keystone-public-api
          - keystone-public-api-check
          - keystone-public-api-endpoint
          - keystone-admin-api
        hints:
          - mysql
          - apache
      glance:
        policy: highest_severity
        group_by: member
        members:
          - glance-logs
          - glance-api
          - glance-api-check
          - glance-api-endpoint
          - glance-registry-api
        hints:
          - keystone
          - mysql
      heat:
        policy: highest_severity
        group_by: member
        members:
          - heat-logs
          - heat-api
          - heat-api-check
          - heat-api-endpoint
          - heat-cfn-api
          - heat-cfn-api-check
          - heat-cfn-api-endpoint
          - heat-cloudwatch-api
        hints:
          - cinder-control-plane
          - glance
          - keystone
          - neutron-control-plane
          - nova-control-plane
          - mysql
          - rabbitmq
      horizon:
        policy: highest_severity
        group_by: member
        members:
<% if @tls_enabled then -%>
          - horizon-https
<% else -%>
          - horizon-web
<% end -%>
        hints:
          - keystone
          - apache
<% if not @storage_options["objects_ceph"] then -%>
      swift:
        policy: highest_severity
        group_by: member
        members:
          - swift-api
          - swift-api-check
          - swift-s3-api-check
          - swift-api-endpoint
        hints:
          - keystone
<% end -%>
<% if @ceilometer_enabled -%>
      ceilometer:
        policy: highest_severity
        group_by: member
        members:
          - ceilometer-api
          - ceilometer-api-check
        hints:
          - keystone
          - rabbitmq
<% end -%>
<% if @storage_options["volumes_ceph"] then -%>
      ceph:
        policy: highest_severity
        group_by: member
        members:
          - ceph-mon-cluster
          - ceph-mon-service
          - ceph-osd-service
<% end -%>
<% if @monitor_elasticsearch -%>
      elasticsearch:
        policy: highest_severity
        group_by: member
        members:
          - elasticsearch-cluster
          - elasticsearch-service
<% end -%>
<% if @monitor_influxdb -%>
      influxdb:
        policy: highest_severity
        group_by: member
        members:
          - influxdb-api-check
          - influxdb-service
<% end -%>
      pacemaker:
        policy: highest_severity
        group_by: member
        members:
          - pacemaker-service