# fuel-plugin-lma-collector/deployment_scripts/puppet/modules/fuel_lma_collector/templates/alarming.yaml.erb
---
lma_collector:
  alarms:
    - name: 'cpu-critical-controller'
      description: 'The CPU usage is too high (controller node)'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            periods: 0
            function: avg
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 35
            window: 120
            periods: 0
            function: avg
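    # Reading a rule (a summary inferred from the keys used throughout this
    # file, not an authoritative spec): the datapoints received over `window`
    # seconds are aggregated with `function` (avg, min, max, last or diff),
    # the result is compared to `threshold` using `relational_operator`, and
    # `periods` extra windows must also match before the rule fires. When a
    # trigger has several rules, `logical_operator` combines their results.
    # A minimal sketch with a hypothetical metric name:
    #
    #   - name: 'example-load-warning'
    #     description: 'Example: the 2-minute average of example_load is above 10'
    #     severity: 'warning'
    #     enabled: 'true'
    #     trigger:
    #       rules:
    #         - metric: example_load
    #           relational_operator: '>'
    #           threshold: 10
    #           window: 120
    #           periods: 0
    #           function: avg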
    - name: 'cpu-warning-controller'
      description: 'The CPU usage is high (controller node)'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 15
            window: 120
            periods: 0
            function: avg
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 25
            window: 120
            periods: 0
            function: avg
    - name: 'swap-usage-critical'
      description: 'There is no free swap space left'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: swap_free
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: max
    - name: 'swap-activity-warning'
      description: 'The swap activity is high'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: swap_io_in
            relational_operator: '>='
            threshold: 1048576 # 1 MB/s
            window: 120
            periods: 0
            function: avg
          - metric: swap_io_out
            relational_operator: '>='
            threshold: 1048576 # 1 MB/s
            window: 120
            periods: 0
            function: avg
    - name: 'swap-usage-warning'
      description: 'The swap free space is low'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: swap_percent_used
            relational_operator: '>='
            threshold: 0.8
            window: 60
            periods: 0
            function: avg
    - name: 'cpu-critical-compute'
      description: 'The CPU usage is too high (compute node)'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 30
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-warning-compute'
      description: 'The CPU usage is high (compute node)'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 20
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-critical-rabbitmq'
      description: 'The CPU usage is too high (RabbitMQ node)'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-warning-rabbitmq'
      description: 'The CPU usage is high (RabbitMQ node)'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 15
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-critical-mysql'
      description: 'The CPU usage is too high (MySQL node)'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-warning-mysql'
      description: 'The CPU usage is high (MySQL node)'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 15
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-critical-storage'
      description: 'The CPU usage is too high (storage node)'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 40
            window: 120
            periods: 0
            function: avg
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-warning-storage'
      description: 'The CPU usage is high (storage node)'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 30
            window: 120
            periods: 0
            function: avg
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 15
            window: 120
            periods: 0
            function: avg
    - name: 'cpu-critical-default'
      description: 'The CPU usage is too high'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: cpu_wait
            relational_operator: '>='
            threshold: 35
            window: 120
            periods: 0
            function: avg
          - metric: cpu_idle
            relational_operator: '<='
            threshold: 5
            window: 120
            periods: 0
            function: avg
    - name: 'rabbitmq-disk-limit-critical'
      description: 'RabbitMQ has reached the free disk threshold. All producers are blocked'
      severity: 'critical'
      # If the local RabbitMQ instance is down, it will be caught by the
      # rabbitmq-check alarm
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: rabbitmq_remaining_disk
            relational_operator: '<='
            threshold: 0
            window: 20
            periods: 0
            function: min
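    # A note on no_data_policy (as used in this file): it tells the alarm
    # what to report when no datapoint arrives within the window. 'okay'
    # treats missing data as healthy, used above because the rabbitmq-check
    # alarm already catches a down instance; 'skip' emits nothing, used when
    # the metric is only collected on a single node. Alarms without the key
    # fall back to the plugin's default no-data behaviour.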
    - name: 'rabbitmq-disk-limit-warning'
      description: 'RabbitMQ is getting close to the free disk threshold'
      severity: 'warning'
      # If the local RabbitMQ instance is down, it will be caught by the
      # rabbitmq-check alarm
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: rabbitmq_remaining_disk
            relational_operator: '<='
            threshold: 104857600 # 100MB
            window: 20
            periods: 0
            function: min
    - name: 'rabbitmq-memory-limit-critical'
      description: 'RabbitMQ has reached the memory threshold. All producers are blocked'
      severity: 'critical'
      # If the local RabbitMQ instance is down, it will be caught by the
      # rabbitmq-check alarm
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: rabbitmq_remaining_memory
            relational_operator: '<='
            threshold: 0
            window: 20
            periods: 0
            function: min
    - name: 'rabbitmq-memory-limit-warning'
      description: 'RabbitMQ is getting close to the memory threshold'
      severity: 'warning'
      # If the local RabbitMQ instance is down, it will be caught by the
      # rabbitmq-check alarm
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: rabbitmq_remaining_memory
            relational_operator: '<='
            threshold: 104857600 # 100MB
            window: 20
            periods: 0
            function: min
    - name: 'rabbitmq-queue-warning'
      description: 'The number of outstanding messages is too high'
      severity: 'warning'
      # If the local RabbitMQ instance is down, it will be caught by the
      # rabbitmq-check alarm
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: rabbitmq_messages
            relational_operator: '>='
            threshold: 200
            window: 120
            periods: 0
            function: avg
    - name: 'rabbitmq-pacemaker-down'
      description: 'The RabbitMQ cluster is down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        logical_operator: 'and'
        rules:
          - metric: pacemaker_resource_percent
            fields:
              resource: rabbitmq
              status: up
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'rabbitmq-pacemaker-critical'
      description: 'The RabbitMQ cluster is critical because less than half of the nodes are up'
      severity: 'critical'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        logical_operator: 'and'
        rules:
          - metric: pacemaker_resource_percent
            fields:
              resource: rabbitmq
              status: up
            relational_operator: '<'
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'rabbitmq-pacemaker-warning'
      description: 'The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing'
      severity: 'warning'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        logical_operator: 'and'
        rules:
          - metric: pacemaker_resource_percent
            fields:
              resource: rabbitmq
              status: up
            relational_operator: '<'
            threshold: 100
            window: 60
            periods: 0
            function: last
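    # The `fields` block restricts a rule to the datapoints whose field
    # values match: the pacemaker alarms above only look at the percentage of
    # 'rabbitmq' resources in the 'up' state. A hypothetical sketch of a rule
    # filtered on exact field values:
    #
    #   - metric: pacemaker_resource_percent
    #     fields:
    #       resource: some-resource   # exact match on the 'resource' field
    #       status: up                # exact match on the 'status' field
    #     relational_operator: '=='
    #     threshold: 0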
    - name: 'apache-warning'
      description: 'There are no idle Apache workers available'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: apache_idle_workers
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: min
    - name: 'apache-check'
      description: 'Apache cannot be checked'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: apache_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'log-fs-warning'
      description: "The log filesystem's free space is low"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/log'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'log-fs-critical'
      description: "The log filesystem's free space is too low"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/log'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'root-fs-warning'
      description: "The root filesystem's free space is low"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'root-fs-critical'
      description: "The root filesystem's free space is too low"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'mysql-fs-warning'
      description: "The MySQL filesystem's free space is low"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/mysql'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'mysql-fs-critical'
      description: "The MySQL filesystem's free space is too low"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/mysql'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'nova-fs-warning'
      description: "The filesystem's free space is low (compute node)"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/nova'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'nova-fs-critical'
      description: "The filesystem's free space is too low (compute node)"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/nova'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'other-fs-warning'
      description: "The filesystem's free space is low"
      severity: 'warning'
      enabled: 'true'
      no_data_policy: 'okay'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '!= /var/lib/nova && != /var/log && != /var/lib/mysql && != / && !~ ceph%-%d+$'
            group_by: [fs]
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'other-fs-critical'
      description: "The filesystem's free space is too low"
      severity: 'critical'
      enabled: 'true'
      no_data_policy: 'okay'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '!= /var/lib/nova && != /var/log && != /var/lib/mysql && != / && !~ ceph%-%d+$'
            group_by: [fs]
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
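    # Field values also accept match expressions, as in the two alarms above:
    # '==' / '!=' for (in)equality, '=~' / '!~' for Lua pattern matches
    # ('%d+' is one or more digits, '%-' a literal dash), and '&&' / '||' to
    # combine terms. `group_by: [fs]` makes the alarm evaluate and report
    # each distinct 'fs' value separately instead of folding all filesystems
    # into one aggregate. (A summary inferred from the expressions used in
    # this file, not an authoritative spec.)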
    - name: 'osd-disk-critical'
      description: "The filesystem's free space is too low (OSD disk)"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              # Real FS is /var/lib/ceph/osd/ceph-0 but Collectd substituted '/' by '-'
              fs: '=~ ceph/%d+$'
            group_by: [fs]
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'nova-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on nova-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'nova-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
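    # `function: diff` with `periods: 1` compares the aggregated value of the
    # current window with the previous one, so the rule above fires as soon
    # as the 5xx counter has increased between two consecutive 60-second
    # windows (a reading of the parameters, not an authoritative spec).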
    - name: 'nova-logs-error'
      description: 'Too many errors have been detected in Nova logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'nova'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'heat-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on heat-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'heat-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'heat-logs-error'
      description: 'Too many errors have been detected in Heat logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'heat'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'swift-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on swift-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'swift-api || object-storage'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'swift-logs-error'
      description: 'Too many errors have been detected in Swift logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'swift'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'cinder-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on cinder-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'cinder-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'cinder-logs-error'
      description: 'Too many errors have been detected in Cinder logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'cinder'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'glance-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on glance-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'glance-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'glance-logs-error'
      description: 'Too many errors have been detected in Glance logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'glance'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'neutron-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on neutron-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'neutron-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'neutron-logs-error'
      description: 'Too many errors have been detected in Neutron logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'neutron'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'keystone-response-time-duration'
      description: 'Keystone API is too slow'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: openstack_keystone_http_response_times
            fields:
              http_method: '== GET || == POST'
              http_status: '!= 5xx'
            relational_operator: '>'
            threshold: 0.3
            window: 60
            periods: 0
            value: upper_90
            function: max
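    # The `value: upper_90` key above selects which statistic of the
    # multi-value response-times metric is compared, here what appears to be
    # its 90th percentile rather than the default value field (an assumption
    # based on the key name; the metric carries several aggregates).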
    - name: 'keystone-public-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on keystone-public-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'keystone-public-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'keystone-admin-api-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on keystone-admin-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'keystone-admin-api'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'horizon-web-http-errors'
      description: 'Too many 5xx HTTP errors have been detected on horizon'
      severity: 'warning'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: haproxy_backend_response_5xx
            fields:
              backend: 'horizon-web || horizon-https'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 1
            function: diff
    - name: 'keystone-logs-error'
      description: 'Too many errors have been detected in Keystone logs'
      severity: 'warning'
      no_data_policy: 'okay'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: log_messages
            fields:
              service: 'keystone'
              level: 'error'
            relational_operator: '>'
            threshold: 0.1
            window: 70
            periods: 0
            function: max
    - name: 'mysql-node-connected'
      description: 'The MySQL service has lost connectivity with the other nodes'
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: mysql_cluster_connected
            relational_operator: '=='
            threshold: 0
            window: 30
            periods: 1
            function: min
    - name: 'mysql-node-ready'
      description: "The MySQL service isn't ready to serve queries"
      severity: 'critical'
      enabled: 'true'
      trigger:
        logical_operator: 'or'
        rules:
          - metric: mysql_cluster_ready
            relational_operator: '=='
            threshold: 0
            window: 30
            periods: 1
            function: min
    - name: 'ceph-health-critical'
      description: 'Ceph health is critical'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_health
            relational_operator: '=='
            threshold: 3 # HEALTH_ERR
            window: 60
            function: max
    - name: 'ceph-health-warning'
      description: 'Ceph health is warning'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_health
            relational_operator: '=='
            threshold: 2 # HEALTH_WARN
            window: 60
            function: max
    - name: 'ceph-capacity-critical'
      description: 'Ceph free capacity is too low'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_pool_total_percent_free
            relational_operator: '<'
            threshold: 2
            window: 60
            function: max
    - name: 'ceph-capacity-warning'
      description: 'Ceph free capacity is low'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_pool_total_percent_free
            relational_operator: '<'
            threshold: 5
            window: 60
            function: max
    - name: 'elasticsearch-health-critical'
      description: 'Elasticsearch cluster health is critical'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: elasticsearch_cluster_health
            relational_operator: '=='
            threshold: 3 # red
            window: 60
            function: min
    - name: 'elasticsearch-health-warning'
      description: 'Elasticsearch health is warning'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: elasticsearch_cluster_health
            relational_operator: '=='
            threshold: 2 # yellow
            window: 60
            function: min
    - name: 'elasticsearch-fs-warning'
      description: "The filesystem's free space is low (Elasticsearch node)"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/opt/es/data' # Real FS is /opt/es-data but Collectd substituted '/' by '-'
            relational_operator: '<'
            threshold: 20 # The low watermark for disk usage is 85% by default
            window: 60
            periods: 0
            function: min
    - name: 'elasticsearch-fs-critical'
      description: "The filesystem's free space is too low (Elasticsearch node)"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/opt/es/data' # Real FS is /opt/es-data but Collectd substituted '/' by '-'
            relational_operator: '<'
            threshold: 15 # The high watermark for disk usage is 90% by default
            window: 60
            periods: 0
            function: min
    - name: 'influxdb-fs-warning'
      description: "The filesystem's free space is low (InfluxDB node)"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/influxdb'
            relational_operator: '<'
            threshold: 10
            window: 60
            periods: 0
            function: min
    - name: 'influxdb-fs-critical'
      description: "The filesystem's free space is too low (InfluxDB node)"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: fs_space_percent_free
            fields:
              fs: '/var/lib/influxdb'
            relational_operator: '<'
            threshold: 5
            window: 60
            periods: 0
            function: min
    - name: 'haproxy-check'
      description: "HAProxy cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'rabbitmq-check'
      description: "RabbitMQ cannot be checked"
      # This alarm's severity is warning because the effective status of the
      # RabbitMQ cluster is computed by rabbitmq-pacemaker-* alarms.
      # This alarm is still useful because it will report the node(s) on which
      # RabbitMQ isn't running.
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: rabbitmq_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'ceph-mon-check'
      description: "Ceph monitor cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_mon_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'ceph-osd-check'
      description: "Ceph OSD cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: ceph_osd_check
            relational_operator: '=='
            threshold: 0
            window: 80 # The metric collection interval is 60s
            periods: 0
            function: last
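    # Note the 80-second window above: it is deliberately larger than the
    # 60-second collection interval so that `function: last` always has at
    # least one datapoint to evaluate.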
    - name: 'pacemaker-check'
      description: "Pacemaker cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: pacemaker_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'elasticsearch-check'
      description: "Elasticsearch cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: elasticsearch_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'influxdb-check'
      description: "InfluxDB cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: influxdb_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'libvirt-check'
      description: "Libvirt cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: libvirt_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'memcached-check'
      description: "memcached cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: memcached_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'mysql-check'
      description: "MySQL cannot be checked"
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: mysql_check
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'network-warning-dropped-rx'
      description: "Some received packets have been dropped"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: if_dropped_rx
            relational_operator: '>'
            threshold: 100
            window: 60
            periods: 0
            function: avg
    - name: 'network-critical-dropped-rx'
      description: "Too many received packets have been dropped"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: if_dropped_rx
            relational_operator: '>'
            threshold: 1000
            window: 60
            periods: 0
            function: avg
    - name: 'network-warning-dropped-tx'
      description: "Some transmitted packets have been dropped"
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: if_dropped_tx
            relational_operator: '>'
            threshold: 100
            window: 60
            periods: 0
            function: avg
    - name: 'network-critical-dropped-tx'
      description: "Too many transmitted packets have been dropped"
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: if_dropped_tx
            relational_operator: '>'
            threshold: 1000
            window: 60
            function: avg
    - name: 'instance-creation-time-warning'
      description: "Instance creation takes too much time"
      severity: 'warning'
      no_data_policy: 'okay' # This is a sporadic metric
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_instance_creation_time
            relational_operator: '>'
            threshold: 20
            window: 600
            periods: 0
            function: avg
    - name: 'hdd-errors-critical'
      description: 'Errors on hard drive(s) have been detected'
      severity: 'critical'
      enabled: 'true'
      no_data_policy: 'okay'
      trigger:
        rules:
          - metric: hdd_errors_rate
            group_by: ['device']
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: max
    - name: 'total-nova-free-vcpu-warning'
      description: 'There are no VCPUs available for new instances'
      severity: 'warning'
      enabled: 'true'
      no_data_policy: 'skip' # the metric is only collected from the aggregator node
      trigger:
        rules:
          - metric: openstack_nova_total_free_vcpus
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: max
    - name: 'total-nova-free-memory-warning'
      description: 'There is no memory available for new instances'
      severity: 'warning'
      enabled: 'true'
      no_data_policy: 'skip' # the metric is only collected from the aggregator node
      trigger:
        rules:
          - metric: openstack_nova_total_free_ram
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: max
    - name: 'nova-aggregates-free-memory-warning'
      description: "The free memory percentage of a Nova aggregate is low"
      severity: 'warning'
      enabled: 'true'
      no_data_policy: 'skip' # the metric is only collected from the aggregator node
      trigger:
        rules:
          - metric: openstack_nova_aggregate_free_ram_percent
            group_by: [aggregate]
            relational_operator: '<'
            threshold: 10.0
            window: 60
            periods: 0
            function: min
    - name: 'nova-aggregates-free-memory-critical'
      description: "The free memory percentage of a Nova aggregate is too low"
      severity: 'critical'
      enabled: 'true'
      no_data_policy: 'skip' # the metric is only collected from the aggregator node
      trigger:
        rules:
          - metric: openstack_nova_aggregate_free_ram_percent
            group_by: [aggregate]
            relational_operator: '<'
            threshold: 1.0
            window: 60
            periods: 0
            function: min
    # Alarms on the local checks of the OpenStack service endpoints
    - name: 'cinder-api-local-endpoint'
      description: 'Cinder API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'cinder-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-api-local-endpoint'
      description: 'Glance API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'glance-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-api-local-endpoint'
      description: 'Heat API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'heat-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cfn-api-local-endpoint'
      description: 'Heat CFN API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'heat-cfn-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-public-api-local-endpoint'
      description: 'Keystone public API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'keystone-public-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-api-local-endpoint'
      description: 'Neutron API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'neutron-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-api-local-endpoint'
      description: 'Nova API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'nova-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'swift-api-local-endpoint'
      description: 'Swift API is locally down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_local_api
            fields:
              service: 'swift-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    # The following are the API check definitions for the OpenStack services
    # and for InfluxDB
    - name: 'influxdb-api-check-failed'
      description: 'The endpoint check for InfluxDB has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: http_check
            fields:
              service: 'influxdb-cluster'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-api-check-failed'
      description: 'The endpoint check for nova-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'nova-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-api-check-failed'
      description: 'The endpoint check for neutron-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'neutron-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-api-check-failed'
      description: 'The endpoint check for cinder-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'cinder-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-v2-api-check-failed'
      description: 'The endpoint check for cinder-v2-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'cinder-v2-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-api-check-failed'
      description: 'The endpoint check for glance-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'glance-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-api-check-failed'
      description: 'The endpoint check for heat-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'heat-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cfn-api-check-failed'
      description: 'The endpoint check for heat-cfn-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'heat-cfn-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'swift-api-check-failed'
      description: 'The endpoint check for swift-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'swift-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'swift-s3-api-check-failed'
      description: 'The endpoint check for swift-s3-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'swift-s3-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-public-api-check-failed'
      description: 'The endpoint check for keystone-public-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'keystone-public-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'ceilometer-api-check-failed'
      description: 'The endpoint check for ceilometer-api has failed'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the controller running the management VIP
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_check_api
            fields:
              service: 'ceilometer-api'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    # The following are the AFD alarms generated to check the API backends
    # All backends are down
    - name: 'elasticsearch-api-backends-all-down'
      description: 'All Elasticsearch backends are down'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'elasticsearch-rest'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
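    # The backend alarms in this section form a three-level ladder per
    # HAProxy backend: all servers down => 'down', at least one server down
    # => 'warning', and 50% or less up => 'critical' (the latter two series
    # follow further below). The sketch for a hypothetical backend
    # 'example-api':
    #
    #   - metric: haproxy_backend_servers
    #     fields:
    #       backend: 'example-api'
    #       state: 'up'
    #     relational_operator: '=='
    #     threshold: 0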
    - name: 'kibana-api-backends-all-down'
      description: 'All API backends are down for Kibana'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'kibana'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'influxdb-api-backends-all-down'
      description: 'All API backends are down for InfluxDB'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'influxdb'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'grafana-api-backends-all-down'
      description: 'All API backends are down for Grafana'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'grafana'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-registry-api-backends-all-down'
      description: 'All API backends are down for glance-registry-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'glance-registry-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-api-backends-all-down'
      description: 'All API backends are down for nova-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-api-backends-all-down'
      description: 'All API backends are down for cinder-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'cinder-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'object-storage-api-backends-all-down'
      description: 'All API backends are down for object-storage'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'object-storage'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cfn-api-backends-all-down'
      description: 'All API backends are down for heat-cfn-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-cfn-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'horizon-web-api-backends-all-down'
      description: 'All API backends are down for horizon-web'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'horizon-web || horizon-https'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-novncproxy-websocket-api-backends-all-down'
      description: 'All API backends are down for nova-novncproxy-websocket'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-novncproxy-websocket'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-api-backends-all-down'
      description: 'All API backends are down for heat-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-public-api-backends-all-down'
      description: 'All API backends are down for keystone-public-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'keystone-public-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cloudwatch-api-backends-all-down'
      description: 'All API backends are down for heat-cloudwatch-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-cloudwatch-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-metadata-api-backends-all-down'
      description: 'All API backends are down for nova-metadata-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-metadata-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'mysqld-tcp-api-backends-all-down'
      description: 'All API backends are down for mysqld-tcp'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'mysqld-tcp'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-admin-api-backends-all-down'
      description: 'All API backends are down for keystone-admin-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'keystone-admin-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-api-backends-all-down'
      description: 'All API backends are down for glance-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'glance-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-api-backends-all-down'
      description: 'All API backends are down for neutron-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'neutron-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'swift-api-backends-all-down'
      description: 'All API backends are down for swift-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'swift-api || object-storage'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'ceilometer-api-backends-all-down'
      description: 'All API backends are down for ceilometer-api'
      severity: 'down'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'ceilometer-api'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    # At least one backend is down
    - name: 'elasticsearch-api-backends-one-down'
      description: 'At least one API backend is down for elasticsearch'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'elasticsearch-rest'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'kibana-api-backends-one-down'
      description: 'At least one API backend is down for kibana'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'kibana'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'influxdb-api-backends-one-down'
      description: 'At least one API backend is down for influxdb'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'influxdb'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'grafana-api-backends-one-down'
      description: 'At least one API backend is down for grafana'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'grafana'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-registry-api-backends-one-down'
      description: 'At least one API backend is down for glance-registry-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'glance-registry-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-api-backends-one-down'
      description: 'At least one API backend is down for nova-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-api-backends-one-down'
      description: 'At least one API backend is down for cinder-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'cinder-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'object-storage-api-backends-one-down'
      description: 'At least one API backend is down for object-storage'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'object-storage'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cfn-api-backends-one-down'
      description: 'At least one API backend is down for heat-cfn-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-cfn-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'horizon-web-api-backends-one-down'
      description: 'At least one API backend is down for horizon-web'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'horizon-web || horizon-https'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-novncproxy-websocket-api-backends-one-down'
      description: 'At least one API backend is down for nova-novncproxy-websocket'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-novncproxy-websocket'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-api-backends-one-down'
      description: 'At least one API backend is down for heat-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-public-api-backends-one-down'
      description: 'At least one API backend is down for keystone-public-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'keystone-public-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'heat-cloudwatch-api-backends-one-down'
      description: 'At least one API backend is down for heat-cloudwatch-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'heat-cloudwatch-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-metadata-api-backends-one-down'
      description: 'At least one API backend is down for nova-metadata-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'nova-metadata-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'mysqld-tcp-api-backends-one-down'
      description: 'At least one API backend is down for mysqld-tcp'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'mysqld-tcp'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'keystone-admin-api-backends-one-down'
      description: 'At least one API backend is down for keystone-admin-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'keystone-admin-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'glance-api-backends-one-down'
      description: 'At least one API backend is down for glance-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'glance-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-api-backends-one-down'
      description: 'At least one API backend is down for neutron-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'neutron-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'swift-api-backends-one-down'
      description: 'At least one API backend is down for swift-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'swift-api || object-storage'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'ceilometer-api-backends-one-down'
      description: 'At least one API backend is down for ceilometer-api'
      severity: 'warning'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers
            fields:
              backend: 'ceilometer-api'
              state: 'down'
            relational_operator: '>'
            threshold: 0
            window: 60
            periods: 0
            function: last
    # Less than 50% of backends are up
    - name: 'elasticsearch-api-backends-majority-down'
      description: 'Less than 50% of backends are up for elasticsearch'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'elasticsearch-rest'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'kibana-api-backends-majority-down'
      description: 'Less than 50% of backends are up for kibana'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'kibana'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'influxdb-api-backends-majority-down'
      description: 'Less than 50% of backends are up for influxdb'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'influxdb'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'grafana-api-backends-majority-down'
      description: 'Less than 50% of backends are up for grafana'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'grafana'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'glance-registry-api-backends-majority-down'
      description: 'Less than 50% of backends are up for glance-registry-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'glance-registry-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'nova-api-backends-majority-down'
      description: 'Less than 50% of backends are up for nova-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'nova-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'cinder-api-backends-majority-down'
      description: 'Less than 50% of backends are up for cinder-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'cinder-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'object-storage-api-backends-majority-down'
      description: 'Less than 50% of backends are up for object-storage'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'object-storage'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'heat-cfn-api-backends-majority-down'
      description: 'Less than 50% of backends are up for heat-cfn-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'heat-cfn-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'horizon-web-api-backends-majority-down'
      description: 'Less than 50% of backends are up for horizon-web'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'horizon-web || horizon-https'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'nova-novncproxy-websocket-api-backends-majority-down'
      description: 'Less than 50% of backends are up for nova-novncproxy-websocket'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'nova-novncproxy-websocket'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'heat-api-backends-majority-down'
      description: 'Less than 50% of backends are up for heat-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'heat-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'keystone-public-api-backends-majority-down'
      description: 'Less than 50% of backends are up for keystone-public-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'keystone-public-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'heat-cloudwatch-api-backends-majority-down'
      description: 'Less than 50% of backends are up for heat-cloudwatch-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'heat-cloudwatch-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'nova-metadata-api-backends-majority-down'
      description: 'Less than 50% of backends are up for nova-metadata-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'nova-metadata-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'mysqld-tcp-api-backends-majority-down'
      description: 'Less than 50% of backends are up for mysqld-tcp'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'mysqld-tcp'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'keystone-admin-api-backends-majority-down'
      description: 'Less than 50% of backends are up for keystone-admin-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'keystone-admin-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'glance-api-backends-majority-down'
      description: 'Less than 50% of backends are up for glance-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'glance-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'neutron-api-backends-majority-down'
      description: 'Less than 50% of backends are up for neutron-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'neutron-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'swift-api-backends-majority-down'
      description: 'Less than 50% of backends are up for swift-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'swift-api || object-storage'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    - name: 'ceilometer-api-backends-majority-down'
      description: 'Less than 50% of backends are up for ceilometer-api'
      severity: 'critical'
      enabled: 'true'
      trigger:
        rules:
          - metric: haproxy_backend_servers_percent
            fields:
              backend: 'ceilometer-api'
              state: 'up'
            relational_operator: '<='
            threshold: 50
            window: 60
            periods: 0
            function: last
    # The following are the AFD alarms generated to check the workers
    # All workers are down
    - name: 'nova-scheduler-all-down'
      description: 'All Nova schedulers are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_services
            fields:
              service: 'scheduler'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
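    # The worker alarms in this section apply the same ladder as the HAProxy
    # backend alarms to the service states reported by
    # openstack_nova_services, openstack_cinder_services and
    # openstack_neutron_agents: all workers down => 'down', at least one
    # down => 'warning'. All of them are collected from the DC node only,
    # hence no_data_policy 'skip'.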
    - name: 'nova-cert-all-down'
      description: 'All Nova certs are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_services
            fields:
              service: 'cert'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-consoleauth-all-down'
      description: 'All Nova consoleauths are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_services
            fields:
              service: 'consoleauth'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-compute-all-down'
      description: 'All Nova computes are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_services
            fields:
              service: 'compute'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'nova-conductor-all-down'
      description: 'All Nova conductors are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_nova_services
            fields:
              service: 'conductor'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-scheduler-all-down'
      description: 'All Cinder schedulers are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_cinder_services
            fields:
              service: 'scheduler'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'cinder-volume-all-down'
      description: 'All Cinder volumes are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_cinder_services
            fields:
              service: 'volume'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-l3-all-down'
      description: 'All Neutron L3 agents are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_neutron_agents
            fields:
              service: 'l3'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-dhcp-all-down'
      description: 'All Neutron DHCP agents are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_neutron_agents
            fields:
              service: 'dhcp'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-metadata-all-down'
      description: 'All Neutron metadata agents are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_neutron_agents
            fields:
              service: 'metadata'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
    - name: 'neutron-openvswitch-all-down'
      description: 'All Neutron openvswitch agents are down'
      severity: 'down'
      no_data_policy: 'skip' # the metric is only collected from the DC node
      enabled: 'true'
      trigger:
        rules:
          - metric: openstack_neutron_agents
            fields:
              service: 'openvswitch'
              state: 'up'
            relational_operator: '=='
            threshold: 0
            window: 60
            periods: 0
            function: last
  # At least one service or agent is down
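  # These alarms reuse the same service metrics as above but match the 'down'
  # state with '>' 0, so a single failed worker is enough to fire a warning.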
- name: 'nova-scheduler-one-down'
description: 'At least one Nova scheduler is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services
fields:
service: 'scheduler'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'nova-cert-one-down'
description: 'At least one Nova cert is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services
fields:
service: 'cert'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'nova-consoleauth-one-down'
description: 'At least one Nova consoleauth is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services
fields:
service: 'consoleauth'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'nova-compute-one-down'
description: 'At least one Nova compute is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services
fields:
service: 'compute'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'nova-conductor-one-down'
description: 'At least one Nova conductor is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services
fields:
service: 'conductor'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'cinder-scheduler-one-down'
description: 'At least one Cinder scheduler is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_cinder_services
fields:
service: 'scheduler'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'cinder-volume-one-down'
    description: 'At least one Cinder volume service is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_cinder_services
fields:
service: 'volume'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'neutron-l3-one-down'
    description: 'At least one Neutron L3 agent is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents
fields:
service: 'l3'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'neutron-dhcp-one-down'
    description: 'At least one Neutron DHCP agent is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents
fields:
service: 'dhcp'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'neutron-metadata-one-down'
    description: 'At least one Neutron metadata agent is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents
fields:
service: 'metadata'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
- name: 'neutron-openvswitch-one-down'
    description: 'At least one Neutron openvswitch agent is down'
severity: 'warning'
no_data_policy: 'skip' # the metric is only collected from the DC node
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents
fields:
service: 'openvswitch'
state: 'down'
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
  # Less than 50% of the services are up (as a share of services in the up and down states).
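  # Worked example, assuming the *_percent metrics report the share of
  # workers in the given state: with 1 of 3 Nova schedulers up, the value of
  # openstack_nova_services_percent (service='scheduler', state='up') is
  # ~33.3, which satisfies '<= 50' and triggers the critical alarm.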
- name: 'nova-scheduler-majority-down'
description: 'Less than 50% of Nova schedulers are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services_percent
fields:
service: 'scheduler'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'nova-cert-majority-down'
description: 'Less than 50% of Nova certs are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services_percent
fields:
service: 'cert'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'nova-consoleauth-majority-down'
description: 'Less than 50% of Nova consoleauths are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services_percent
fields:
service: 'consoleauth'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'nova-compute-majority-down'
description: 'Less than 50% of Nova computes are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services_percent
fields:
service: 'compute'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'nova-conductor-majority-down'
description: 'Less than 50% of Nova conductors are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_nova_services_percent
fields:
service: 'conductor'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'cinder-scheduler-majority-down'
description: 'Less than 50% of Cinder schedulers are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_cinder_services_percent
fields:
service: 'scheduler'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'cinder-volume-majority-down'
    description: 'Less than 50% of Cinder volume services are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_cinder_services_percent
fields:
service: 'volume'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'neutron-l3-majority-down'
description: 'Less than 50% of Neutron L3 agents are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents_percent
fields:
service: 'l3'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'neutron-dhcp-majority-down'
description: 'Less than 50% of Neutron DHCP agents are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents_percent
fields:
service: 'dhcp'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'neutron-metadata-majority-down'
description: 'Less than 50% of Neutron metadata agents are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents_percent
fields:
service: 'metadata'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
- name: 'neutron-openvswitch-majority-down'
description: 'Less than 50% of Neutron openvswitch agents are up'
severity: 'critical'
enabled: 'true'
trigger:
rules:
- metric: openstack_neutron_agents_percent
fields:
service: 'openvswitch'
state: 'up'
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
# Definition of the AFD node filters
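  # Each cluster below is keyed by a name and declares:
  #   apply_to_node - the node role the alarms are evaluated on
  #   alerting      - 'enabled' or 'enabled_with_notification'
  #   members       - a mapping from a source name to the alarms it runs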
node_cluster_alarms:
controller:
apply_to_node: controller
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-controller', 'cpu-warning-controller']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
log-fs:
alarms: ['log-fs-critical', 'log-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
<% if @detach_rabbitmq_enabled -%>
rabbitmq-nodes:
apply_to_node: rabbitmq-nodes
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-rabbitmq', 'cpu-warning-rabbitmq']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
<% end -%>
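    # Blocks guarded by ERB conditionals (@detach_rabbitmq_enabled,
    # @detach_database_enabled, @storage_options) are rendered only when the
    # corresponding Fuel environment option is set.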
mysql-nodes:
apply_to_node: mysql-nodes
alerting: enabled
members:
<% if @detach_database_enabled -%>
cpu:
alarms: ['cpu-critical-mysql', 'cpu-warning-mysql']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
<% end -%>
mysql-fs:
alarms: ['mysql-fs-critical', 'mysql-fs-warning']
compute:
apply_to_node: compute
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-compute', 'cpu-warning-compute']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
nova-fs:
alarms: ['nova-fs-critical', 'nova-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
storage:
apply_to_node: storage
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-storage', 'cpu-warning-storage']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
<% if @storage_options["volumes_ceph"] then -%>
osd-disk:
alarms: ['osd-disk-critical']
<% end -%>
elasticsearch-nodes:
apply_to_node: elasticsearch-nodes
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-default']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
data-fs:
alarms: ['elasticsearch-fs-critical', 'elasticsearch-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
influxdb-nodes:
apply_to_node: influxdb-nodes
alerting: enabled
members:
cpu:
alarms: ['cpu-critical-default']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
data-fs:
alarms: ['influxdb-fs-critical', 'influxdb-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alerting: enabled_with_notification
alarms: ['hdd-errors-critical']
    # Default members configured for all nodes with unknown roles
default:
apply_to_node: default
      # The operator wants to receive alert notifications for individual nodes
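      # 'enabled_with_notification' appears to also emit a notification per
      # state change, whereas plain 'enabled' only feeds the overall cluster
      # status.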
alerting: enabled_with_notification
members:
cpu:
alarms: ['cpu-critical-default']
network-rx:
alarms: ['network-critical-dropped-rx', 'network-warning-dropped-rx']
network-tx:
alarms: ['network-critical-dropped-tx', 'network-warning-dropped-tx']
root-fs:
alarms: ['root-fs-critical', 'root-fs-warning']
other-fs:
alarms: ['other-fs-critical', 'other-fs-warning']
swap:
alarms: ['swap-usage-critical', 'swap-activity-warning', 'swap-usage-warning']
hdd-errors:
alarms: ['hdd-errors-critical']
# Definition of the AFD service filters
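  # Same layout as the node filters above; entries without apply_to_node are
  # presumably evaluated at the default location (the DC node mentioned in
  # the alarm comments), since their metrics are cluster-wide.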
service_cluster_alarms:
rabbitmq-cluster:
apply_to_node: rabbitmq-nodes
alerting: enabled
members:
pacemaker:
alarms: ['rabbitmq-pacemaker-down', 'rabbitmq-pacemaker-critical', 'rabbitmq-pacemaker-warning']
queue:
alarms: ['rabbitmq-queue-warning']
memory:
alarms: ['rabbitmq-memory-limit-critical', 'rabbitmq-memory-limit-warning']
disk:
alarms: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning']
rabbitmq-service:
apply_to_node: rabbitmq-nodes
alerting: enabled
members:
check:
alarms: ['rabbitmq-check']
mysql:
apply_to_node: mysql-nodes
alerting: enabled
members:
node-status:
alarms: ['mysql-node-connected', 'mysql-node-ready']
check:
alarms: ['mysql-check']
apache:
apply_to_node: controller
alerting: enabled
members:
worker:
alarms: ['apache-warning']
check:
alarms: ['apache-check']
nova-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['nova-api-http-errors']
backends:
alarms:
- 'nova-api-backends-all-down'
- 'nova-api-backends-majority-down'
- 'nova-api-backends-one-down'
nova-api-check:
alerting: enabled
members:
vip:
alarms: ['nova-api-check-failed']
nova-metadata-api:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'nova-metadata-api-backends-all-down'
- 'nova-metadata-api-backends-majority-down'
- 'nova-metadata-api-backends-one-down'
nova-novncproxy-websocket:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'nova-novncproxy-websocket-api-backends-all-down'
- 'nova-novncproxy-websocket-api-backends-majority-down'
- 'nova-novncproxy-websocket-api-backends-one-down'
nova-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['nova-api-local-endpoint']
nova-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['nova-logs-error']
nova-logs-compute:
apply_to_node: compute
alerting: enabled
members:
error:
alarms: ['nova-logs-error']
nova-cert:
alerting: enabled
members:
workers:
alarms:
- 'nova-cert-all-down'
- 'nova-cert-majority-down'
- 'nova-cert-one-down'
nova-consoleauth:
alerting: enabled
members:
workers:
alarms:
- 'nova-consoleauth-all-down'
- 'nova-consoleauth-majority-down'
- 'nova-consoleauth-one-down'
nova-compute:
alerting: enabled
members:
workers:
alarms:
- 'nova-compute-all-down'
- 'nova-compute-majority-down'
- 'nova-compute-one-down'
nova-conductor:
alerting: enabled
members:
workers:
alarms:
- 'nova-conductor-all-down'
- 'nova-conductor-majority-down'
- 'nova-conductor-one-down'
nova-scheduler:
alerting: enabled
members:
workers:
alarms:
- 'nova-scheduler-all-down'
- 'nova-scheduler-majority-down'
- 'nova-scheduler-one-down'
heat-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['heat-api-http-errors']
backends:
alarms:
- 'heat-api-backends-all-down'
- 'heat-api-backends-majority-down'
- 'heat-api-backends-one-down'
heat-cfn-api:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'heat-cfn-api-backends-all-down'
- 'heat-cfn-api-backends-majority-down'
- 'heat-cfn-api-backends-one-down'
heat-cloudwatch-api:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'heat-cloudwatch-api-backends-all-down'
- 'heat-cloudwatch-api-backends-majority-down'
- 'heat-cloudwatch-api-backends-one-down'
heat-api-check:
alerting: enabled
members:
vip:
alarms: ['heat-api-check-failed']
heat-cfn-api-check:
alerting: enabled
members:
vip:
alarms: ['heat-cfn-api-check-failed']
heat-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['heat-api-local-endpoint']
heat-cfn-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['heat-cfn-api-local-endpoint']
heat-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['heat-logs-error']
<% if not @storage_options["objects_ceph"] then -%>
swift-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['swift-api-http-errors']
backends:
alarms:
- 'swift-api-backends-all-down'
- 'swift-api-backends-majority-down'
- 'swift-api-backends-one-down'
swift-api-check:
alerting: enabled
members:
vip:
alarms: ['swift-api-check-failed']
swift-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['swift-api-local-endpoint']
swift-s3-api-check:
alerting: enabled
members:
vip:
alarms: ['swift-s3-api-check-failed']
swift-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['swift-logs-error']
<% end -%>
cinder-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['cinder-api-http-errors']
backends:
alarms:
- 'cinder-api-backends-all-down'
- 'cinder-api-backends-majority-down'
- 'cinder-api-backends-one-down'
cinder-api-check:
alerting: enabled
members:
vip:
alarms: ['cinder-api-check-failed']
cinder-v2-api-check:
alerting: enabled
members:
vip:
alarms: ['cinder-v2-api-check-failed']
cinder-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['cinder-api-local-endpoint']
cinder-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['cinder-logs-error']
cinder-scheduler:
alerting: enabled
members:
workers:
alarms:
- 'cinder-scheduler-all-down'
- 'cinder-scheduler-majority-down'
- 'cinder-scheduler-one-down'
cinder-volume:
alerting: enabled
members:
workers:
alarms:
- 'cinder-volume-all-down'
- 'cinder-volume-majority-down'
- 'cinder-volume-one-down'
<% if not @storage_options["volumes_ceph"] then -%>
cinder-volume-logs:
apply_to_node: storage
alerting: enabled
members:
error:
alarms: ['cinder-logs-error']
<% end -%>
glance-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['glance-api-http-errors']
backends:
alarms:
- 'glance-api-backends-all-down'
- 'glance-api-backends-majority-down'
- 'glance-api-backends-one-down'
glance-registry-api:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'glance-registry-api-backends-all-down'
- 'glance-registry-api-backends-majority-down'
- 'glance-registry-api-backends-one-down'
glance-api-check:
alerting: enabled
members:
vip:
alarms: ['glance-api-check-failed']
glance-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['glance-api-local-endpoint']
glance-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['glance-logs-error']
neutron-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['neutron-api-http-errors']
backends:
alarms:
- 'neutron-api-backends-all-down'
- 'neutron-api-backends-majority-down'
- 'neutron-api-backends-one-down'
neutron-api-check:
alerting: enabled
members:
vip:
alarms: ['neutron-api-check-failed']
neutron-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['neutron-api-local-endpoint']
neutron-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['neutron-logs-error']
neutron-l3:
alerting: enabled
members:
workers:
alarms:
- 'neutron-l3-all-down'
- 'neutron-l3-majority-down'
- 'neutron-l3-one-down'
neutron-dhcp:
alerting: enabled
members:
workers:
alarms:
- 'neutron-dhcp-all-down'
- 'neutron-dhcp-majority-down'
- 'neutron-dhcp-one-down'
neutron-metadata:
alerting: enabled
members:
workers:
alarms:
- 'neutron-metadata-all-down'
- 'neutron-metadata-majority-down'
- 'neutron-metadata-one-down'
neutron-openvswitch:
alerting: enabled
members:
workers:
alarms:
- 'neutron-openvswitch-all-down'
- 'neutron-openvswitch-majority-down'
- 'neutron-openvswitch-one-down'
neutron-logs-compute:
apply_to_node: compute
alerting: enabled
members:
error:
alarms: ['neutron-logs-error']
keystone-response-time:
apply_to_node: controller
alerting: enabled
members:
duration:
alarms: ['keystone-response-time-duration']
keystone-public-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['keystone-public-api-http-errors']
backends:
alarms:
- 'keystone-public-api-backends-all-down'
- 'keystone-public-api-backends-majority-down'
- 'keystone-public-api-backends-one-down'
keystone-public-api-check:
alerting: enabled
members:
vip:
alarms: ['keystone-public-api-check-failed']
keystone-public-api-endpoint:
apply_to_node: controller
alerting: enabled
members:
endpoint:
alarms: ['keystone-public-api-local-endpoint']
keystone-logs:
apply_to_node: controller
alerting: enabled
members:
error:
alarms: ['keystone-logs-error']
keystone-admin-api:
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['keystone-admin-api-http-errors']
backends:
alarms:
- 'keystone-admin-api-backends-all-down'
- 'keystone-admin-api-backends-majority-down'
- 'keystone-admin-api-backends-one-down'
<% if @tls_enabled then -%>
horizon-https:
<% else -%>
horizon-web:
<% end -%>
apply_to_node: controller
alerting: enabled
members:
http_errors:
alarms: ['horizon-web-http-errors']
backends:
alarms:
- 'horizon-web-api-backends-all-down'
- 'horizon-web-api-backends-majority-down'
- 'horizon-web-api-backends-one-down'
nova-instances:
      # TODO(scroiset): apply to compute nodes
apply_to_node: controller
alerting: enabled
members:
creation-time:
alarms: ['instance-creation-time-warning']
nova-free-vcpu:
alerting: enabled
members:
nova-free-vcpu:
alarms: ['total-nova-free-vcpu-warning']
nova-free-memory:
alerting: enabled
members:
nova-free-memory:
alarms: ['total-nova-free-memory-warning']
nova-aggregates-free-memory:
alarms: ['nova-aggregates-free-memory-critical', 'nova-aggregates-free-memory-warning']
ceph-mon-cluster:
apply_to_node: ceph-mon
alerting: enabled
members:
health:
alarms: ['ceph-health-critical', 'ceph-health-warning']
capacity:
alarms: ['ceph-capacity-critical', 'ceph-capacity-warning']
ceph-mon-service:
apply_to_node: ceph-mon
alerting: enabled
members:
check:
alarms: ['ceph-mon-check']
<% if @storage_options["volumes_ceph"] then -%>
ceph-osd-service:
apply_to_node: storage
alerting: enabled
members:
check:
alarms: ['ceph-osd-check']
<% end -%>
elasticsearch-cluster:
apply_to_node: elasticsearch-nodes
alerting: enabled
members:
health:
alarms: ['elasticsearch-health-critical', 'elasticsearch-health-warning']
elasticsearch-service:
apply_to_node: elasticsearch-nodes
alerting: enabled
members:
check:
alarms: ['elasticsearch-check']
influxdb-service:
apply_to_node: influxdb-nodes
alerting: enabled
members:
check:
alarms: ['influxdb-check']
influxdb-api-check:
alerting: enabled
members:
vip:
alarms: ['influxdb-api-check-failed']
haproxy-openstack:
apply_to_node: controller
alerting: enabled
members:
check:
alarms: ['haproxy-check']
pacemaker-service:
apply_to_node: controller
alerting: enabled
members:
check:
alarms: ['pacemaker-check']
libvirt-service:
apply_to_node: compute
alerting: enabled
members:
check:
alarms: ['libvirt-check']
memcached-service:
apply_to_node: controller
alerting: enabled
members:
check:
alarms: ['memcached-check']
ceilometer-api-check:
alerting: enabled
members:
vip:
alarms: ['ceilometer-api-check-failed']
mysqld-tcp:
apply_to_node: controller
alerting: enabled
members:
backends:
alarms:
- 'mysqld-tcp-api-backends-all-down'
- 'mysqld-tcp-api-backends-majority-down'
- 'mysqld-tcp-api-backends-one-down'
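    # A minimal, commented-out sketch of declaring an additional service
    # cluster; the cluster and alarm names are hypothetical and must match
    # alarms defined in the 'alarms' section above:
    #
    # my-service-check:
    #   apply_to_node: controller
    #   alerting: enabled
    #   members:
    #     check:
    #       alarms: ['my-service-check-failed']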