Add filter to detect HTTP errors on some backends

Currently we are checking
  - nova-api
  - heat-api
  - swift-api
  - cinder-api
  - glance-api
  - neutron-api
  - keystone-public-api
  - keystone-admin-api

Change-Id: I1d5f73390e6d3479634de8a46e7cdf4b246a0366
This commit is contained in:
Guillaume Thouvenin 2015-10-15 16:31:48 +02:00
parent 5cfc9d3c6c
commit 9eaa462af7
2 changed files with 162 additions and 0 deletions

View File

@ -270,13 +270,148 @@ lma_collector:
window: 60
periods: 0
function: min
# Alarm 'nova-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'nova-api', and compares the result with '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'nova-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on nova-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'nova-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Alarm 'heat-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'heat-api', and compares the result with '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'heat-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on heat-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'heat-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
<% if not @storage_options["objects_ceph"] then -%>
# Alarm 'swift-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'swift-api', and compares the result with '> 0'.
# This entry sits inside an ERB conditional and is only rendered when
# @storage_options["objects_ceph"] is falsy.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'swift-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on swift-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'swift-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
<% end -%>
# Alarm 'cinder-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'cinder-api', and compares the result with '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'cinder-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on cinder-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'cinder-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Alarm 'glance-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'glance-api', and compares the result with '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'glance-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on glance-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'glance-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Alarm 'neutron-api-http-errors' (severity 'warning'): its single rule applies
# the 'diff' function to metric haproxy_backend_response_5xx, filtered on
# backend 'neutron-api', and compares the result with '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'neutron-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on neutron-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'neutron-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Alarm 'keystone-public-api-http-errors' (severity 'warning'): its single
# rule applies the 'diff' function to metric haproxy_backend_response_5xx,
# filtered on backend 'keystone-public-api', and compares the result with
# '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'keystone-public-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on keystone-public-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'keystone-public-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Alarm 'keystone-admin-api-http-errors' (severity 'warning'): its single
# rule applies the 'diff' function to metric haproxy_backend_response_5xx,
# filtered on backend 'keystone-admin-api', and compares the result with
# '> 0'.
# NOTE(review): the units of 'window' and the meaning of 'periods' are not
# visible here -- confirm against the alarm evaluator.
- name: 'keystone-admin-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on keystone-admin-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'keystone-admin-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
# Associates each node cluster name (controller, compute, storage) with a
# list of role names.
# NOTE(review): presumably these are the deployment roles a node must have to
# belong to the cluster -- confirm against the consumer of this setting.
node_cluster_roles:
- controller: ['primary-controller', 'controller']
- compute: ['compute']
- storage: ['cinder', 'ceph-osd']
# Associates each service cluster name with a list of role names; every entry
# listed here uses the two controller roles.
# The swift-api entry is wrapped in an ERB conditional and is only rendered
# when @storage_options["objects_ceph"] is falsy.
# NOTE(review): presumably these are the node roles on which the service's
# alarms are evaluated -- confirm against the consumer of this setting.
service_cluster_roles:
- rabbitmq: ['primary-controller', 'controller']
- apache: ['primary-controller', 'controller']
- nova-api: ['primary-controller', 'controller']
- heat-api: ['primary-controller', 'controller']
<% if not @storage_options["objects_ceph"] then -%>
- swift-api: ['primary-controller', 'controller']
<% end -%>
- cinder-api: ['primary-controller', 'controller']
- glance-api: ['primary-controller', 'controller']
- neutron-api: ['primary-controller', 'controller']
- keystone-public-api: ['primary-controller', 'controller']
- keystone-admin-api: ['primary-controller', 'controller']
node_cluster_alarms:
- controller:
- cpu: ['cpu-critical-controller', 'cpu-warning-controller']
@ -290,6 +425,7 @@ lma_collector:
- default:
- cpu: ['cpu-critical-default']
- fs: ['fs-critical', 'fs-warning']
service_cluster_alarms:
- rabbitmq:
- queue: ['rabbitmq-queue-warning']
@ -297,3 +433,21 @@ lma_collector:
- disk: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning']
- apache:
- worker: ['apache-warning']
- nova-api:
- http_errors: ['nova-api-http-errors']
- heat-api:
- http_errors: ['heat-api-http-errors']
<% if not @storage_options["objects_ceph"] then -%>
- swift-api:
- http_errors: ['swift-api-http-errors']
<% end -%>
- cinder-api:
- http_errors: ['cinder-api-http-errors']
- glance-api:
- http_errors: ['glance-api-http-errors']
- neutron-api:
- http_errors: ['neutron-api-http-errors']
- keystone-public-api:
- http_errors: ['keystone-public-api-http-errors']
- keystone-admin-api:
- http_errors: ['keystone-admin-api-http-errors']

View File

@ -43,6 +43,7 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
nova-ec2-api:
members:
- backends
@ -71,6 +72,7 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
cinder-v2-api:
members:
# Cinder V2 backends are in fact the same as the Cinder backends
@ -85,6 +87,7 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
neutron-l3:
members:
- workers
@ -101,14 +104,17 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
keystone-admin-api:
members:
# TODO(pasquier-s): add a metric reporting the status of the keystone-admin-api endpoint
- backends
- http_errors
glance-api:
members:
- backends
- endpoint
- http_errors
glance-registry-api:
members:
- backends
@ -116,6 +122,7 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
heat-cfn-api:
members:
- backends
@ -137,6 +144,7 @@ lma_collector:
members:
- backends
- endpoint
- http_errors
swift-s3-api:
members:
# Swift S3 backends are in fact the same as the Swift backends