Add filter to detect HTTP errors on some backends

Currently we are checking the following backends for 5xx responses:
  - nova-api
  - heat-api
  - swift-api
  - cinder-api
  - glance-api
  - neutron-api
  - keystone-public-api
  - keystone-admin-api

Change-Id: I1d5f73390e6d3479634de8a46e7cdf4b246a0366
This commit is contained in:
Guillaume Thouvenin 2015-10-15 16:31:48 +02:00
parent 5cfc9d3c6c
commit 9eaa462af7
2 changed files with 162 additions and 0 deletions

View File

@ -270,13 +270,148 @@ lma_collector:
window: 60 window: 60
periods: 0 periods: 0
function: min function: min
- name: 'nova-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on nova-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'nova-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
- name: 'heat-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on heat-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'heat-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
<% if not @storage_options["objects_ceph"] then -%>
- name: 'swift-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on swift-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'swift-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
<% end -%>
- name: 'cinder-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on cinder-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'cinder-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
- name: 'glance-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on glance-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'glance-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
- name: 'neutron-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on neutron-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'neutron-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
- name: 'keystone-public-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on keystone-public-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'keystone-public-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
- name: 'keystone-admin-api-http-errors'
description: 'Some 5xx HTTP errors have been detected on keystone-admin-api'
severity: 'warning'
enabled: 'true'
trigger:
logical_operator: 'or'
rules:
- metric: haproxy_backend_response_5xx
fields:
backend: 'keystone-admin-api'
relational_operator: '>'
threshold: 0
window: 60
periods: 1
function: diff
node_cluster_roles: node_cluster_roles:
- controller: ['primary-controller', 'controller'] - controller: ['primary-controller', 'controller']
- compute: ['compute'] - compute: ['compute']
- storage: ['cinder', 'ceph-osd'] - storage: ['cinder', 'ceph-osd']
service_cluster_roles: service_cluster_roles:
- rabbitmq: ['primary-controller', 'controller'] - rabbitmq: ['primary-controller', 'controller']
- apache: ['primary-controller', 'controller'] - apache: ['primary-controller', 'controller']
- nova-api: ['primary-controller', 'controller']
- heat-api: ['primary-controller', 'controller']
<% if not @storage_options["objects_ceph"] then -%>
- swift-api: ['primary-controller', 'controller']
<% end -%>
- cinder-api: ['primary-controller', 'controller']
- glance-api: ['primary-controller', 'controller']
- neutron-api: ['primary-controller', 'controller']
- keystone-public-api: ['primary-controller', 'controller']
- keystone-admin-api: ['primary-controller', 'controller']
node_cluster_alarms: node_cluster_alarms:
- controller: - controller:
- cpu: ['cpu-critical-controller', 'cpu-warning-controller'] - cpu: ['cpu-critical-controller', 'cpu-warning-controller']
@ -290,6 +425,7 @@ lma_collector:
- default: - default:
- cpu: ['cpu-critical-default'] - cpu: ['cpu-critical-default']
- fs: ['fs-critical', 'fs-warning'] - fs: ['fs-critical', 'fs-warning']
service_cluster_alarms: service_cluster_alarms:
- rabbitmq: - rabbitmq:
- queue: ['rabbitmq-queue-warning'] - queue: ['rabbitmq-queue-warning']
@ -297,3 +433,21 @@ lma_collector:
- disk: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning'] - disk: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning']
- apache: - apache:
- worker: ['apache-warning'] - worker: ['apache-warning']
- nova-api:
- http_errors: ['nova-api-http-errors']
- heat-api:
- http_errors: ['heat-api-http-errors']
<% if not @storage_options["objects_ceph"] then -%>
- swift-api:
- http_errors: ['swift-api-http-errors']
<% end -%>
- cinder-api:
- http_errors: ['cinder-api-http-errors']
- glance-api:
- http_errors: ['glance-api-http-errors']
- neutron-api:
- http_errors: ['neutron-api-http-errors']
- keystone-public-api:
- http_errors: ['keystone-public-api-http-errors']
- keystone-admin-api:
- http_errors: ['keystone-admin-api-http-errors']

View File

@ -43,6 +43,7 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
nova-ec2-api: nova-ec2-api:
members: members:
- backends - backends
@ -71,6 +72,7 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
cinder-v2-api: cinder-v2-api:
members: members:
# Cinder V2 backends are in fact the same as the Cinder backends # Cinder V2 backends are in fact the same as the Cinder backends
@ -85,6 +87,7 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
neutron-l3: neutron-l3:
members: members:
- workers - workers
@ -101,14 +104,17 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
keystone-admin-api: keystone-admin-api:
members: members:
# TODO(pasquier-s): add a metric reporting the status of the keystone-admin-api endpoint # TODO(pasquier-s): add a metric reporting the status of the keystone-admin-api endpoint
- backends - backends
- http_errors
glance-api: glance-api:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
glance-registry-api: glance-registry-api:
members: members:
- backends - backends
@ -116,6 +122,7 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
heat-cfn-api: heat-cfn-api:
members: members:
- backends - backends
@ -137,6 +144,7 @@ lma_collector:
members: members:
- backends - backends
- endpoint - endpoint
- http_errors
swift-s3-api: swift-s3-api:
members: members:
# Swift S3 backends are in fact the same as the Swift backends # Swift S3 backends are in fact the same as the Swift backends