From 9eaa462af7240137a66be1e5ee5efc9eaeeb3e78 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Thu, 15 Oct 2015 16:31:48 +0200 Subject: [PATCH] Add filter to detect HTTP errors on some backends Currently we are checking - nova-api - heat-api - swift-api - cinder-api - glance-api - neutron-api - keystone-public-api - keystone-admin-api Change-Id: I1d5f73390e6d3479634de8a46e7cdf4b246a0366 --- .../lma_collector/templates/alarming.yaml.erb | 154 ++++++++++++++++++ .../templates/gse_filters.yaml.erb | 8 + 2 files changed, 162 insertions(+) diff --git a/deployment_scripts/puppet/modules/lma_collector/templates/alarming.yaml.erb b/deployment_scripts/puppet/modules/lma_collector/templates/alarming.yaml.erb index dd89c1b5a..3fa6b9f4e 100644 --- a/deployment_scripts/puppet/modules/lma_collector/templates/alarming.yaml.erb +++ b/deployment_scripts/puppet/modules/lma_collector/templates/alarming.yaml.erb @@ -270,13 +270,148 @@ lma_collector: window: 60 periods: 0 function: min + - name: 'nova-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on nova-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'nova-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + - name: 'heat-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on heat-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'heat-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff +<% if not @storage_options["objects_ceph"] then -%> + - name: 'swift-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on swift-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'swift-api' 
relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff +<% end -%> + - name: 'cinder-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on cinder-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'cinder-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + - name: 'glance-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on glance-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'glance-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + - name: 'neutron-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on neutron-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'neutron-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + - name: 'keystone-public-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on keystone-public-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'keystone-public-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + - name: 'keystone-admin-api-http-errors' + description: 'Some 5xx HTTP errors have been detected on keystone-admin-api' + severity: 'warning' + enabled: 'true' + trigger: + logical_operator: 'or' + rules: + - metric: haproxy_backend_response_5xx + fields: + backend: 'keystone-admin-api' + relational_operator: '>' + threshold: 0 + window: 60 + periods: 1 + function: diff + node_cluster_roles: - controller: ['primary-controller', 'controller'] - compute: ['compute'] 
- storage: ['cinder', 'ceph-osd'] + service_cluster_roles: - rabbitmq: ['primary-controller', 'controller'] - apache: ['primary-controller', 'controller'] + - nova-api: ['primary-controller', 'controller'] + - heat-api: ['primary-controller', 'controller'] +<% if not @storage_options["objects_ceph"] then -%> + - swift-api: ['primary-controller', 'controller'] +<% end -%> + - cinder-api: ['primary-controller', 'controller'] + - glance-api: ['primary-controller', 'controller'] + - neutron-api: ['primary-controller', 'controller'] + - keystone-public-api: ['primary-controller', 'controller'] + - keystone-admin-api: ['primary-controller', 'controller'] + node_cluster_alarms: - controller: - cpu: ['cpu-critical-controller', 'cpu-warning-controller'] @@ -290,6 +425,7 @@ lma_collector: - default: - cpu: ['cpu-critical-default'] - fs: ['fs-critical', 'fs-warning'] + service_cluster_alarms: - rabbitmq: - queue: ['rabbitmq-queue-warning'] @@ -297,3 +433,21 @@ lma_collector: - disk: ['rabbitmq-disk-limit-critical', 'rabbitmq-disk-limit-warning'] - apache: - worker: ['apache-warning'] + - nova-api: + - http_errors: ['nova-api-http-errors'] + - heat-api: + - http_errors: ['heat-api-http-errors'] +<% if not @storage_options["objects_ceph"] then -%> + - swift-api: + - http_errors: ['swift-api-http-errors'] +<% end -%> + - cinder-api: + - http_errors: ['cinder-api-http-errors'] + - glance-api: + - http_errors: ['glance-api-http-errors'] + - neutron-api: + - http_errors: ['neutron-api-http-errors'] + - keystone-public-api: + - http_errors: ['keystone-public-api-http-errors'] + - keystone-admin-api: + - http_errors: ['keystone-admin-api-http-errors'] diff --git a/deployment_scripts/puppet/modules/lma_collector/templates/gse_filters.yaml.erb b/deployment_scripts/puppet/modules/lma_collector/templates/gse_filters.yaml.erb index 9e490bab9..2d385bf63 100644 --- a/deployment_scripts/puppet/modules/lma_collector/templates/gse_filters.yaml.erb +++ 
b/deployment_scripts/puppet/modules/lma_collector/templates/gse_filters.yaml.erb @@ -43,6 +43,7 @@ lma_collector: members: - backends - endpoint + - http_errors nova-ec2-api: members: - backends @@ -71,6 +72,7 @@ lma_collector: members: - backends - endpoint + - http_errors cinder-v2-api: members: # Cinder V2 backends are in fact the same as the Cinder backends @@ -85,6 +87,7 @@ lma_collector: members: - backends - endpoint + - http_errors neutron-l3: members: - workers @@ -101,14 +104,17 @@ lma_collector: members: - backends - endpoint + - http_errors keystone-admin-api: members: # TODO(pasquier-s): add a metric reporting the status of the keystone-admin-api endpoint - backends + - http_errors glance-api: members: - backends - endpoint + - http_errors glance-registry-api: members: - backends @@ -116,6 +122,7 @@ lma_collector: members: - backends - endpoint + - http_errors heat-cfn-api: members: - backends @@ -137,6 +144,7 @@ lma_collector: members: - backends - endpoint + - http_errors swift-s3-api: members: # Swift S3 backends are in fact the same as the Swift backends