Merge "Improve StatsD metric precision"

This commit is contained in:
Zuul 2022-05-06 15:14:41 +00:00 committed by Gerrit Code Review
commit 652faab58b
3 changed files with 103 additions and 23 deletions

View File

@ -50,6 +50,12 @@ def _check_resource(strict=False):
return wrap
def normalize_metric_name(name):
name = name.replace('.', '_')
name = name.replace(':', '_')
return name
class Proxy(adapter.Adapter):
"""Represents a service."""
@ -204,22 +210,35 @@ class Proxy(adapter.Adapter):
self._report_stats_influxdb(response, url, method, exc)
def _report_stats_statsd(self, response, url=None, method=None, exc=None):
try:
if response is not None and not url:
url = response.request.url
if response is not None and not method:
method = response.request.method
name_parts = self._extract_name(url,
self.service_type,
self.session.get_project_id())
name_parts = [
normalize_metric_name(f) for f in
self._extract_name(
url, self.service_type, self.session.get_project_id())
]
key = '.'.join(
[self._statsd_prefix, self.service_type, method]
+ name_parts)
[self._statsd_prefix,
normalize_metric_name(self.service_type), method,
'_'.join(name_parts)
])
with self._statsd_client.pipeline() as pipe:
if response is not None:
pipe.timing(key, response.elapsed)
pipe.incr(key)
duration = int(response.elapsed.total_seconds() * 1000)
metric_name = '%s.%s' % (key, str(response.status_code))
pipe.timing(metric_name, duration)
pipe.incr(metric_name)
if duration > 1000:
pipe.incr('%s.over_1000' % key)
elif exc is not None:
pipe.incr('%s.failed' % key)
pipe.incr('%s.attempted' % key)
except Exception:
# We do not want errors in metric reporting ever break client
self.log.exception("Exception reporting metrics")
def _report_stats_prometheus(self, response, url=None, method=None,
exc=None):
@ -253,9 +272,12 @@ class Proxy(adapter.Adapter):
method = response.request.method
tags = dict(
method=method,
name='_'.join(self._extract_name(
name='_'.join([
normalize_metric_name(f) for f in
self._extract_name(
url, self.service_type,
self.session.get_project_id()))
self.session.get_project_id())
])
)
fields = dict(
attempted=1

View File

@ -23,7 +23,9 @@ import threading
import time
import fixtures
from keystoneauth1 import exceptions
import prometheus_client
from requests import exceptions as rexceptions
import testtools.content
from openstack.tests.unit import base
@ -175,7 +177,7 @@ class TestStats(base.TestCase):
self.assert_calls()
self.assert_reported_stat(
'openstack.api.identity.GET.projects', value='1', kind='c')
'openstack.api.identity.GET.projects.200', value='1', kind='c')
self.assert_prometheus_stat(
'openstack_http_requests_total', 1, dict(
service_type='identity',
@ -196,7 +198,7 @@ class TestStats(base.TestCase):
self.assert_calls()
self.assert_reported_stat(
'openstack.api.identity.GET.projects', value='1', kind='c')
'openstack.api.identity.GET.projects.200', value='1', kind='c')
self.assert_prometheus_stat(
'openstack_http_requests_total', 1, dict(
service_type='identity',
@ -217,7 +219,11 @@ class TestStats(base.TestCase):
self.assert_calls()
self.assert_reported_stat(
'openstack.api.compute.GET.servers.detail', value='1', kind='c')
'openstack.api.compute.GET.servers_detail.200',
value='1', kind='c')
self.assert_reported_stat(
'openstack.api.compute.GET.servers_detail.200',
value='0', kind='ms')
self.assert_prometheus_stat(
'openstack_http_requests_total', 1, dict(
service_type='compute',
@ -237,7 +243,11 @@ class TestStats(base.TestCase):
self.assert_calls()
self.assert_reported_stat(
'openstack.api.compute.GET.servers', value='1', kind='c')
'openstack.api.compute.GET.servers.200', value='1', kind='c')
self.assert_reported_stat(
'openstack.api.compute.GET.servers.200', value='0', kind='ms')
self.assert_reported_stat(
'openstack.api.compute.GET.servers.attempted', value='1', kind='c')
self.assert_prometheus_stat(
'openstack_http_requests_total', 1, dict(
service_type='compute',
@ -245,6 +255,49 @@ class TestStats(base.TestCase):
method='GET',
status_code='200'))
def test_servers_error(self):
mock_uri = 'https://compute.example.com/v2.1/servers'
self.register_uris([
dict(method='GET', uri=mock_uri, status_code=500,
json={})])
self.cloud.compute.get('/servers')
self.assert_calls()
self.assert_reported_stat(
'openstack.api.compute.GET.servers.500', value='1', kind='c')
self.assert_reported_stat(
'openstack.api.compute.GET.servers.500', value='0', kind='ms')
self.assert_reported_stat(
'openstack.api.compute.GET.servers.attempted', value='1', kind='c')
self.assert_prometheus_stat(
'openstack_http_requests_total', 1, dict(
service_type='compute',
endpoint=mock_uri,
method='GET',
status_code='500'))
def test_timeout(self):
mock_uri = 'https://compute.example.com/v2.1/servers'
self.register_uris([
dict(method='GET', uri=mock_uri,
exc=rexceptions.ConnectTimeout)
])
try:
self.cloud.compute.get('/servers')
except exceptions.ConnectTimeout:
pass
self.assert_reported_stat(
'openstack.api.compute.GET.servers.failed', value='1', kind='c')
self.assert_reported_stat(
'openstack.api.compute.GET.servers.attempted', value='1', kind='c')
class TestNoStats(base.TestCase):

View File

@ -0,0 +1,5 @@
---
upgrade:
- |
API metrics emitted by OpenStackSDK to StatsD now contain status_code
part of the metric name in order to improve information precision.