From 7289278dada9ac5dfd8679694effb9bd0ffb3b22 Mon Sep 17 00:00:00 2001 From: jlarriba Date: Thu, 13 Jun 2024 14:08:56 +0200 Subject: [PATCH] Ceilometer to export Prometheus metrics With this patch, ceilometer-polling is able to expose metrics itself in Prometheus format so they can be scraped directly from a Prometheus instance. Change-Id: I68d1dc96a89e57cafa9dba207d92403a1e156fe6 --- ceilometer/polling/manager.py | 42 +++- ceilometer/polling/prom_exporter.py | 143 ++++++++++++ ceilometer/tests/unit/test_prom_exporter.py | 238 ++++++++++++++++++++ requirements.txt | 1 + 4 files changed, 415 insertions(+), 9 deletions(-) create mode 100644 ceilometer/polling/prom_exporter.py create mode 100644 ceilometer/tests/unit/test_prom_exporter.py diff --git a/ceilometer/polling/manager.py b/ceilometer/polling/manager.py index c2543b9d42..0c1a9975b8 100644 --- a/ceilometer/polling/manager.py +++ b/ceilometer/polling/manager.py @@ -41,6 +41,7 @@ from ceilometer import keystone_client from ceilometer import messaging from ceilometer.polling import dynamic_pollster from ceilometer.polling import plugin_base +from ceilometer.polling import prom_exporter from ceilometer.publisher import utils as publisher_utils from ceilometer import utils @@ -77,6 +78,19 @@ POLLING_OPTS = [ 'recommended that ceilometer be configured with a ' 'caching backend to reduce the number of calls ' 'made to keystone.'), + cfg.BoolOpt('enable_notifications', + default=True, + help='Whether the polling service should be sending ' + 'notifications to RabbitMQ after polling cycles.'), + cfg.BoolOpt('enable_prometheus_exporter', + default=False, + help='Allow this ceilometer polling instance to ' + 'expose directly the retrieved metrics in Prometheus ' + 'format.'), + cfg.ListOpt('prometheus_listen_addresses', + default=["127.0.0.1:9101"], + help='A list of ipaddr:port combinations on which ' + 'the exported metrics will be exposed.') ] @@ -282,11 +296,14 @@ class PollingTask(object): exc_info=True) def 
_send_notification(self, samples): - self.manager.notifier.sample( - {}, - 'telemetry.polling', - {'samples': samples} - ) + if self.manager.conf.polling.enable_notifications: + self.manager.notifier.sample( + {}, + 'telemetry.polling', + {'samples': samples} + ) + if self.manager.conf.polling.enable_prometheus_exporter: + prom_exporter.collect_metrics(samples) class AgentManager(cotyledon.Service): @@ -342,10 +359,17 @@ class AgentManager(cotyledon.Service): self.group_prefix = ('%s-%s' % (namespace_prefix, group_prefix) if group_prefix else namespace_prefix) - self.notifier = oslo_messaging.Notifier( - messaging.get_transport(self.conf), - driver=self.conf.publisher_notifier.telemetry_driver, - publisher_id="ceilometer.polling") + if self.conf.polling.enable_notifications: + self.notifier = oslo_messaging.Notifier( + messaging.get_transport(self.conf), + driver=self.conf.publisher_notifier.telemetry_driver, + publisher_id="ceilometer.polling") + + if self.conf.polling.enable_prometheus_exporter: + for addr in self.conf.polling.prometheus_listen_addresses: + address = addr.split(":") + if len(address) == 2: + prom_exporter.export(address[0], address[1]) self._keystone = None self._keystone_last_exception = None diff --git a/ceilometer/polling/prom_exporter.py b/ceilometer/polling/prom_exporter.py new file mode 100644 index 0000000000..25a0370b75 --- /dev/null +++ b/ceilometer/polling/prom_exporter.py @@ -0,0 +1,143 @@ +# +# Copyright 2024 Juan Larriba +# Copyright 2024 Red Hat, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Expose polled Ceilometer samples as Prometheus metrics.

Samples handed to :func:`collect_metrics` are converted into Prometheus
counters/gauges stored in :data:`CEILOMETER_REGISTRY`; :func:`export`
starts an HTTP endpoint serving that registry.
"""

import logging

import prometheus_client as prom

LOG = logging.getLogger(__name__)

CEILOMETER_REGISTRY = prom.CollectorRegistry()

_CUMULATIVE_TYPE = "cumulative"
_GAUGE_TYPES = ("gauge", "delta")

# (sample field, Prometheus label name) pairs, in the order in which the
# labels are emitted. A label is only emitted when the field is populated.
_OPTIONAL_LABELS = (
    ("counter_name", "counter"),
    ("project_id", "project"),
    ("project_name", "project_name"),
    ("user_id", "user"),
    ("user_name", "user_name"),
    ("counter_unit", "unit"),
    ("resource_id", "resource"),
)

# Last cumulative volume seen per (metric name, label values). Cumulative
# samples carry a running total, while prometheus Counters can only be
# incremented, so the previous total is needed to compute the increment.
_LAST_CUMULATIVE = {}


def export(prometheus_iface, prometheus_port):
    """Start an HTTP server exposing CEILOMETER_REGISTRY.

    :param prometheus_iface: address to bind the server to.
    :param prometheus_port: port to listen on; anything accepted by
        ``int()`` (the polling manager passes a string).
    :raises ValueError: if ``prometheus_port`` cannot be converted to int.
    """
    prom.start_http_server(port=int(prometheus_port),
                           addr=prometheus_iface,
                           registry=CEILOMETER_REGISTRY)


def collect_metrics(samples):
    """Record every sample of *samples* in CEILOMETER_REGISTRY.

    ``cumulative`` samples feed a Counter, ``gauge`` and ``delta`` samples
    set a Gauge; samples of any other type are ignored. A sample that
    prometheus_client rejects with ``ValueError`` (for instance because its
    label set differs from the one the metric was created with) is logged
    and skipped so that the remaining samples of the batch are still
    exported.

    :param samples: iterable of sample dicts providing at least
        ``counter_name``, ``counter_type``, ``counter_volume`` and
        ``resource_id``.
    """
    for sample in samples:
        try:
            _record_sample(sample)
        except ValueError as exc:
            LOG.warning("Unable to export sample %s as a Prometheus "
                        "metric: %s", sample.get('counter_name'), exc)


def _record_sample(sample):
    """Create the metric for *sample* if needed and update its value."""
    name = "ceilometer_" + sample['counter_name'].replace('.', '_')
    sample_type = sample['counter_type']
    volume = sample['counter_volume']
    labels = _gen_labels(sample)

    is_cumulative = sample_type == _CUMULATIVE_TYPE
    if not is_cumulative and sample_type not in _GAUGE_TYPES:
        # Unknown sample type: nothing to export.
        return

    # NOTE: the registry offers no public lookup of a collector by name.
    metric = CEILOMETER_REGISTRY._names_to_collectors.get(name)
    if metric is None:
        metric_cls = prom.Counter if is_cumulative else prom.Gauge
        metric = metric_cls(name=name, documentation="",
                            labelnames=labels['keys'],
                            registry=CEILOMETER_REGISTRY)
    elif isinstance(metric, prom.Counter) != is_cumulative:
        raise ValueError("metric %s is already registered with a type "
                         "incompatible with '%s' samples"
                         % (name, sample_type))

    child = metric.labels(*labels['values'])
    if not is_cumulative:
        child.set(volume)
        return

    # Increment by the growth since the previous poll rather than by the
    # whole running total. On the first observation, or when the source
    # total went backwards (it was reset), count from zero again.
    key = (name, tuple(str(value) for value in labels['values']))
    previous = _LAST_CUMULATIVE.get(key)
    if previous is None or volume < previous:
        increment = volume
    else:
        increment = volume - previous
    child.inc(increment)
    _LAST_CUMULATIVE[key] = volume


def _is_set(value):
    """Return True unless *value* is None or the empty string."""
    return value is not None and value != ''


def _gen_labels(sample):
    """Build the Prometheus label names and values describing *sample*.

    The first label is named after the first dotted component of
    ``counter_name``; its value is the third component when there is one,
    otherwise the resource id. ``publisher`` and ``type`` always follow,
    then one label per populated optional field and per populated
    ``host`` / ``display_name`` / ``name`` entry of ``resource_metadata``.

    :param sample: sample dict (see :func:`collect_metrics`).
    :returns: ``{'keys': [...], 'values': [...]}``, two parallel lists.
    """
    name_parts = sample['counter_name'].split(".")
    plugin = name_parts[0]
    if len(name_parts) > 2:
        plugin_value = name_parts[2]
    else:
        plugin_value = sample['resource_id']
    sample_kind = name_parts[1] if len(name_parts) > 1 else name_parts[0]

    keys = [plugin, "publisher", "type"]
    values = [plugin_value, "ceilometer", sample_kind]

    for field, label in _OPTIONAL_LABELS:
        value = sample.get(field)
        if _is_set(value):
            keys.append(label)
            values.append(value)

    metadata = sample.get('resource_metadata')
    if _is_set(metadata):
        host = metadata.get('host')
        if _is_set(host):
            keys.append("vm_instance")
            values.append(host)

        display_name = metadata.get('display_name')
        has_display_name = _is_set(display_name)
        if has_display_name:
            keys.append("resource_name")
            values.append(display_name)

        name = metadata.get('name')
        if _is_set(name):
            # NOTE: when display_name is present too, "resource_name" is
            # deliberately emitted a second time holding
            # "<display_name>:<name>"; existing consumers of this function
            # expect exactly this shape.
            keys.append("resource_name")
            if has_display_name:
                values.append(display_name + ":" + name)
            else:
                values.append(name)

    return dict(keys=keys, values=values)
#
# Copyright 2022 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# NOTE: the same patch adds "prometheus_client>=0.20.0 # Apache-2.0" to
# requirements.txt; the exporter under test needs it.
"""Tests for ceilometer/polling/prom_exporter.py"""

from oslotest import base

from unittest import mock
from unittest.mock import call

from ceilometer.polling import manager
from ceilometer.polling import prom_exporter
from ceilometer import service


COUNTER_SOURCE = 'testsource'

# Identifiers shared by several fixture samples and expectations.
_USER_ID = '6e7d71415cd5401cbe103829c9c5dec2'
_PROJECT_ID = 'd965489b7f894cbda89cd2e25bfd85a0'
_INSTANCE_ID = 'e536fff6-b20d-4aa5-ac2f-d15ac8b3af63'
_DISK_ID = _INSTANCE_ID + '-vda'
_IMAGE_ID = 'f9276c96-8a12-432b-96a1-559d70715f97'
_HOST = 'e0d297f5df3b62ec73c8d42b'
_SIGNATURE = 'f8d9a411b0cd0cb0d34e83'


def _instance_metadata(**extra):
    """Return fresh resource metadata of the test instance.

    Keyword arguments are added on top of the common entries.
    """
    metadata = {
        'display_name': 'myserver',
        'name': 'instance-00000002',
        'instance_id': _INSTANCE_ID,
        'instance_type': 'tiny',
        'host': _HOST,
        'instance_host': 'devstack',
        'flavor': {
            'id': '4af9ac72-5787-4f86-8644-0faa87ce7c83',
            'name': 'tiny',
            'vcpus': 1,
            'ram': 512,
            'disk': 1,
            'ephemeral': 0,
            'swap': 0,
        },
        'status': 'active',
        'state': 'running',
        'task_state': '',
        'image': {
            'id': '71860ed5-f66d-43e0-9514-f1d188106284',
        },
        'image_ref': '71860ed5-f66d-43e0-9514-f1d188106284',
        'image_ref_url': None,
        'architecture': 'x86_64',
        'os_type': 'hvm',
        'vcpus': 1,
        'memory_mb': 512,
        'disk_gb': 1,
        'ephemeral_gb': 0,
        'root_gb': 1,
    }
    metadata.update(extra)
    return metadata


def _as_labels(pairs):
    """Turn (key, value) pairs into the dict shape _gen_labels returns."""
    return {
        'keys': [key for key, _ in pairs],
        'values': [value for _, value in pairs],
    }


class TestPromExporter(base.BaseTestCase):
    """Exercise the Prometheus exporter and its wiring in the manager."""

    # One cumulative disk sample, one gauge instance sample and one gauge
    # image sample (the latter without user and host information).
    test_data = [
        {
            'source': 'openstack',
            'counter_name': 'disk.device.read.latency',
            'counter_type': 'cumulative',
            'counter_unit': 'ns',
            'counter_volume': 132128682,
            'user_id': _USER_ID,
            'user_name': None,
            'project_id': _PROJECT_ID,
            'project_name': None,
            'resource_id': _DISK_ID,
            'timestamp': '2024-06-20T09:32:36.521082',
            'resource_metadata': _instance_metadata(disk_name='vda'),
            'message_id': '078029c7-2ee8-11ef-a915-bd45e2085de3',
            'monotonic_time': 1819980.112406547,
            'message_signature': _SIGNATURE,
        },
        {
            'source': 'openstack',
            'counter_name': 'memory.usage',
            'counter_type': 'gauge',
            'counter_unit': 'MB',
            'counter_volume': 37.98046875,
            'user_id': _USER_ID,
            'user_name': None,
            'project_id': _PROJECT_ID,
            'project_name': None,
            'resource_id': _INSTANCE_ID,
            'timestamp': '2024-06-20T09:32:36.515823',
            'resource_metadata': _instance_metadata(),
            'message_id': '078029bf-2ee8-11ef-a915-bd45e2085de3',
            'monotonic_time': 1819980.131767362,
            'message_signature': _SIGNATURE,
        },
        {
            'source': 'openstack',
            'counter_name': 'image.size',
            'counter_type': 'gauge',
            'counter_unit': 'B',
            'counter_volume': 16344576,
            'user_id': None,
            'user_name': None,
            'project_id': _PROJECT_ID,
            'project_name': None,
            'resource_id': _IMAGE_ID,
            'timestamp': '2024-06-20T09:40:17.118871',
            'resource_metadata': {
                'status': 'active',
                'visibility': 'public',
                'name': 'cirros2',
                'container_format': 'bare',
                'created_at': '2024-05-30T11:38:52Z',
                'disk_format': 'qcow2',
                'updated_at': '2024-05-30T11:38:52Z',
                'min_disk': 0,
                'protected': False,
                'checksum': '7734eb3945297adc90ddc6cebe8bb082',
                'min_ram': 0,
                'tags': [],
                'virtual_size': 117440512,
            },
            'message_id': '19f8f78a-2ee9-11ef-a95f-bd45e2085de3',
            'monotonic_time': None,
            'message_signature': _SIGNATURE,
        },
    ]

    @mock.patch('ceilometer.polling.prom_exporter.export')
    def test_prom_disabled(self, mock_export):
        """With default options the manager never starts the exporter."""
        conf = service.prepare_service([], [])

        manager.AgentManager(0, conf)

        mock_export.assert_not_called()

    @mock.patch('ceilometer.polling.prom_exporter.export')
    def test_export_called(self, mock_export):
        """Each configured listen address results in one export() call."""
        conf = service.prepare_service([], [])
        conf.polling.enable_prometheus_exporter = True
        conf.polling.prometheus_listen_addresses = ['127.0.0.1:9101',
                                                    '127.0.0.1:9102']

        manager.AgentManager(0, conf)

        endpoints = (('127.0.0.1', '9101'), ('127.0.0.1', '9102'))
        mock_export.assert_has_calls(
            [call(host, port) for host, port in endpoints])

    def test_collect_metrics(self):
        """Gauge samples are readable from the registry afterwards."""
        prom_exporter.collect_metrics(self.test_data)

        image_labels = {
            'counter': 'image.size',
            'image': _IMAGE_ID,
            'project': _PROJECT_ID,
            'publisher': 'ceilometer',
            'resource': _IMAGE_ID,
            'resource_name': 'cirros2',
            'type': 'size',
            'unit': 'B',
        }
        memory_labels = {
            'counter': 'memory.usage',
            'memory': _INSTANCE_ID,
            'project': _PROJECT_ID,
            'publisher': 'ceilometer',
            'resource': _INSTANCE_ID,
            'resource_name': 'myserver:instance-00000002',
            'type': 'usage',
            'unit': 'MB',
            'user': _USER_ID,
            'vm_instance': _HOST,
        }
        expectations = (
            (16344576, 'ceilometer_image_size', image_labels),
            (37.98046875, 'ceilometer_memory_usage', memory_labels),
        )

        registry = prom_exporter.CEILOMETER_REGISTRY
        for expected, metric_name, metric_labels in expectations:
            self.assertEqual(
                expected,
                registry.get_sample_value(metric_name, metric_labels))

    def test_gen_labels(self):
        """Label keys and values are generated in a fixed order."""
        disk_expected = _as_labels([
            ('disk', 'read'),
            ('publisher', 'ceilometer'),
            ('type', 'device'),
            ('counter', 'disk.device.read.latency'),
            ('project', _PROJECT_ID),
            ('user', _USER_ID),
            ('unit', 'ns'),
            ('resource', _DISK_ID),
            ('vm_instance', _HOST),
            ('resource_name', 'myserver'),
            ('resource_name', 'myserver:instance-00000002'),
        ])
        disk_labels = prom_exporter._gen_labels(self.test_data[0])
        self.assertDictEqual(disk_labels, disk_expected)

        image_expected = _as_labels([
            ('image', _IMAGE_ID),
            ('publisher', 'ceilometer'),
            ('type', 'size'),
            ('counter', 'image.size'),
            ('project', _PROJECT_ID),
            ('unit', 'B'),
            ('resource', _IMAGE_ID),
            ('resource_name', 'cirros2'),
        ])
        image_labels = prom_exporter._gen_labels(self.test_data[2])
        self.assertDictEqual(image_labels, image_expected)