Add Redfish metrics support
Added a parsing module to process Redfish-originated metrics and submit them to Prometheus. Change-Id: I1c751041488366304c92d4df07cb8a62dcb371fa
This commit is contained in:
parent
46e50fb56f
commit
9959cf36bc
|
@ -15,6 +15,7 @@ import os
|
|||
|
||||
from ironic_prometheus_exporter.parsers import ipmi
|
||||
from ironic_prometheus_exporter.parsers import header
|
||||
from ironic_prometheus_exporter.parsers import redfish
|
||||
from oslo_config import cfg
|
||||
from oslo_messaging.notify import notifier
|
||||
from prometheus_client import write_to_textfile, CollectorRegistry
|
||||
|
@ -43,14 +44,23 @@ class PrometheusFileDriver(notifier.Driver):
|
|||
|
||||
def notify(self, ctxt, message, priority, retry):
|
||||
try:
|
||||
if message['event_type'] == 'hardware.ipmi.metrics':
|
||||
registry = CollectorRegistry()
|
||||
node_message = message['payload']
|
||||
header.timestamp_registry(node_message, registry)
|
||||
registry = CollectorRegistry()
|
||||
|
||||
event_type = message['event_type']
|
||||
node_message = message['payload']
|
||||
header.timestamp_registry(node_message, registry)
|
||||
|
||||
if event_type == 'hardware.ipmi.metrics':
|
||||
ipmi.category_registry(node_message, registry)
|
||||
nodeFile = os.path.join(self.location,
|
||||
node_message['node_name'])
|
||||
write_to_textfile(nodeFile, registry)
|
||||
|
||||
elif event_type == 'hardware.redfish.metrics':
|
||||
redfish.category_registry(node_message, registry)
|
||||
|
||||
nodeFile = os.path.join(
|
||||
self.location,
|
||||
node_message['node_name'] + '-' + event_type)
|
||||
write_to_textfile(nodeFile, registry)
|
||||
|
||||
except Exception as e:
|
||||
LOG.error(e)
|
||||
|
||||
|
|
|
@ -0,0 +1,256 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import collections
|
||||
import logging
|
||||
|
||||
from prometheus_client import Gauge
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_labels(node_message):
|
||||
return {
|
||||
k: node_message[k]
|
||||
for k in ('node_name', 'node_uuid', 'instance_uuid')
|
||||
}
|
||||
|
||||
|
||||
def build_temperature_metrics(node_message):
    """Build Prometheus temperature metrics from an Oslo message.

    Takes an Oslo notification message carrying Redfish sensor data and
    produces a data structure suitable for submitting to Prometheus.

    :param node_message: Oslo notification message
    :returns: mapping of metric name to a list of ``(value, labels)``
        tuples, e.g.::

            {
                'baremetal_temp_cpu_celsius': [
                    (42,
                     {'node_name': 'kninode',
                      'node_uuid': 'XXX-YYY-ZZZ',
                      'instance_uuid': 'ZZZ-YYY-XXX',
                      'entity_id': 'CPU',
                      'sensor_id': 1})
                ]
            }
    """
    payload = node_message
    for key in ('payload', 'Temperature'):
        payload = payload.get(key, {})

    metrics = collections.defaultdict(list)

    # The dict key is not used for temperature sensors; the sensor label
    # comes from the sensor's own ``sensor_number`` field.
    for sensor_data in payload.values():
        metric = 'baremetal_temp_%s_celsius' % (
            sensor_data['physical_context'].lower())

        labels = {
            'node_name': node_message['node_name'],
            'node_uuid': node_message['node_uuid'],
            'instance_uuid': node_message['instance_uuid'],
            'entity_id': sensor_data['physical_context'],
            'sensor_id': sensor_data['sensor_number'],
        }

        metrics[metric].append((sensor_data['reading_celsius'], labels))

    return metrics
|
||||
|
||||
|
||||
def build_power_metrics(node_message):
    """Build Prometheus power metrics from an Oslo message.

    Takes an Oslo notification message carrying Redfish sensor data and
    produces a data structure suitable for submitting to Prometheus.

    :param node_message: Oslo notification message
    :returns: mapping of metric name to a list of ``(value, labels)``
        tuples, e.g.::

            {
                'baremetal_power_status': [
                    (0,  # 0 - healthy, 1 - not healthy
                     {'node_name': 'kninode',
                      'node_uuid': 'XXX-YYY-ZZZ',
                      'instance_uuid': 'ZZZ-YYY-XXX',
                      'entity_id': 'PSU',
                      'sensor_id': '0:Power@ZZZ-YYY-XXX'})
                ]
            }
    """
    payload = node_message
    for key in ('payload', 'Power'):
        payload = payload.get(key, {})

    metrics = collections.defaultdict(list)

    for sensor_id, sensor_data in payload.items():
        labels = {
            'node_name': node_message['node_name'],
            'node_uuid': node_message['node_uuid'],
            'instance_uuid': node_message['instance_uuid'],
            'entity_id': 'PSU',
            'sensor_id': sensor_id,
        }

        # 0 when the PSU reports healthy, 1 otherwise; int() of the
        # comparison is clearer than the ``x and 1 or 0`` idiom and
        # yields the same values.
        value = int(sensor_data['health'] != 'OK')

        metrics['baremetal_power_status'].append((value, labels))

    return metrics
|
||||
|
||||
|
||||
def build_fan_metrics(node_message):
    """Build Prometheus fan metrics from an Oslo message.

    Takes an Oslo notification message carrying Redfish sensor data and
    produces a data structure suitable for submitting to Prometheus.

    :param node_message: Oslo notification message
    :returns: mapping of metric name to a list of ``(value, labels)``
        tuples, e.g.::

            {
                'baremetal_fan_status': [
                    (0,  # 0 - healthy, 1 - not healthy
                     {'node_name': 'kninode',
                      'node_uuid': 'XXX-YYY-ZZZ',
                      'instance_uuid': 'ZZZ-YYY-XXX',
                      'entity_id': 'CPU',
                      'sensor_id': 'XXX-YYY-ZZZ'})
                ]
            }
    """
    payload = node_message
    for key in ('payload', 'Fan'):
        payload = payload.get(key, {})

    metrics = collections.defaultdict(list)

    # The dict key is not used for fans; the sensor label comes from the
    # fan's own ``identity`` field.
    for sensor_data in payload.values():
        labels = {
            'node_name': node_message['node_name'],
            'node_uuid': node_message['node_uuid'],
            'instance_uuid': node_message['instance_uuid'],
            'entity_id': sensor_data['physical_context'],
            'sensor_id': sensor_data['identity'],
        }

        # 0 when the fan reports healthy, 1 otherwise.
        value = int(sensor_data['health'] != 'OK')

        metrics['baremetal_fan_status'].append((value, labels))

    return metrics
|
||||
|
||||
|
||||
def build_drive_metrics(node_message):
    """Build Prometheus drive metrics from an Oslo message.

    Takes an Oslo notification message carrying Redfish sensor data and
    produces a data structure suitable for submitting to Prometheus.

    :param node_message: Oslo notification message
    :returns: mapping of metric name to a list of ``(value, labels)``
        tuples, e.g.::

            {
                'baremetal_drive_status': [
                    (0,  # 0 - healthy, 1 - not healthy
                     {'node_name': 'kninode',
                      'node_uuid': 'XXX-YYY-ZZZ',
                      'instance_uuid': 'ZZZ-YYY-XXX',
                      'entity_id': 'HDD',
                      'sensor_id': '32ADF365C6C1B7BD'})
                ]
            }
    """
    payload = node_message
    for key in ('payload', 'Drive'):
        payload = payload.get(key, {})

    metrics = collections.defaultdict(list)

    for sensor_id, sensor_data in payload.items():
        labels = {
            'node_name': node_message['node_name'],
            'node_uuid': node_message['node_uuid'],
            'instance_uuid': node_message['instance_uuid'],
            'entity_id': 'HDD',
            'sensor_id': sensor_id,
        }

        # 0 when the drive reports healthy, 1 otherwise.
        value = int(sensor_data['health'] != 'OK')

        metrics['baremetal_drive_status'].append((value, labels))

    return metrics
|
||||
|
||||
|
||||
def category_registry(node_message, metrics_registry):
    """Parse Redfish metrics and submit them to Prometheus.

    :param node_message: Oslo notification message
    :param metrics_registry: Prometheus registry to register gauges in
    """
    metrics = build_temperature_metrics(node_message)
    metrics.update(build_power_metrics(node_message))
    metrics.update(build_fan_metrics(node_message))
    metrics.update(build_drive_metrics(node_message))

    for metric, details in metrics.items():
        if not details:
            continue

        # Registering the same metric name twice in one registry raises
        # ValueError, so create each Gauge exactly once and add one
        # labelled child per sensor, instead of instantiating a new
        # Gauge per (value, labels) sample.
        labelnames = sorted(details[0][1])
        gauge = Gauge(metric, '', labelnames=labelnames,
                      registry=metrics_registry)

        for value, labels in details:
            gauge.labels(**labels).set(value)
|
|
@ -0,0 +1,67 @@
|
|||
{
|
||||
"priority": "INFO",
|
||||
"event_type": "hardware.redfish.metrics",
|
||||
"timestamp": "2019-03-29 20:12:26.885347",
|
||||
"publisher_id": "None.localhost.localdomain",
|
||||
"payload": {
|
||||
"instance_uuid": "ac2aa2fd-6e1a-41c8-a114-2084c8705228",
|
||||
"node_uuid": "ac2aa2fd-6e1a-41c8-a114-2084c8705228",
|
||||
"event_type": "hardware.redfish.metrics.update",
|
||||
"timestamp": "2019-03-29T20:12:22.989020",
|
||||
"node_name": "knilab-master-u9",
|
||||
"message_id": "85d6b2c8-fe57-432d-868a-330e0e28cf34",
|
||||
"payload": {
|
||||
"Temperature": {
|
||||
"XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
|
||||
"identity": "XXX-YYY-ZZZ",
|
||||
"max_reading_range_temp": 120,
|
||||
"min_reading_range_temp": 0,
|
||||
"physical_context": "CPU",
|
||||
"reading_celsius": 62,
|
||||
"sensor_number": 1,
|
||||
"health": "OK",
|
||||
"state": "enabled"
|
||||
}
|
||||
},
|
||||
"Power": {
|
||||
"0:Power@ZZZ-YYY-XXX": {
|
||||
"health": "OK",
|
||||
"last_power_output_watts": 650,
|
||||
"line_input_voltage": 220,
|
||||
"maximum_frequency_hz": 63,
|
||||
"maximum_voltage": 250,
|
||||
"minimum_frequency_hz": 47,
|
||||
"minimum_voltage": 185,
|
||||
"output_wattage": 1450,
|
||||
"power_capacity_watts": 1450,
|
||||
"serial_number": "SN010203040506",
|
||||
"state": "enabled"
|
||||
}
|
||||
},
|
||||
"Fan": {
|
||||
"XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
|
||||
"identity": "XXX-YYY-ZZZ",
|
||||
"max_reading_range": 10000,
|
||||
"min_reading_range": 0,
|
||||
"physical_context": "CPU",
|
||||
"reading": 6000,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": "SN010203040506",
|
||||
"health": "OK",
|
||||
"state": "enabled"
|
||||
}
|
||||
},
|
||||
"Drive": {
|
||||
"32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
|
||||
"capacity_bytes": 3750000000,
|
||||
"failure_predicted": true,
|
||||
"health": "OK",
|
||||
"identity": "32ADF365C6C1B7BD",
|
||||
"model": "IBM 350A",
|
||||
"state": "enabled"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"message_id": "2c0da1e8-1958-484f-9bdd-9117d717f7fa"
|
||||
}
|
|
@ -25,7 +25,7 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
|
|||
def setUp(self):
|
||||
super(TestPrometheusFileNotifier, self).setUp()
|
||||
|
||||
def test_instanciate(self):
|
||||
def test_instantiate(self):
|
||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||
self.config(location=temp_dir,
|
||||
group='oslo_messaging_notifications')
|
||||
|
@ -72,8 +72,8 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
|
|||
if os.path.isfile(os.path.join(DIR, name))]
|
||||
self.assertEqual(node1, node2)
|
||||
self.assertEqual(len(all_files), 1)
|
||||
self.assertIn(node1, all_files)
|
||||
self.assertIn(node2, all_files)
|
||||
self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
|
||||
self.assertIn(node2 + '-hardware.ipmi.metrics', all_files)
|
||||
|
||||
def test_messages_from_different_nodes(self):
|
||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||
|
@ -104,5 +104,35 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
|
|||
all_files = [name for name in os.listdir(DIR)
|
||||
if os.path.isfile(os.path.join(DIR, name))]
|
||||
self.assertEqual(len(all_files), 2)
|
||||
self.assertIn(node1, all_files)
|
||||
self.assertIn(node2, all_files)
|
||||
self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
|
||||
self.assertIn(node2 + '-hardware.ipmi.metrics', all_files)
|
||||
|
||||
def test_messages_of_different_types(self):
|
||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||
self.config(location=temp_dir,
|
||||
group='oslo_messaging_notifications')
|
||||
transport = oslo_messaging.get_notification_transport(self.conf)
|
||||
driver = PrometheusFileDriver(self.conf, None, transport)
|
||||
|
||||
sample_file_1 = os.path.join(
|
||||
os.path.dirname(ironic_prometheus_exporter.__file__),
|
||||
'tests', 'json_samples', 'notification-ipmi-1.json')
|
||||
|
||||
sample_file_2 = os.path.join(
|
||||
os.path.dirname(ironic_prometheus_exporter.__file__),
|
||||
'tests', 'json_samples', 'notification-redfish.json')
|
||||
|
||||
msg1 = json.load(open(sample_file_1))
|
||||
node1 = msg1['payload']['node_name']
|
||||
msg2 = json.load(open(sample_file_2))
|
||||
node2 = msg2['payload']['node_name']
|
||||
|
||||
driver.notify(None, msg1, 'info', 0)
|
||||
driver.notify(None, msg2, 'info', 0)
|
||||
|
||||
DIR = self.conf.oslo_messaging_notifications.location
|
||||
all_files = [name for name in os.listdir(DIR)
|
||||
if os.path.isfile(os.path.join(DIR, name))]
|
||||
self.assertEqual(len(all_files), 2)
|
||||
self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
|
||||
self.assertIn(node2 + '-hardware.redfish.metrics', all_files)
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
import json
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import ironic_prometheus_exporter
|
||||
from ironic_prometheus_exporter.parsers import redfish
|
||||
from prometheus_client import CollectorRegistry
|
||||
|
||||
|
||||
# Path of the bundled Redfish notification sample, resolved relative to
# the installed package so the test works from any working directory.
sample_file = os.path.join(
    os.path.dirname(ironic_prometheus_exporter.__file__),
    'tests', 'json_samples', 'notification-redfish.json')

# Load the sample once at import time; the context manager closes the
# file promptly (json.load(open(...)) would leak the handle).
with open(sample_file) as sample_fp:
    DATA = json.load(sample_fp)
|
||||
|
||||
|
||||
class TestPayloadsParser(unittest.TestCase):
    """Exercise the Redfish notification parser against the JSON sample."""

    # Labels common to every metric built from the sample message.
    COMMON_LABELS = {
        'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
        'node_name': 'knilab-master-u9',
        'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
    }

    def setUp(self):
        payload = DATA['payload']
        self.node_message = payload
        self.node_name = payload['node_name']
        self.node_uuid = payload['node_uuid']
        self.instance_uuid = payload['instance_uuid']

    def _check_single_metric(self, metrics, metric_name, expected_value,
                             entity_id, sensor_id):
        """Assert *metrics* holds exactly the expected sample reading."""
        self.assertIn(metric_name, metrics)
        value, labels = metrics[metric_name][0]
        self.assertEqual(expected_value, value)
        expected_labels = dict(self.COMMON_LABELS,
                               entity_id=entity_id,
                               sensor_id=sensor_id)
        self.assertEqual(expected_labels, labels)

    def test_build_temperature_metrics(self):
        metrics = redfish.build_temperature_metrics(self.node_message)
        self._check_single_metric(metrics, 'baremetal_temp_cpu_celsius',
                                  62, 'CPU', 1)

    def test_build_power_metrics(self):
        metrics = redfish.build_power_metrics(self.node_message)
        self._check_single_metric(metrics, 'baremetal_power_status',
                                  0, 'PSU', '0:Power@ZZZ-YYY-XXX')

    def test_build_fan_metrics(self):
        metrics = redfish.build_fan_metrics(self.node_message)
        self._check_single_metric(metrics, 'baremetal_fan_status',
                                  0, 'CPU', 'XXX-YYY-ZZZ')

    def test_build_drive_metrics(self):
        metrics = redfish.build_drive_metrics(self.node_message)
        self._check_single_metric(
            metrics, 'baremetal_drive_status', 0, 'HDD',
            '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX')

    def test_category_registry(self):
        metrics_registry = CollectorRegistry()
        redfish.category_registry(self.node_message, metrics_registry)

        label = dict(
            self.COMMON_LABELS, entity_id='HDD',
            sensor_id='32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX')
        sensor_value = metrics_registry.get_sample_value(
            'baremetal_drive_status', label)
        self.assertEqual(0, sensor_value)
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Adds support for handling Redfish-originated metrics alongside
|
||||
IPMI ones.
|
Loading…
Reference in New Issue