From ec80028dec56e57908b565ddf16cac12d41bb5cb Mon Sep 17 00:00:00 2001 From: Iury Gregory Melo Ferreira Date: Sat, 12 Jul 2025 13:54:09 -0300 Subject: [PATCH] [IPE] Support iDRAC driver metrics Currently we only support metrics from ipmi and redfish driver. This patch adds support for idrac metrics via the redfish parser. In addition to all supported redfish metrics we also added: - baremetal_temperature_status We also fixed the status metrics to differentiate between warning and critical states, showing a value of 1 for both. The metrics now show a value of 1 for Warning and 2 for Critical. Closes-Bug: #2111832 Assisted-By: Claude Code - Claude Sonnet 4 Change-Id: I6091013900ea5ed5d14076b837c054740c2f1873 Signed-off-by: Iury Gregory Melo Ferreira --- ironic_prometheus_exporter/messaging.py | 3 + .../parsers/metrics_information/redfish.json | 8 +- ironic_prometheus_exporter/parsers/redfish.py | 97 +++-- .../json_samples/notification-idrac.json | 339 ++++++++++++++++++ .../tests/test_driver.py | 10 +- .../tests/test_idrac_parser.py | 183 ++++++++++ .../tests/test_redfish_parser.py | 78 +++- .../notes/idrac-metrics-83480d59c49b13ca.yaml | 10 + 8 files changed, 683 insertions(+), 45 deletions(-) create mode 100644 ironic_prometheus_exporter/tests/json_samples/notification-idrac.json create mode 100644 ironic_prometheus_exporter/tests/test_idrac_parser.py create mode 100644 releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml diff --git a/ironic_prometheus_exporter/messaging.py b/ironic_prometheus_exporter/messaging.py index 07c30f1..ce0d353 100644 --- a/ironic_prometheus_exporter/messaging.py +++ b/ironic_prometheus_exporter/messaging.py @@ -65,6 +65,9 @@ class PrometheusFileDriver(notifier.Driver): elif event_type == 'hardware.redfish.metrics': redfish.category_registry(payload, registry) + elif event_type == 'hardware.idrac.metrics': + redfish.category_registry(payload, registry) + # Order of preference is for a node Name, UUID, or # payload hostname field to be 
used (i.e. for conductor # message payloads). diff --git a/ironic_prometheus_exporter/parsers/metrics_information/redfish.json b/ironic_prometheus_exporter/parsers/metrics_information/redfish.json index 65f7275..cbffaa9 100644 --- a/ironic_prometheus_exporter/parsers/metrics_information/redfish.json +++ b/ironic_prometheus_exporter/parsers/metrics_information/redfish.json @@ -1,6 +1,6 @@ { "baremetal_power_status": - "Power supply unit health status (0 - OK, 1 - failure)", + "Power supply unit health status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_temp_room_celsius": "Room temperature expressed in Celsius", "baremetal_temp_intake_celsius": @@ -47,8 +47,10 @@ "Chassis temperature expressed in Celsius", "baremetal_temp_fan_celsius": "Cooling fan temperature expressed in Celsius", + "baremetal_temperature_status": + "Baremetal temperature sensor status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_fan_status": - "Cooling fan health status (0 - OK, 1 - failure)", + "Cooling fan health status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_drive_status": - "Storage drive health status (0 - OK, 1 - failure)" + "Storage drive health status (0 - OK, 1 - Warning, 2 - Critical)" } diff --git a/ironic_prometheus_exporter/parsers/redfish.py b/ironic_prometheus_exporter/parsers/redfish.py index 1e3e4e4..70f7813 100644 --- a/ironic_prometheus_exporter/parsers/redfish.py +++ b/ironic_prometheus_exporter/parsers/redfish.py @@ -22,6 +22,13 @@ from ironic_prometheus_exporter import utils as ipe_utils LOG = logging.getLogger(__name__) +HEALTH_MAP = { + 'OK': 0, + 'Warning': 1, + 'Critical': 2 +} + + def _build_labels(node_message): fields = ['node_name', 'node_uuid', 'instance_uuid'] if not node_message['node_name']: @@ -31,6 +38,14 @@ def _build_labels(node_message): } +def _build_sensor_labels(sensor_labels, sensor_id, sensor_data, ignore_keys): + for k, v in sensor_data.items(): + if k not in ignore_keys and v is not None: + sensor_labels[k] = v + 
sensor_labels['sensor_id'] = sensor_id + return sensor_labels + + def build_temperature_metrics(node_message): """Build Prometheus temperature metrics from Oslo message. @@ -49,7 +64,7 @@ def build_temperature_metrics(node_message): [ # metric value 42, - # metric instance in form of Prometheus labels + # metric instance in form of Prometheus labels example { 'node_name': 'kninode', 'node_uuid', 'XXX-YYY-ZZZ', @@ -58,6 +73,17 @@ def build_temperature_metrics(node_message): 'sensor_id': '1' } ] + # baremetal_temperature_status: + # metric value (0 - OK, 1 - Warning, 2- Critical) + 0, + # metric labels + { + 'node_name': 'kninode', + 'node_uuid', 'XXX-YYY-ZZZ', + 'instance_uuid': 'ZZZ-YYY-XXX', + 'entity_id': 'CPU', + 'sensor_id': '1' + } ] } """ @@ -69,17 +95,32 @@ def build_temperature_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): + if sensor_data['state'].lower() != 'enabled': + continue + metric = 'baremetal_temp_%s_celsius' % ( sensor_data['physical_context'].lower()) + sensor_reading = sensor_data.pop('reading_celsius') + health_value = HEALTH_MAP.get(sensor_data['health']) + temp_metrics = { + metric: sensor_reading, + 'baremetal_temperature_status': health_value + } + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = sensor_data['physical_context'] - labels['sensor_id'] = sensor_data['sensor_number'] - - value = sensor_data['reading_celsius'] - - metrics[metric].append((value, labels)) + for name, value in temp_metrics.items(): + # NOTE(iurygregory): we do this to ensure the reading_celsius + # value is used as label for the baremetal_temperature_status + # metric. 
+ if name == 'baremetal_temperature_status': + new_labels = labels.copy() + new_labels['reading_celsius'] = sensor_reading + metrics[name].append((value, new_labels)) + else: + metrics[name].append((value, labels)) return metrics @@ -100,7 +141,7 @@ def build_power_metrics(node_message): # metric name 'baremetal_power_status': [ - # metric value (0 - OK, 1 - on fire) + # metric value (0 - OK, 1 - Warning, 2- Critical) 0, # metric instance in form of Prometheus labels { @@ -122,16 +163,17 @@ def build_power_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): - metric = 'baremetal_power_status' + if sensor_data['state'].lower() != 'enabled': + continue + + name = 'baremetal_power_status' + value = HEALTH_MAP.get(sensor_data['health']) + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = 'PSU' - labels['sensor_id'] = sensor_id - - value = sensor_data['health'] != 'OK' and 1 or 0 - - metrics[metric].append((value, labels)) + metrics[name].append((value, labels)) return metrics @@ -152,7 +194,7 @@ def build_fan_metrics(node_message): # metric name 'baremetal_fan_status': [ - # metric value (0 - OK, 1 - on fire) + # metric value (0 - OK, 1 - Warning, 2- Critical) 0, # metric instance in form of Prometheus labels { @@ -174,16 +216,16 @@ def build_fan_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): - metric = 'baremetal_fan_status' + if sensor_data['state'].lower() != 'enabled': + continue + name = 'baremetal_fan_status' + ignore = [] + value = HEALTH_MAP.get(sensor_data['health']) labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = sensor_data['physical_context'] - labels['sensor_id'] = sensor_data['identity'] - - value = sensor_data['health'] != 'OK' and 1 or 0 - - metrics[metric].append((value, 
labels)) + metrics[name].append((value, labels)) return metrics @@ -226,14 +268,15 @@ def build_drive_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): + if sensor_data['state'].lower() != 'enabled': + continue metric = 'baremetal_drive_status' + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = 'HDD' - labels['sensor_id'] = sensor_id - - value = sensor_data['health'] != 'OK' and 1 or 0 + value = HEALTH_MAP.get(sensor_data['health']) metrics[metric].append((value, labels)) diff --git a/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json b/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json new file mode 100644 index 0000000..fe808a2 --- /dev/null +++ b/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json @@ -0,0 +1,339 @@ +{ + "message_id": "836ecf60-ada0-44a9-9832-dcf62b8b4086", + "publisher_id": "", + "event_type": "hardware.idrac.metrics", + "priority": "INFO", + "payload": { + "message_id": "81323197-0f8e-4d8a-b6ac-ee08c4c1145e", + "instance_uuid": "235e4d8a-0f1a-87a0-ea81-8a1b0277cd87", + "node_uuid": "fe81395b-1999-4ab4-8eb0-235e1ab02778", + "timestamp": "2025-06-10T02:17:40.953055", + "node_name": "r640-u12", + "event_type": "hardware.idrac.metrics.update", + "payload": { + "Fan": { + "0@System.Embedded.1": { + "identity": "0", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "1@System.Embedded.1": { + "identity": "1", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5520, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "2@System.Embedded.1": { + "identity": "2", + "max_reading_range": null, + 
"min_reading_range": null, + "reading": 9360, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "3@System.Embedded.1": { + "identity": "3", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5640, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "4@System.Embedded.1": { + "identity": "4", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "5@System.Embedded.1": { + "identity": "5", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "6@System.Embedded.1": { + "identity": "6", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9840, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "7@System.Embedded.1": { + "identity": "7", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, "8@System.Embedded.1": { + "identity": "8", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "9@System.Embedded.1": { + "identity": "9", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "10@System.Embedded.1": { + 
"identity": "10", + "max_reading_range": null, + "min_reading_range": null, + "reading": 10080, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "11@System.Embedded.1": { + "identity": "11", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "12@System.Embedded.1": { + "identity": "12", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "13@System.Embedded.1": { + "identity": "13", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "14@System.Embedded.1": { + "identity": "14", + "max_reading_range": null, + "min_reading_range": null, + "reading": 10080, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "15@System.Embedded.1": { + "identity": "15", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + } + }, + "Temperature": { + "0@System.Embedded.1": { + "identity": "0", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 49, + "physical_context": "CPU", + "sensor_number": 1, + "state": "Enabled", + "health": "OK" + }, + "1@System.Embedded.1": { + "identity": "1", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 56, + "physical_context": "CPU", + "sensor_number": 2, + "state": "Enabled", + "health": "OK" + 
}, + "2@System.Embedded.1": { + "identity": "2", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 80, + "physical_context": "SystemBoard", + "sensor_number": 5, + "state": "Enabled", + "health": "Warning" + }, + "3@System.Embedded.1": { + "identity": "3", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 100, + "physical_context": "SystemBoard", + "sensor_number": 6, + "state": "Enabled", + "health": "Critical" + } + }, + "Power": { + "0:Power@System.Embedded.1": { + "power_capacity_watts": 750, + "line_input_voltage": 208, + "last_power_output_watts": 148, + "serial_number": "CNDED0089IA7W5", + "state": "Enabled", + "health": "OK" + }, + "1:Power@System.Embedded.1": { + "power_capacity_watts": 750, + "line_input_voltage": 208, + "last_power_output_watts": 139, + "serial_number": "CNDED0089IA7WU", + "state": "Enabled", + "health": "OK" + } + }, + "Drive": { + "PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1": { + "name": "PCIe SSD in Slot 9 in Bay 1", + "model": "Dell Express Flash NVMe P4500 1.0TB SFF", + "capacity_bytes": 1000204886016, + "state": "Enabled", + "health": "OK" + }, + "PCIe SSD in Slot 8 in Bay 1:CPU.1@System.Embedded.1": { + "name": "PCIe SSD in Slot 8 in Bay 1", + "model": "Dell Express Flash NVMe P4600 1.6TB SFF", + "capacity_bytes": 1600321314816, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:0:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:0", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:1:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:1", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:2:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:2", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + 
"state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:3:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:3", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:4:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:4", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:5:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:5", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:6:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:6", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:7:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:7", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "SSD 0:AHCI.Slot.2-1@System.Embedded.1": { + "name": "SSD 0", + "model": "MTFDDAV240TCB", + "capacity_bytes": 240057409536, + "state": "Enabled", + "health": "OK" + }, + "SSD 1:AHCI.Slot.2-1@System.Embedded.1": { + "name": "SSD 1", + "model": "MTFDDAV240TCB", + "capacity_bytes": 240057409536, + "state": "Enabled", + "health": "OK" + } + } + } + }, + "timestamp": "2025-06-10 02:17:47.220276" +} diff --git a/ironic_prometheus_exporter/tests/test_driver.py b/ironic_prometheus_exporter/tests/test_driver.py index af50382..934edc6 100644 --- a/ironic_prometheus_exporter/tests/test_driver.py +++ b/ironic_prometheus_exporter/tests/test_driver.py @@ -123,17 +123,25 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase): os.path.dirname(ironic_prometheus_exporter.__file__), 'tests', 'json_samples', 'notification-redfish.json') + sample_file_3 = os.path.join( + 
os.path.dirname(ironic_prometheus_exporter.__file__), + 'tests', 'json_samples', 'notification-idrac.json') + msg1 = json.load(open(sample_file_1)) node1 = msg1['payload']['node_name'] msg2 = json.load(open(sample_file_2)) node2 = msg2['payload']['node_name'] + msg3 = json.load(open(sample_file_3)) + node3 = msg3['payload']['node_name'] driver.notify(None, msg1, 'info', 0) driver.notify(None, msg2, 'info', 0) + driver.notify(None, msg3, 'info', 0) DIR = self.conf.oslo_messaging_notifications.location all_files = [name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))] - self.assertEqual(len(all_files), 2) + self.assertEqual(len(all_files), 3) self.assertIn(node1 + '-hardware.ipmi.metrics', all_files) self.assertIn(node2 + '-hardware.redfish.metrics', all_files) + self.assertIn(node3 + '-hardware.idrac.metrics', all_files) diff --git a/ironic_prometheus_exporter/tests/test_idrac_parser.py b/ironic_prometheus_exporter/tests/test_idrac_parser.py new file mode 100644 index 0000000..bbdf1f9 --- /dev/null +++ b/ironic_prometheus_exporter/tests/test_idrac_parser.py @@ -0,0 +1,183 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json +import os +import unittest + +from prometheus_client import CollectorRegistry + +import ironic_prometheus_exporter +from ironic_prometheus_exporter.parsers import redfish as idrac_redfish + + +sample_file = os.path.join( + os.path.dirname(ironic_prometheus_exporter.__file__), + 'tests', 'json_samples', 'notification-idrac.json') + +DATA = json.load(open(sample_file)) + + +class TestIDRACPayloadsParser(unittest.TestCase): + + def setUp(self): + self.node_message = DATA['payload'] + self.node_name = DATA['payload']['node_name'] + self.node_uuid = DATA['payload']['node_uuid'] + self.instance_uuid = DATA['payload']['instance_uuid'] + + def test_build_temperature_metrics(self): + metrics = idrac_redfish.build_temperature_metrics(self.node_message) + + expected_metric_name = 'baremetal_temperature_status' + self.assertIn(expected_metric_name, metrics) + self.assertEqual(0, metrics[expected_metric_name][0][0]) + expected_labels = { + 'identity': '0', + 'sensor_id': '0@System.Embedded.1', + 'physical_context': 'CPU', + 'reading_celsius': 49, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 1, + 'state': 'Enabled', + 'health': 'OK' + } + self.assertEqual( + expected_labels, metrics[expected_metric_name][0][1]) + + self.assertEqual(1, metrics[expected_metric_name][2][0]) + expected_labels2 = { + 'identity': '2', + 'sensor_id': '2@System.Embedded.1', + 'physical_context': 'SystemBoard', + 'reading_celsius': 80, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 5, + 'state': 'Enabled', + 'health': 'Warning' + } + self.assertEqual( + expected_labels2, metrics[expected_metric_name][2][1]) + + self.assertEqual(2, metrics[expected_metric_name][3][0]) + expected_labels3 = { + 'identity': '3', + 'sensor_id': '3@System.Embedded.1', + 
'physical_context': 'SystemBoard', + 'reading_celsius': 100, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 6, + 'state': 'Enabled', + 'health': 'Critical' + } + self.assertEqual( + expected_labels3, metrics[expected_metric_name][3][1]) + + def test_build_power_metrics(self): + metrics = idrac_redfish.build_power_metrics(self.node_message) + + expected_metric = 'baremetal_power_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'last_power_output_watts': 148, + 'line_input_voltage': 208, + 'power_capacity_watts': 750, + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_id': '0:Power@System.Embedded.1', + 'serial_number': 'CNDED0089IA7W5', + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_build_fan_metrics(self): + metrics = idrac_redfish.build_fan_metrics(self.node_message) + + expected_metric = 'baremetal_fan_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { + 'identity': '0', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'physical_context': 'SystemBoard', + 'reading': 9600, + 'reading_units': 'RPM', + 'sensor_id': '0@System.Embedded.1', + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_build_drive_metrics(self): + metrics = idrac_redfish.build_drive_metrics(self.node_message) + + expected_metric = 'baremetal_drive_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { 
+ 'model': 'Dell Express Flash NVMe P4500 1.0TB SFF', + 'name': 'PCIe SSD in Slot 9 in Bay 1', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1', + 'capacity_bytes': 1000204886016, + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_category_registry(self): + metrics_registry = CollectorRegistry() + + idrac_redfish.category_registry(self.node_message, metrics_registry) + + label = { + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'name': 'PCIe SSD in Slot 9 in Bay 1', + 'model': 'Dell Express Flash NVMe P4500 1.0TB SFF', + 'capacity_bytes': '1000204886016', + 'state': 'Enabled', + 'health': 'OK', + 'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1' + } + sensor_value = metrics_registry.get_sample_value( + 'baremetal_drive_status', label) + self.assertEqual(0, sensor_value) diff --git a/ironic_prometheus_exporter/tests/test_redfish_parser.py b/ironic_prometheus_exporter/tests/test_redfish_parser.py index cf16968..3bff837 100644 --- a/ironic_prometheus_exporter/tests/test_redfish_parser.py +++ b/ironic_prometheus_exporter/tests/test_redfish_parser.py @@ -46,11 +46,17 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(62, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range_temp': 120, + 'min_reading_range_temp': 0, + 'physical_context': 'CPU', + 'sensor_number': 1, + 'health': 'OK', + 'state': 'enabled', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': 1 + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX' } self.assertEqual( @@ -66,11 +72,21 @@ 
class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'PSU', + 'health': 'OK', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'maximum_frequency_hz': 63, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'minimum_voltage': 185, 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': '0:Power@ZZZ-YYY-XXX' + 'output_wattage': 1450, + 'power_capacity_watts': 1450, + 'sensor_id': '0:Power@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -86,11 +102,19 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'health': 'OK', + 'identity': 'XXX-YYY-ZZZ', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'max_reading_range': 10000, + 'min_reading_range': 0, 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': 'XXX-YYY-ZZZ' + 'physical_context': 'CPU', + 'reading': 6000, + 'reading_units': 'RPM', + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -106,11 +130,16 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'HDD', + 'capacity_bytes': 3750000000, + 'failure_predicted': True, + 'health': 'OK', + 'identity': '32ADF365C6C1B7BD', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'model': 'IBM 350A', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX' + 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX', + 'state': 'enabled' } self.assertEqual( @@ -122,10 +151,15 @@ class TestPayloadsParser(unittest.TestCase): 
redfish.category_registry(self.node_message, metrics_registry) label = { - 'entity_id': 'HDD', - 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'capacity_bytes': '3750000000', + 'failure_predicted': 'True', + 'health': 'OK', + 'identity': '32ADF365C6C1B7BD', + 'model': 'IBM 350A', + 'state': 'enabled', 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX' } @@ -155,11 +189,21 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'PSU', + 'health': 'OK', 'instance_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'maximum_frequency_hz': 63, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'minimum_voltage': 185, 'node_name': 'knilab-master-u9', 'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', - 'sensor_id': '0:Power@ZZZ-YYY-XXX' + 'output_wattage': 1450, + 'power_capacity_watts': 1450, + 'sensor_id': '0:Power@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -188,10 +232,16 @@ class TestPayloadsParserNoneNodeName(unittest.TestCase): self.assertEqual(62, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range_temp': 120, + 'min_reading_range_temp': 0, + 'physical_context': 'CPU', + 'sensor_number': 1, + 'health': 'OK', + 'state': 'enabled', 'instance_uuid': '85d6b2c8-fe57-432d-868a-330e0e28cf34', 'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', - 'sensor_id': 1 + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX' } self.assertEqual( diff --git a/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml b/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml new file mode 100644 index 0000000..8522de9 --- /dev/null +++ 
b/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds support for parsing iDRAC sensor data metrics via the redfish parser. + A new metric `baremetal_temperature_status` was also added. +fixes: + - | + Fixed a bug where status metrics failed to differentiate between warning + and critical states, showing a value of 1 for both. The metrics now show + a value of 1 for Warning and 2 for Critical. \ No newline at end of file