diff --git a/ironic_prometheus_exporter/messaging.py b/ironic_prometheus_exporter/messaging.py index 07c30f1..ce0d353 100644 --- a/ironic_prometheus_exporter/messaging.py +++ b/ironic_prometheus_exporter/messaging.py @@ -65,6 +65,9 @@ class PrometheusFileDriver(notifier.Driver): elif event_type == 'hardware.redfish.metrics': redfish.category_registry(payload, registry) + elif event_type == 'hardware.idrac.metrics': + redfish.category_registry(payload, registry) + # Order of preference is for a node Name, UUID, or # payload hostname field to be used (i.e. for conductor # message payloads). diff --git a/ironic_prometheus_exporter/parsers/metrics_information/redfish.json b/ironic_prometheus_exporter/parsers/metrics_information/redfish.json index 65f7275..cbffaa9 100644 --- a/ironic_prometheus_exporter/parsers/metrics_information/redfish.json +++ b/ironic_prometheus_exporter/parsers/metrics_information/redfish.json @@ -1,6 +1,6 @@ { "baremetal_power_status": - "Power supply unit health status (0 - OK, 1 - failure)", + "Power supply unit health status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_temp_room_celsius": "Room temperature expressed in Celsius", "baremetal_temp_intake_celsius": @@ -47,8 +47,10 @@ "Chassis temperature expressed in Celsius", "baremetal_temp_fan_celsius": "Cooling fan temperature expressed in Celsius", + "baremetal_temperature_status": + "Baremetal temperature sensor status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_fan_status": - "Cooling fan health status (0 - OK, 1 - failure)", + "Cooling fan health status (0 - OK, 1 - Warning, 2 - Critical)", "baremetal_drive_status": - "Storage drive health status (0 - OK, 1 - failure)" + "Storage drive health status (0 - OK, 1 - Warning, 2 - Critical)" } diff --git a/ironic_prometheus_exporter/parsers/redfish.py b/ironic_prometheus_exporter/parsers/redfish.py index 1e3e4e4..70f7813 100644 --- a/ironic_prometheus_exporter/parsers/redfish.py +++ b/ironic_prometheus_exporter/parsers/redfish.py @@ 
-22,6 +22,13 @@ from ironic_prometheus_exporter import utils as ipe_utils LOG = logging.getLogger(__name__) +HEALTH_MAP = { + 'OK': 0, + 'Warning': 1, + 'Critical': 2 +} + + def _build_labels(node_message): fields = ['node_name', 'node_uuid', 'instance_uuid'] if not node_message['node_name']: @@ -31,6 +38,14 @@ def _build_labels(node_message): } +def _build_sensor_labels(sensor_labels, sensor_id, sensor_data, ignore_keys): + for k, v in sensor_data.items(): + if k not in ignore_keys and v is not None: + sensor_labels[k] = v + sensor_labels['sensor_id'] = sensor_id + return sensor_labels + + def build_temperature_metrics(node_message): """Build Prometheus temperature metrics from Oslo message. @@ -49,7 +64,7 @@ def build_temperature_metrics(node_message): [ # metric value 42, - # metric instance in form of Prometheus labels + # metric instance in form of Prometheus labels example { 'node_name': 'kninode', 'node_uuid', 'XXX-YYY-ZZZ', @@ -58,6 +73,17 @@ def build_temperature_metrics(node_message): 'sensor_id': '1' } ] + # baremetal_temperature_status: + # metric value (0 - OK, 1 - Warning, 2- Critical) + 0, + # metric labels + { + 'node_name': 'kninode', + 'node_uuid', 'XXX-YYY-ZZZ', + 'instance_uuid': 'ZZZ-YYY-XXX', + 'entity_id': 'CPU', + 'sensor_id': '1' + } ] } """ @@ -69,17 +95,32 @@ def build_temperature_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): + if sensor_data['state'].lower() != 'enabled': + continue + metric = 'baremetal_temp_%s_celsius' % ( sensor_data['physical_context'].lower()) + sensor_reading = sensor_data.pop('reading_celsius') + health_value = HEALTH_MAP.get(sensor_data['health']) + temp_metrics = { + metric: sensor_reading, + 'baremetal_temperature_status': health_value + } + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = sensor_data['physical_context'] - labels['sensor_id'] = 
sensor_data['sensor_number'] - - value = sensor_data['reading_celsius'] - - metrics[metric].append((value, labels)) + for name, value in temp_metrics.items(): + # NOTE(iurygregory): we do this to ensure the reading_celsius + # value is used as label for the baremetal_temperature_status + # metric. + if name == 'baremetal_temperature_status': + new_labels = labels.copy() + new_labels['reading_celsius'] = sensor_reading + metrics[name].append((value, new_labels)) + else: + metrics[name].append((value, labels)) return metrics @@ -100,7 +141,7 @@ def build_power_metrics(node_message): # metric name 'baremetal_power_status': [ - # metric value (0 - OK, 1 - on fire) + # metric value (0 - OK, 1 - Warning, 2- Critical) 0, # metric instance in form of Prometheus labels { @@ -122,16 +163,17 @@ def build_power_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): - metric = 'baremetal_power_status' + if sensor_data['state'].lower() != 'enabled': + continue + + name = 'baremetal_power_status' + value = HEALTH_MAP.get(sensor_data['health']) + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = 'PSU' - labels['sensor_id'] = sensor_id - - value = sensor_data['health'] != 'OK' and 1 or 0 - - metrics[metric].append((value, labels)) + metrics[name].append((value, labels)) return metrics @@ -152,7 +194,7 @@ def build_fan_metrics(node_message): # metric name 'baremetal_fan_status': [ - # metric value (0 - OK, 1 - on fire) + # metric value (0 - OK, 1 - Warning, 2- Critical) 0, # metric instance in form of Prometheus labels { @@ -174,16 +216,16 @@ def build_fan_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): - metric = 'baremetal_fan_status' + if sensor_data['state'].lower() != 'enabled': + continue + name = 'baremetal_fan_status' + ignore = [] + value = 
HEALTH_MAP.get(sensor_data['health']) labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = sensor_data['physical_context'] - labels['sensor_id'] = sensor_data['identity'] - - value = sensor_data['health'] != 'OK' and 1 or 0 - - metrics[metric].append((value, labels)) + metrics[name].append((value, labels)) return metrics @@ -226,14 +268,15 @@ def build_drive_metrics(node_message): metrics = collections.defaultdict(list) for sensor_id, sensor_data in payload.items(): + if sensor_data['state'].lower() != 'enabled': + continue metric = 'baremetal_drive_status' + ignore = [] labels = _build_labels(node_message) + _build_sensor_labels(labels, sensor_id, sensor_data, ignore) - labels['entity_id'] = 'HDD' - labels['sensor_id'] = sensor_id - - value = sensor_data['health'] != 'OK' and 1 or 0 + value = HEALTH_MAP.get(sensor_data['health']) metrics[metric].append((value, labels)) diff --git a/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json b/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json new file mode 100644 index 0000000..fe808a2 --- /dev/null +++ b/ironic_prometheus_exporter/tests/json_samples/notification-idrac.json @@ -0,0 +1,339 @@ +{ + "message_id": "836ecf60-ada0-44a9-9832-dcf62b8b4086", + "publisher_id": "", + "event_type": "hardware.idrac.metrics", + "priority": "INFO", + "payload": { + "message_id": "81323197-0f8e-4d8a-b6ac-ee08c4c1145e", + "instance_uuid": "235e4d8a-0f1a-87a0-ea81-8a1b0277cd87", + "node_uuid": "fe81395b-1999-4ab4-8eb0-235e1ab02778", + "timestamp": "2025-06-10T02:17:40.953055", + "node_name": "r640-u12", + "event_type": "hardware.idrac.metrics.update", + "payload": { + "Fan": { + "0@System.Embedded.1": { + "identity": "0", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + 
"1@System.Embedded.1": { + "identity": "1", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5520, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "2@System.Embedded.1": { + "identity": "2", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9360, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "3@System.Embedded.1": { + "identity": "3", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5640, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "4@System.Embedded.1": { + "identity": "4", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "5@System.Embedded.1": { + "identity": "5", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "6@System.Embedded.1": { + "identity": "6", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9840, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "7@System.Embedded.1": { + "identity": "7", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, "8@System.Embedded.1": { + "identity": "8", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + 
"state": "Enabled", + "health": "OK" + }, + "9@System.Embedded.1": { + "identity": "9", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "10@System.Embedded.1": { + "identity": "10", + "max_reading_range": null, + "min_reading_range": null, + "reading": 10080, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "11@System.Embedded.1": { + "identity": "11", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "12@System.Embedded.1": { + "identity": "12", + "max_reading_range": null, + "min_reading_range": null, + "reading": 9600, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "13@System.Embedded.1": { + "identity": "13", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5880, "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "14@System.Embedded.1": { + "identity": "14", + "max_reading_range": null, + "min_reading_range": null, + "reading": 10080, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + }, + "15@System.Embedded.1": { + "identity": "15", + "max_reading_range": null, + "min_reading_range": null, + "reading": 5760, + "reading_units": "RPM", + "serial_number": null, + "physical_context": "SystemBoard", + "state": "Enabled", + "health": "OK" + } + }, + "Temperature": { + "0@System.Embedded.1": { + "identity": "0", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 
49, + "physical_context": "CPU", + "sensor_number": 1, + "state": "Enabled", + "health": "OK" + }, + "1@System.Embedded.1": { + "identity": "1", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 56, + "physical_context": "CPU", + "sensor_number": 2, + "state": "Enabled", + "health": "OK" + }, + "2@System.Embedded.1": { + "identity": "2", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 80, + "physical_context": "SystemBoard", + "sensor_number": 5, + "state": "Enabled", + "health": "Warning" + }, + "3@System.Embedded.1": { + "identity": "3", + "max_reading_range_temp": null, + "min_reading_range_temp": null, + "reading_celsius": 100, + "physical_context": "SystemBoard", + "sensor_number": 6, + "state": "Enabled", + "health": "Critical" + } + }, + "Power": { + "0:Power@System.Embedded.1": { + "power_capacity_watts": 750, + "line_input_voltage": 208, + "last_power_output_watts": 148, + "serial_number": "CNDED0089IA7W5", + "state": "Enabled", + "health": "OK" + }, + "1:Power@System.Embedded.1": { + "power_capacity_watts": 750, + "line_input_voltage": 208, + "last_power_output_watts": 139, + "serial_number": "CNDED0089IA7WU", + "state": "Enabled", + "health": "OK" + } + }, + "Drive": { + "PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1": { + "name": "PCIe SSD in Slot 9 in Bay 1", + "model": "Dell Express Flash NVMe P4500 1.0TB SFF", + "capacity_bytes": 1000204886016, + "state": "Enabled", + "health": "OK" + }, + "PCIe SSD in Slot 8 in Bay 1:CPU.1@System.Embedded.1": { + "name": "PCIe SSD in Slot 8 in Bay 1", + "model": "Dell Express Flash NVMe P4600 1.6TB SFF", + "capacity_bytes": 1600321314816, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:0:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:0", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 
0:1:1:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:1", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:2:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:2", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:3:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:3", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:4:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:4", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:5:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:5", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:6:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:6", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "Physical Disk 0:1:7:NonRAID.Integrated.1-1@System.Embedded.1": { + "name": "Physical Disk 0:1:7", + "model": "AL15SEB120NY", + "capacity_bytes": 1200243695104, + "state": "Enabled", + "health": "OK" + }, + "SSD 0:AHCI.Slot.2-1@System.Embedded.1": { + "name": "SSD 0", + "model": "MTFDDAV240TCB", + "capacity_bytes": 240057409536, + "state": "Enabled", + "health": "OK" + }, + "SSD 1:AHCI.Slot.2-1@System.Embedded.1": { + "name": "SSD 1", + "model": "MTFDDAV240TCB", + "capacity_bytes": 240057409536, + "state": "Enabled", + "health": "OK" + } + } + } + }, + "timestamp": "2025-06-10 02:17:47.220276" +} diff --git a/ironic_prometheus_exporter/tests/test_driver.py b/ironic_prometheus_exporter/tests/test_driver.py index af50382..934edc6 100644 --- 
a/ironic_prometheus_exporter/tests/test_driver.py +++ b/ironic_prometheus_exporter/tests/test_driver.py @@ -123,17 +123,25 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase): os.path.dirname(ironic_prometheus_exporter.__file__), 'tests', 'json_samples', 'notification-redfish.json') + sample_file_3 = os.path.join( + os.path.dirname(ironic_prometheus_exporter.__file__), + 'tests', 'json_samples', 'notification-idrac.json') + msg1 = json.load(open(sample_file_1)) node1 = msg1['payload']['node_name'] msg2 = json.load(open(sample_file_2)) node2 = msg2['payload']['node_name'] + msg3 = json.load(open(sample_file_3)) + node3 = msg3['payload']['node_name'] driver.notify(None, msg1, 'info', 0) driver.notify(None, msg2, 'info', 0) + driver.notify(None, msg3, 'info', 0) DIR = self.conf.oslo_messaging_notifications.location all_files = [name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))] - self.assertEqual(len(all_files), 2) + self.assertEqual(len(all_files), 3) self.assertIn(node1 + '-hardware.ipmi.metrics', all_files) self.assertIn(node2 + '-hardware.redfish.metrics', all_files) + self.assertIn(node3 + '-hardware.idrac.metrics', all_files) diff --git a/ironic_prometheus_exporter/tests/test_idrac_parser.py b/ironic_prometheus_exporter/tests/test_idrac_parser.py new file mode 100644 index 0000000..bbdf1f9 --- /dev/null +++ b/ironic_prometheus_exporter/tests/test_idrac_parser.py @@ -0,0 +1,183 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json +import os +import unittest + +from prometheus_client import CollectorRegistry + +import ironic_prometheus_exporter +from ironic_prometheus_exporter.parsers import redfish as idrac_redfish + + +sample_file = os.path.join( + os.path.dirname(ironic_prometheus_exporter.__file__), + 'tests', 'json_samples', 'notification-idrac.json') + +DATA = json.load(open(sample_file)) + + +class TestIDRACPayloadsParser(unittest.TestCase): + + def setUp(self): + self.node_message = DATA['payload'] + self.node_name = DATA['payload']['node_name'] + self.node_uuid = DATA['payload']['node_uuid'] + self.instance_uuid = DATA['payload']['instance_uuid'] + + def test_build_temperature_metrics(self): + metrics = idrac_redfish.build_temperature_metrics(self.node_message) + + expected_metric_name = 'baremetal_temperature_status' + self.assertIn(expected_metric_name, metrics) + self.assertEqual(0, metrics[expected_metric_name][0][0]) + expected_labels = { + 'identity': '0', + 'sensor_id': '0@System.Embedded.1', + 'physical_context': 'CPU', + 'reading_celsius': 49, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 1, + 'state': 'Enabled', + 'health': 'OK' + } + self.assertEqual( + expected_labels, metrics[expected_metric_name][0][1]) + + self.assertEqual(1, metrics[expected_metric_name][2][0]) + expected_labels2 = { + 'identity': '2', + 'sensor_id': '2@System.Embedded.1', + 'physical_context': 'SystemBoard', + 'reading_celsius': 80, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 5, + 'state': 'Enabled', + 'health': 'Warning' + } + self.assertEqual( + expected_labels2, metrics[expected_metric_name][2][1]) + + self.assertEqual(2, metrics[expected_metric_name][3][0]) + expected_labels3 = { + 'identity': '3', + 'sensor_id': '3@System.Embedded.1', + 
'physical_context': 'SystemBoard', + 'reading_celsius': 100, + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_number': 6, + 'state': 'Enabled', + 'health': 'Critical' + } + self.assertEqual( + expected_labels3, metrics[expected_metric_name][3][1]) + + def test_build_power_metrics(self): + metrics = idrac_redfish.build_power_metrics(self.node_message) + + expected_metric = 'baremetal_power_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'last_power_output_watts': 148, + 'line_input_voltage': 208, + 'power_capacity_watts': 750, + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_id': '0:Power@System.Embedded.1', + 'serial_number': 'CNDED0089IA7W5', + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_build_fan_metrics(self): + metrics = idrac_redfish.build_fan_metrics(self.node_message) + + expected_metric = 'baremetal_fan_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { + 'identity': '0', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'physical_context': 'SystemBoard', + 'reading': 9600, + 'reading_units': 'RPM', + 'sensor_id': '0@System.Embedded.1', + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_build_drive_metrics(self): + metrics = idrac_redfish.build_drive_metrics(self.node_message) + + expected_metric = 'baremetal_drive_status' + + self.assertIn(expected_metric, metrics) + + self.assertEqual(0, metrics[expected_metric][0][0]) + + expected_labels = { 
+ 'model': 'Dell Express Flash NVMe P4500 1.0TB SFF', + 'name': 'PCIe SSD in Slot 9 in Bay 1', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1', + 'capacity_bytes': 1000204886016, + 'state': 'Enabled', + 'health': 'OK' + } + + self.assertEqual( + expected_labels, metrics[expected_metric][0][1]) + + def test_category_registry(self): + metrics_registry = CollectorRegistry() + + idrac_redfish.category_registry(self.node_message, metrics_registry) + + label = { + 'node_name': 'r640-u12', + 'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778', + 'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87', + 'name': 'PCIe SSD in Slot 9 in Bay 1', + 'model': 'Dell Express Flash NVMe P4500 1.0TB SFF', + 'capacity_bytes': '1000204886016', + 'state': 'Enabled', + 'health': 'OK', + 'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1' + } + sensor_value = metrics_registry.get_sample_value( + 'baremetal_drive_status', label) + self.assertEqual(0, sensor_value) diff --git a/ironic_prometheus_exporter/tests/test_redfish_parser.py b/ironic_prometheus_exporter/tests/test_redfish_parser.py index cf16968..3bff837 100644 --- a/ironic_prometheus_exporter/tests/test_redfish_parser.py +++ b/ironic_prometheus_exporter/tests/test_redfish_parser.py @@ -46,11 +46,17 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(62, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range_temp': 120, + 'min_reading_range_temp': 0, + 'physical_context': 'CPU', + 'sensor_number': 1, + 'health': 'OK', + 'state': 'enabled', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': 1 + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX' } self.assertEqual( @@ -66,11 +72,21 @@ 
class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'PSU', + 'health': 'OK', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'maximum_frequency_hz': 63, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'minimum_voltage': 185, 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': '0:Power@ZZZ-YYY-XXX' + 'output_wattage': 1450, + 'power_capacity_watts': 1450, + 'sensor_id': '0:Power@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -86,11 +102,19 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'health': 'OK', + 'identity': 'XXX-YYY-ZZZ', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'max_reading_range': 10000, + 'min_reading_range': 0, 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': 'XXX-YYY-ZZZ' + 'physical_context': 'CPU', + 'reading': 6000, + 'reading_units': 'RPM', + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -106,11 +130,16 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'HDD', + 'capacity_bytes': 3750000000, + 'failure_predicted': True, + 'health': 'OK', + 'identity': '32ADF365C6C1B7BD', 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'model': 'IBM 350A', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', - 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX' + 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX', + 'state': 'enabled' } self.assertEqual( @@ -122,10 +151,15 @@ class TestPayloadsParser(unittest.TestCase): 
redfish.category_registry(self.node_message, metrics_registry) label = { - 'entity_id': 'HDD', - 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', 'node_name': 'knilab-master-u9', 'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228', + 'capacity_bytes': '3750000000', + 'failure_predicted': 'True', + 'health': 'OK', + 'identity': '32ADF365C6C1B7BD', + 'model': 'IBM 350A', + 'state': 'enabled', 'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX' } @@ -155,11 +189,21 @@ class TestPayloadsParser(unittest.TestCase): self.assertEqual(0, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'PSU', + 'health': 'OK', 'instance_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'maximum_frequency_hz': 63, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'minimum_voltage': 185, 'node_name': 'knilab-master-u9', 'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', - 'sensor_id': '0:Power@ZZZ-YYY-XXX' + 'output_wattage': 1450, + 'power_capacity_watts': 1450, + 'sensor_id': '0:Power@ZZZ-YYY-XXX', + 'serial_number': 'SN010203040506', + 'state': 'enabled' } self.assertEqual( @@ -188,10 +232,16 @@ class TestPayloadsParserNoneNodeName(unittest.TestCase): self.assertEqual(62, metrics[expected_metric][0][0]) expected_labels = { - 'entity_id': 'CPU', + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range_temp': 120, + 'min_reading_range_temp': 0, + 'physical_context': 'CPU', + 'sensor_number': 1, + 'health': 'OK', + 'state': 'enabled', 'instance_uuid': '85d6b2c8-fe57-432d-868a-330e0e28cf34', 'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b', - 'sensor_id': 1 + 'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX' } self.assertEqual( diff --git a/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml b/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml new file mode 100644 index 0000000..8522de9 --- /dev/null +++ 
b/releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds support for parsing iDRAC sensor data metrics via the Redfish parser. + A new metric `baremetal_temperature_status` was also added. +fixes: + - | + Fixed a bug where status metrics failed to differentiate between warning + and critical states, showing a value of 1 for both. The metrics now show + a value of 1 for Warning and 2 for Critical. \ No newline at end of file