[IPE] Support iDRAC driver metrics
Currently we only support metrics from the ipmi and redfish drivers. This patch adds support for idrac metrics via the redfish parser. In addition to all supported redfish metrics we also added: - baremetal_temperature_status We also fixed the status metrics, which previously failed to differentiate between Warning and Critical states and showed a value of 1 for both. The metrics now show a value of 1 for Warning and 2 for Critical. Closes-Bug: #2111832 Assisted-By: Claude Code - Claude Sonnet 4 Change-Id: I6091013900ea5ed5d14076b837c054740c2f1873 Signed-off-by: Iury Gregory Melo Ferreira <imelofer@redhat.com>
This commit is contained in:
@@ -65,6 +65,9 @@ class PrometheusFileDriver(notifier.Driver):
|
||||
elif event_type == 'hardware.redfish.metrics':
|
||||
redfish.category_registry(payload, registry)
|
||||
|
||||
elif event_type == 'hardware.idrac.metrics':
|
||||
redfish.category_registry(payload, registry)
|
||||
|
||||
# Order of preference is for a node Name, UUID, or
|
||||
# payload hostname field to be used (i.e. for conductor
|
||||
# message payloads).
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"baremetal_power_status":
|
||||
"Power supply unit health status (0 - OK, 1 - failure)",
|
||||
"Power supply unit health status (0 - OK, 1 - Warning, 2 - Critical)",
|
||||
"baremetal_temp_room_celsius":
|
||||
"Room temperature expressed in Celsius",
|
||||
"baremetal_temp_intake_celsius":
|
||||
@@ -47,8 +47,10 @@
|
||||
"Chassis temperature expressed in Celsius",
|
||||
"baremetal_temp_fan_celsius":
|
||||
"Cooling fan temperature expressed in Celsius",
|
||||
"baremetal_temperature_status":
|
||||
"Baremetal temperature sensor status (0 - OK, 1 - Warning, 2 - Critical)",
|
||||
"baremetal_fan_status":
|
||||
"Cooling fan health status (0 - OK, 1 - failure)",
|
||||
"Cooling fan health status (0 - OK, 1 - Warning, 2 - Critical)",
|
||||
"baremetal_drive_status":
|
||||
"Storage drive health status (0 - OK, 1 - failure)"
|
||||
"Storage drive health status (0 - OK, 1 - Warning, 2 - Critical)"
|
||||
}
|
||||
|
||||
@@ -22,6 +22,13 @@ from ironic_prometheus_exporter import utils as ipe_utils
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
HEALTH_MAP = {
|
||||
'OK': 0,
|
||||
'Warning': 1,
|
||||
'Critical': 2
|
||||
}
|
||||
|
||||
|
||||
def _build_labels(node_message):
|
||||
fields = ['node_name', 'node_uuid', 'instance_uuid']
|
||||
if not node_message['node_name']:
|
||||
@@ -31,6 +38,14 @@ def _build_labels(node_message):
|
||||
}
|
||||
|
||||
|
||||
def _build_sensor_labels(sensor_labels, sensor_id, sensor_data, ignore_keys):
|
||||
for k, v in sensor_data.items():
|
||||
if k not in ignore_keys and v is not None:
|
||||
sensor_labels[k] = v
|
||||
sensor_labels['sensor_id'] = sensor_id
|
||||
return sensor_labels
|
||||
|
||||
|
||||
def build_temperature_metrics(node_message):
|
||||
"""Build Prometheus temperature metrics from Oslo message.
|
||||
|
||||
@@ -49,7 +64,7 @@ def build_temperature_metrics(node_message):
|
||||
[
|
||||
# metric value
|
||||
42,
|
||||
# metric instance in form of Prometheus labels
|
||||
# metric instance in form of Prometheus labels example
|
||||
{
|
||||
'node_name': 'kninode',
|
||||
'node_uuid', 'XXX-YYY-ZZZ',
|
||||
@@ -58,6 +73,17 @@ def build_temperature_metrics(node_message):
|
||||
'sensor_id': '1'
|
||||
}
|
||||
]
|
||||
# baremetal_temperature_status:
|
||||
# metric value (0 - OK, 1 - Warning, 2 - Critical)
|
||||
0,
|
||||
# metric labels
|
||||
{
|
||||
'node_name': 'kninode',
|
||||
'node_uuid', 'XXX-YYY-ZZZ',
|
||||
'instance_uuid': 'ZZZ-YYY-XXX',
|
||||
'entity_id': 'CPU',
|
||||
'sensor_id': '1'
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
@@ -69,17 +95,32 @@ def build_temperature_metrics(node_message):
|
||||
metrics = collections.defaultdict(list)
|
||||
|
||||
for sensor_id, sensor_data in payload.items():
|
||||
if sensor_data['state'].lower() != 'enabled':
|
||||
continue
|
||||
|
||||
metric = 'baremetal_temp_%s_celsius' % (
|
||||
sensor_data['physical_context'].lower())
|
||||
sensor_reading = sensor_data.pop('reading_celsius')
|
||||
health_value = HEALTH_MAP.get(sensor_data['health'])
|
||||
temp_metrics = {
|
||||
metric: sensor_reading,
|
||||
'baremetal_temperature_status': health_value
|
||||
}
|
||||
ignore = []
|
||||
|
||||
labels = _build_labels(node_message)
|
||||
_build_sensor_labels(labels, sensor_id, sensor_data, ignore)
|
||||
|
||||
labels['entity_id'] = sensor_data['physical_context']
|
||||
labels['sensor_id'] = sensor_data['sensor_number']
|
||||
|
||||
value = sensor_data['reading_celsius']
|
||||
|
||||
metrics[metric].append((value, labels))
|
||||
for name, value in temp_metrics.items():
|
||||
# NOTE(iurygregory): we do this to ensure the reading_celsius
|
||||
# value is used as label for the baremetal_temperature_status
|
||||
# metric.
|
||||
if name == 'baremetal_temperature_status':
|
||||
new_labels = labels.copy()
|
||||
new_labels['reading_celsius'] = sensor_reading
|
||||
metrics[name].append((value, new_labels))
|
||||
else:
|
||||
metrics[name].append((value, labels))
|
||||
|
||||
return metrics
|
||||
|
||||
@@ -100,7 +141,7 @@ def build_power_metrics(node_message):
|
||||
# metric name
|
||||
'baremetal_power_status':
|
||||
[
|
||||
# metric value (0 - OK, 1 - on fire)
|
||||
# metric value (0 - OK, 1 - Warning, 2 - Critical)
|
||||
0,
|
||||
# metric instance in form of Prometheus labels
|
||||
{
|
||||
@@ -122,16 +163,17 @@ def build_power_metrics(node_message):
|
||||
metrics = collections.defaultdict(list)
|
||||
|
||||
for sensor_id, sensor_data in payload.items():
|
||||
metric = 'baremetal_power_status'
|
||||
if sensor_data['state'].lower() != 'enabled':
|
||||
continue
|
||||
|
||||
name = 'baremetal_power_status'
|
||||
value = HEALTH_MAP.get(sensor_data['health'])
|
||||
ignore = []
|
||||
|
||||
labels = _build_labels(node_message)
|
||||
_build_sensor_labels(labels, sensor_id, sensor_data, ignore)
|
||||
|
||||
labels['entity_id'] = 'PSU'
|
||||
labels['sensor_id'] = sensor_id
|
||||
|
||||
value = sensor_data['health'] != 'OK' and 1 or 0
|
||||
|
||||
metrics[metric].append((value, labels))
|
||||
metrics[name].append((value, labels))
|
||||
|
||||
return metrics
|
||||
|
||||
@@ -152,7 +194,7 @@ def build_fan_metrics(node_message):
|
||||
# metric name
|
||||
'baremetal_fan_status':
|
||||
[
|
||||
# metric value (0 - OK, 1 - on fire)
|
||||
# metric value (0 - OK, 1 - Warning, 2 - Critical)
|
||||
0,
|
||||
# metric instance in form of Prometheus labels
|
||||
{
|
||||
@@ -174,16 +216,16 @@ def build_fan_metrics(node_message):
|
||||
metrics = collections.defaultdict(list)
|
||||
|
||||
for sensor_id, sensor_data in payload.items():
|
||||
metric = 'baremetal_fan_status'
|
||||
if sensor_data['state'].lower() != 'enabled':
|
||||
continue
|
||||
name = 'baremetal_fan_status'
|
||||
ignore = []
|
||||
value = HEALTH_MAP.get(sensor_data['health'])
|
||||
|
||||
labels = _build_labels(node_message)
|
||||
_build_sensor_labels(labels, sensor_id, sensor_data, ignore)
|
||||
|
||||
labels['entity_id'] = sensor_data['physical_context']
|
||||
labels['sensor_id'] = sensor_data['identity']
|
||||
|
||||
value = sensor_data['health'] != 'OK' and 1 or 0
|
||||
|
||||
metrics[metric].append((value, labels))
|
||||
metrics[name].append((value, labels))
|
||||
|
||||
return metrics
|
||||
|
||||
@@ -226,14 +268,15 @@ def build_drive_metrics(node_message):
|
||||
metrics = collections.defaultdict(list)
|
||||
|
||||
for sensor_id, sensor_data in payload.items():
|
||||
if sensor_data['state'].lower() != 'enabled':
|
||||
continue
|
||||
metric = 'baremetal_drive_status'
|
||||
|
||||
ignore = []
|
||||
labels = _build_labels(node_message)
|
||||
_build_sensor_labels(labels, sensor_id, sensor_data, ignore)
|
||||
|
||||
labels['entity_id'] = 'HDD'
|
||||
labels['sensor_id'] = sensor_id
|
||||
|
||||
value = sensor_data['health'] != 'OK' and 1 or 0
|
||||
value = HEALTH_MAP.get(sensor_data['health'])
|
||||
|
||||
metrics[metric].append((value, labels))
|
||||
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
{
|
||||
"message_id": "836ecf60-ada0-44a9-9832-dcf62b8b4086",
|
||||
"publisher_id": "",
|
||||
"event_type": "hardware.idrac.metrics",
|
||||
"priority": "INFO",
|
||||
"payload": {
|
||||
"message_id": "81323197-0f8e-4d8a-b6ac-ee08c4c1145e",
|
||||
"instance_uuid": "235e4d8a-0f1a-87a0-ea81-8a1b0277cd87",
|
||||
"node_uuid": "fe81395b-1999-4ab4-8eb0-235e1ab02778",
|
||||
"timestamp": "2025-06-10T02:17:40.953055",
|
||||
"node_name": "r640-u12",
|
||||
"event_type": "hardware.idrac.metrics.update",
|
||||
"payload": {
|
||||
"Fan": {
|
||||
"0@System.Embedded.1": {
|
||||
"identity": "0",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9600,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"1@System.Embedded.1": {
|
||||
"identity": "1",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5520,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"2@System.Embedded.1": {
|
||||
"identity": "2",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9360,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"3@System.Embedded.1": {
|
||||
"identity": "3",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5640,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"4@System.Embedded.1": {
|
||||
"identity": "4",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9600,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"5@System.Embedded.1": {
|
||||
"identity": "5",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5760,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"6@System.Embedded.1": {
|
||||
"identity": "6",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9840,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"7@System.Embedded.1": {
|
||||
"identity": "7",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5880,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
}, "8@System.Embedded.1": {
|
||||
"identity": "8",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9600,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"9@System.Embedded.1": {
|
||||
"identity": "9",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5880,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"10@System.Embedded.1": {
|
||||
"identity": "10",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 10080,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"11@System.Embedded.1": {
|
||||
"identity": "11",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5760,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"12@System.Embedded.1": {
|
||||
"identity": "12",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 9600,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"13@System.Embedded.1": {
|
||||
"identity": "13",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5880, "reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"14@System.Embedded.1": {
|
||||
"identity": "14",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 10080,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"15@System.Embedded.1": {
|
||||
"identity": "15",
|
||||
"max_reading_range": null,
|
||||
"min_reading_range": null,
|
||||
"reading": 5760,
|
||||
"reading_units": "RPM",
|
||||
"serial_number": null,
|
||||
"physical_context": "SystemBoard",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
}
|
||||
},
|
||||
"Temperature": {
|
||||
"0@System.Embedded.1": {
|
||||
"identity": "0",
|
||||
"max_reading_range_temp": null,
|
||||
"min_reading_range_temp": null,
|
||||
"reading_celsius": 49,
|
||||
"physical_context": "CPU",
|
||||
"sensor_number": 1,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"1@System.Embedded.1": {
|
||||
"identity": "1",
|
||||
"max_reading_range_temp": null,
|
||||
"min_reading_range_temp": null,
|
||||
"reading_celsius": 56,
|
||||
"physical_context": "CPU",
|
||||
"sensor_number": 2,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"2@System.Embedded.1": {
|
||||
"identity": "2",
|
||||
"max_reading_range_temp": null,
|
||||
"min_reading_range_temp": null,
|
||||
"reading_celsius": 80,
|
||||
"physical_context": "SystemBoard",
|
||||
"sensor_number": 5,
|
||||
"state": "Enabled",
|
||||
"health": "Warning"
|
||||
},
|
||||
"3@System.Embedded.1": {
|
||||
"identity": "3",
|
||||
"max_reading_range_temp": null,
|
||||
"min_reading_range_temp": null,
|
||||
"reading_celsius": 100,
|
||||
"physical_context": "SystemBoard",
|
||||
"sensor_number": 6,
|
||||
"state": "Enabled",
|
||||
"health": "Critical"
|
||||
}
|
||||
},
|
||||
"Power": {
|
||||
"0:Power@System.Embedded.1": {
|
||||
"power_capacity_watts": 750,
|
||||
"line_input_voltage": 208,
|
||||
"last_power_output_watts": 148,
|
||||
"serial_number": "CNDED0089IA7W5",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"1:Power@System.Embedded.1": {
|
||||
"power_capacity_watts": 750,
|
||||
"line_input_voltage": 208,
|
||||
"last_power_output_watts": 139,
|
||||
"serial_number": "CNDED0089IA7WU",
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
}
|
||||
},
|
||||
"Drive": {
|
||||
"PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1": {
|
||||
"name": "PCIe SSD in Slot 9 in Bay 1",
|
||||
"model": "Dell Express Flash NVMe P4500 1.0TB SFF",
|
||||
"capacity_bytes": 1000204886016,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"PCIe SSD in Slot 8 in Bay 1:CPU.1@System.Embedded.1": {
|
||||
"name": "PCIe SSD in Slot 8 in Bay 1",
|
||||
"model": "Dell Express Flash NVMe P4600 1.6TB SFF",
|
||||
"capacity_bytes": 1600321314816,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:0:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:0",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:1:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:1",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:2:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:2",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:3:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:3",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:4:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:4",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:5:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:5",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:6:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:6",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"Physical Disk 0:1:7:NonRAID.Integrated.1-1@System.Embedded.1": {
|
||||
"name": "Physical Disk 0:1:7",
|
||||
"model": "AL15SEB120NY",
|
||||
"capacity_bytes": 1200243695104,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"SSD 0:AHCI.Slot.2-1@System.Embedded.1": {
|
||||
"name": "SSD 0",
|
||||
"model": "MTFDDAV240TCB",
|
||||
"capacity_bytes": 240057409536,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
},
|
||||
"SSD 1:AHCI.Slot.2-1@System.Embedded.1": {
|
||||
"name": "SSD 1",
|
||||
"model": "MTFDDAV240TCB",
|
||||
"capacity_bytes": 240057409536,
|
||||
"state": "Enabled",
|
||||
"health": "OK"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"timestamp": "2025-06-10 02:17:47.220276"
|
||||
}
|
||||
@@ -123,17 +123,25 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
|
||||
os.path.dirname(ironic_prometheus_exporter.__file__),
|
||||
'tests', 'json_samples', 'notification-redfish.json')
|
||||
|
||||
sample_file_3 = os.path.join(
|
||||
os.path.dirname(ironic_prometheus_exporter.__file__),
|
||||
'tests', 'json_samples', 'notification-idrac.json')
|
||||
|
||||
msg1 = json.load(open(sample_file_1))
|
||||
node1 = msg1['payload']['node_name']
|
||||
msg2 = json.load(open(sample_file_2))
|
||||
node2 = msg2['payload']['node_name']
|
||||
msg3 = json.load(open(sample_file_3))
|
||||
node3 = msg3['payload']['node_name']
|
||||
|
||||
driver.notify(None, msg1, 'info', 0)
|
||||
driver.notify(None, msg2, 'info', 0)
|
||||
driver.notify(None, msg3, 'info', 0)
|
||||
|
||||
DIR = self.conf.oslo_messaging_notifications.location
|
||||
all_files = [name for name in os.listdir(DIR)
|
||||
if os.path.isfile(os.path.join(DIR, name))]
|
||||
self.assertEqual(len(all_files), 2)
|
||||
self.assertEqual(len(all_files), 3)
|
||||
self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
|
||||
self.assertIn(node2 + '-hardware.redfish.metrics', all_files)
|
||||
self.assertIn(node3 + '-hardware.idrac.metrics', all_files)
|
||||
|
||||
183
ironic_prometheus_exporter/tests/test_idrac_parser.py
Normal file
183
ironic_prometheus_exporter/tests/test_idrac_parser.py
Normal file
@@ -0,0 +1,183 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import json
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from prometheus_client import CollectorRegistry
|
||||
|
||||
import ironic_prometheus_exporter
|
||||
from ironic_prometheus_exporter.parsers import redfish as idrac_redfish
|
||||
|
||||
|
||||
sample_file = os.path.join(
|
||||
os.path.dirname(ironic_prometheus_exporter.__file__),
|
||||
'tests', 'json_samples', 'notification-idrac.json')
|
||||
|
||||
DATA = json.load(open(sample_file))
|
||||
|
||||
|
||||
class TestIDRACPayloadsParser(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.node_message = DATA['payload']
|
||||
self.node_name = DATA['payload']['node_name']
|
||||
self.node_uuid = DATA['payload']['node_uuid']
|
||||
self.instance_uuid = DATA['payload']['instance_uuid']
|
||||
|
||||
def test_build_temperature_metrics(self):
|
||||
metrics = idrac_redfish.build_temperature_metrics(self.node_message)
|
||||
|
||||
expected_metric_name = 'baremetal_temperature_status'
|
||||
self.assertIn(expected_metric_name, metrics)
|
||||
self.assertEqual(0, metrics[expected_metric_name][0][0])
|
||||
expected_labels = {
|
||||
'identity': '0',
|
||||
'sensor_id': '0@System.Embedded.1',
|
||||
'physical_context': 'CPU',
|
||||
'reading_celsius': 49,
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'sensor_number': 1,
|
||||
'state': 'Enabled',
|
||||
'health': 'OK'
|
||||
}
|
||||
self.assertEqual(
|
||||
expected_labels, metrics[expected_metric_name][0][1])
|
||||
|
||||
self.assertEqual(1, metrics[expected_metric_name][2][0])
|
||||
expected_labels2 = {
|
||||
'identity': '2',
|
||||
'sensor_id': '2@System.Embedded.1',
|
||||
'physical_context': 'SystemBoard',
|
||||
'reading_celsius': 80,
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'sensor_number': 5,
|
||||
'state': 'Enabled',
|
||||
'health': 'Warning'
|
||||
}
|
||||
self.assertEqual(
|
||||
expected_labels2, metrics[expected_metric_name][2][1])
|
||||
|
||||
self.assertEqual(2, metrics[expected_metric_name][3][0])
|
||||
expected_labels3 = {
|
||||
'identity': '3',
|
||||
'sensor_id': '3@System.Embedded.1',
|
||||
'physical_context': 'SystemBoard',
|
||||
'reading_celsius': 100,
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'sensor_number': 6,
|
||||
'state': 'Enabled',
|
||||
'health': 'Critical'
|
||||
}
|
||||
self.assertEqual(
|
||||
expected_labels3, metrics[expected_metric_name][3][1])
|
||||
|
||||
def test_build_power_metrics(self):
|
||||
metrics = idrac_redfish.build_power_metrics(self.node_message)
|
||||
|
||||
expected_metric = 'baremetal_power_status'
|
||||
|
||||
self.assertIn(expected_metric, metrics)
|
||||
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'last_power_output_watts': 148,
|
||||
'line_input_voltage': 208,
|
||||
'power_capacity_watts': 750,
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'sensor_id': '0:Power@System.Embedded.1',
|
||||
'serial_number': 'CNDED0089IA7W5',
|
||||
'state': 'Enabled',
|
||||
'health': 'OK'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
expected_labels, metrics[expected_metric][0][1])
|
||||
|
||||
def test_build_fan_metrics(self):
|
||||
metrics = idrac_redfish.build_fan_metrics(self.node_message)
|
||||
|
||||
expected_metric = 'baremetal_fan_status'
|
||||
|
||||
self.assertIn(expected_metric, metrics)
|
||||
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'identity': '0',
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'physical_context': 'SystemBoard',
|
||||
'reading': 9600,
|
||||
'reading_units': 'RPM',
|
||||
'sensor_id': '0@System.Embedded.1',
|
||||
'state': 'Enabled',
|
||||
'health': 'OK'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
expected_labels, metrics[expected_metric][0][1])
|
||||
|
||||
def test_build_drive_metrics(self):
|
||||
metrics = idrac_redfish.build_drive_metrics(self.node_message)
|
||||
|
||||
expected_metric = 'baremetal_drive_status'
|
||||
|
||||
self.assertIn(expected_metric, metrics)
|
||||
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'model': 'Dell Express Flash NVMe P4500 1.0TB SFF',
|
||||
'name': 'PCIe SSD in Slot 9 in Bay 1',
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1',
|
||||
'capacity_bytes': 1000204886016,
|
||||
'state': 'Enabled',
|
||||
'health': 'OK'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
expected_labels, metrics[expected_metric][0][1])
|
||||
|
||||
def test_category_registry(self):
|
||||
metrics_registry = CollectorRegistry()
|
||||
|
||||
idrac_redfish.category_registry(self.node_message, metrics_registry)
|
||||
|
||||
label = {
|
||||
'node_name': 'r640-u12',
|
||||
'node_uuid': 'fe81395b-1999-4ab4-8eb0-235e1ab02778',
|
||||
'instance_uuid': '235e4d8a-0f1a-87a0-ea81-8a1b0277cd87',
|
||||
'name': 'PCIe SSD in Slot 9 in Bay 1',
|
||||
'model': 'Dell Express Flash NVMe P4500 1.0TB SFF',
|
||||
'capacity_bytes': '1000204886016',
|
||||
'state': 'Enabled',
|
||||
'health': 'OK',
|
||||
'sensor_id': 'PCIe SSD in Slot 9 in Bay 1:CPU.1@System.Embedded.1'
|
||||
}
|
||||
sensor_value = metrics_registry.get_sample_value(
|
||||
'baremetal_drive_status', label)
|
||||
self.assertEqual(0, sensor_value)
|
||||
@@ -46,11 +46,17 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
self.assertEqual(62, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'CPU',
|
||||
'identity': 'XXX-YYY-ZZZ',
|
||||
'max_reading_range_temp': 120,
|
||||
'min_reading_range_temp': 0,
|
||||
'physical_context': 'CPU',
|
||||
'sensor_number': 1,
|
||||
'health': 'OK',
|
||||
'state': 'enabled',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'sensor_id': 1
|
||||
'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
@@ -66,11 +72,21 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'PSU',
|
||||
'health': 'OK',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'last_power_output_watts': 650,
|
||||
'line_input_voltage': 220,
|
||||
'maximum_frequency_hz': 63,
|
||||
'maximum_voltage': 250,
|
||||
'minimum_frequency_hz': 47,
|
||||
'minimum_voltage': 185,
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'sensor_id': '0:Power@ZZZ-YYY-XXX'
|
||||
'output_wattage': 1450,
|
||||
'power_capacity_watts': 1450,
|
||||
'sensor_id': '0:Power@ZZZ-YYY-XXX',
|
||||
'serial_number': 'SN010203040506',
|
||||
'state': 'enabled'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
@@ -86,11 +102,19 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'CPU',
|
||||
'health': 'OK',
|
||||
'identity': 'XXX-YYY-ZZZ',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'max_reading_range': 10000,
|
||||
'min_reading_range': 0,
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'sensor_id': 'XXX-YYY-ZZZ'
|
||||
'physical_context': 'CPU',
|
||||
'reading': 6000,
|
||||
'reading_units': 'RPM',
|
||||
'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX',
|
||||
'serial_number': 'SN010203040506',
|
||||
'state': 'enabled'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
@@ -106,11 +130,16 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'HDD',
|
||||
'capacity_bytes': 3750000000,
|
||||
'failure_predicted': True,
|
||||
'health': 'OK',
|
||||
'identity': '32ADF365C6C1B7BD',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'model': 'IBM 350A',
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX'
|
||||
'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX',
|
||||
'state': 'enabled'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
@@ -122,10 +151,15 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
redfish.category_registry(self.node_message, metrics_registry)
|
||||
|
||||
label = {
|
||||
'entity_id': 'HDD',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
|
||||
'capacity_bytes': '3750000000',
|
||||
'failure_predicted': 'True',
|
||||
'health': 'OK',
|
||||
'identity': '32ADF365C6C1B7BD',
|
||||
'model': 'IBM 350A',
|
||||
'state': 'enabled',
|
||||
'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX'
|
||||
}
|
||||
|
||||
@@ -155,11 +189,21 @@ class TestPayloadsParser(unittest.TestCase):
|
||||
self.assertEqual(0, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'PSU',
|
||||
'health': 'OK',
|
||||
'instance_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b',
|
||||
'last_power_output_watts': 650,
|
||||
'line_input_voltage': 220,
|
||||
'maximum_frequency_hz': 63,
|
||||
'maximum_voltage': 250,
|
||||
'minimum_frequency_hz': 47,
|
||||
'minimum_voltage': 185,
|
||||
'node_name': 'knilab-master-u9',
|
||||
'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b',
|
||||
'sensor_id': '0:Power@ZZZ-YYY-XXX'
|
||||
'output_wattage': 1450,
|
||||
'power_capacity_watts': 1450,
|
||||
'sensor_id': '0:Power@ZZZ-YYY-XXX',
|
||||
'serial_number': 'SN010203040506',
|
||||
'state': 'enabled'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
@@ -188,10 +232,16 @@ class TestPayloadsParserNoneNodeName(unittest.TestCase):
|
||||
self.assertEqual(62, metrics[expected_metric][0][0])
|
||||
|
||||
expected_labels = {
|
||||
'entity_id': 'CPU',
|
||||
'identity': 'XXX-YYY-ZZZ',
|
||||
'max_reading_range_temp': 120,
|
||||
'min_reading_range_temp': 0,
|
||||
'physical_context': 'CPU',
|
||||
'sensor_number': 1,
|
||||
'health': 'OK',
|
||||
'state': 'enabled',
|
||||
'instance_uuid': '85d6b2c8-fe57-432d-868a-330e0e28cf34',
|
||||
'node_uuid': 'c2bd00b9-9881-4179-8b7b-bf786ec3696b',
|
||||
'sensor_id': 1
|
||||
'sensor_id': 'XXX-YYY-ZZZ@ZZZ-YYY-XXX'
|
||||
}
|
||||
|
||||
self.assertEqual(
|
||||
|
||||
10
releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml
Normal file
10
releasenotes/notes/idrac-metrics-83480d59c49b13ca.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds support for parsing iDRAC sensor data metrics via redfish parser.
|
||||
A new metric `baremetal_temperature_status` was also added.
|
||||
fixes:
|
||||
- |
|
||||
Fixed a bug where status metrics failed to differentiate between warning
|
||||
and critical states, showing a value of 1 for both. The metrics now show
|
||||
a value of 1 for Warning and 2 for Critical.
|
||||
Reference in New Issue
Block a user