Support for metrics description

This patch adds the capability to add a description for the ipmi
metrics.

The descriptions are based on [1]

[1] https://www.intel.com/content/www/us/en/servers/ipmi/ipmi-second-gen-interface-spec-v2-rev1-1.html

Change-Id: Ic21a778b06456140ac5dbbec0e44271c72d80b29
This commit is contained in:
Iury Gregory Melo Ferreira 2019-08-07 14:04:57 +02:00 committed by Iury Gregory Melo Ferreira
parent 1190c4edc7
commit 5398f864f1
3 changed files with 115 additions and 3 deletions

View File

@ -10,7 +10,9 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import json
import logging import logging
import pkg_resources
import re import re
from datetime import datetime from datetime import datetime
@ -71,6 +73,11 @@ CATEGORY_PARAMS = {
} }
# Path of the JSON file that maps each IPMI metric name to its
# human-readable description, resolved relative to this module's package.
IPMI_JSON = pkg_resources.resource_filename(__name__,
                                            "metrics_information/ipmi.json")
# Load the descriptions once at import time. The context manager closes the
# file handle deterministically instead of leaving it open until it is
# garbage collected.
with open(IPMI_JSON) as _ipmi_json_file:
    IPMI_METRICS_DESCRIPTION = json.load(_ipmi_json_file)
def metric_names(category_info): def metric_names(category_info):
LOG.info('metric_names function called with data=%s' % str(category_info)) LOG.info('metric_names function called with data=%s' % str(category_info))
@ -214,7 +221,8 @@ def prometheus_format(category_info, ipmi_metric_registry, available_metrics):
values = extract_values(entries, category_info) values = extract_values(entries, category_info)
if all(v is None for v in values.values()): if all(v is None for v in values.values()):
continue continue
g = Gauge(metric, '', labelnames=labels.get(entries[0]).keys(), g = Gauge(metric, get_metric_description(metric),
labelnames=list(labels.get(entries[0])),
registry=ipmi_metric_registry) registry=ipmi_metric_registry)
for e in entries: for e in entries:
if values[e] is None: if values[e] is None:
@ -244,6 +252,10 @@ def timestamp_registry(node_information, ipmi_metric_registry):
dt_timestamp = datetime.strptime(node_information['timestamp'], dt_timestamp = datetime.strptime(node_information['timestamp'],
'%Y-%m-%dT%H:%M:%S.%f') '%Y-%m-%dT%H:%M:%S.%f')
value = int((dt_timestamp - dt_1970).total_seconds()) value = int((dt_timestamp - dt_1970).total_seconds())
g = Gauge(metric, 'Timestamp of the last received payload', g = Gauge(metric, get_metric_description(metric),
labelnames=labels.keys(), registry=ipmi_metric_registry) labelnames=list(labels), registry=ipmi_metric_registry)
g.labels(**labels).set(value) g.labels(**labels).set(value)
def get_metric_description(metric_name):
    """Return the description registered for ``metric_name``.

    The lookup is done against ``IPMI_METRICS_DESCRIPTION``.

    :param metric_name: name of the metric to look up.
    :returns: the description stored for the metric, or an empty string
        when the metric has no entry.
    """
    try:
        return IPMI_METRICS_DESCRIPTION[metric_name]
    except KeyError:
        # Unknown metrics are still exported, just without a help text.
        return ''

View File

@ -0,0 +1,96 @@
{
"baremetal_front_led_panel":
"The System Board Management Subsys Health",
"baremetal_temp_celsius":
"The temperature of each Processor",
"baremetal_exhaust_temp_celsius":
"The exhaust temperature for the System Board",
"baremetal_inlet_temp_celsius":
"The inlet temperature for the System Board",
"baremetal_system_unknown":
"Unknown System Event",
"baremetal_system_post_err":
"The System Firmware progress",
"baremetal_current":
"Current power supply",
"baremetal_pwr_consumption":
"Current power consumption",
"baremetal_tpm_presence":
"Indicates if the Trusted Platform Module is present",
"baremetal_hdwr_version_err":
"Indicates if there is a hardware version error",
"baremetal_chassis_mismatch":
"Indicates if there is a mismatch in the chassis information",
"baremetal_memory_ecc_corr_err":
"Status of the ECC Corr Err",
"baremetal_idpt_mem_fail":
"Status of the iDPT Mem Fail",
"baremetal_memory_ecc_uncorr_err":
"Status of the ECC Uncorr Err",
"baremetal_memory_mirrored":
"Status of the Memory Mirrored",
"baremetal_mem_ecc_warning":
"Status of the Mem ECC Warning",
"baremetal_memory_b":
"Status of the Memory B",
"baremetal_memory_a":
"Status of the Memory A",
"baremetal_memory_usb_over_current":
"Status of the USB Over-current",
"baremetal_memory_post_pkg_repair":
"Status of the POST Pkg Repair",
"baremetal_memory_spared":
"Status of the Memory Spared",
"baremetal_power_ps_redundancy":
"Indicates if there is redundancy for power supply",
"baremetal_power_status":
"Indicates the status of the power supply",
"baremetal_os_watchdog_time":
"watchdog timer expirations",
"baremetal_os_watchdog":
"Indicates if watchdog is enabled",
"baremetal_fan_redundancy":
"Indicates if fan redundancy is active",
"baremetal_fan_rpm":
"Indicates the fan speed",
"baremetal_last_payload_timestamp_seconds":
"Timestamp of the last received payload",
"baremetal_voltage_mem_vtt_pg":
"Status of the MEM___ VTT PG",
"baremetal_voltage_sw_pg":
"Status of the SW PG",
"baremetal_voltage_vsa_pg":
"Status of the VSA PG",
"baremetal_voltage_vcore_pg":
"Status of the VCORE PG",
"baremetal_voltage_volts":
"Indicates the voltage for the power supply",
"baremetal_voltage_dimm_pg":
"Status of the DIMM PG",
"baremetal_voltage_vsbm_sw_pg":
"Status of the VSBM SW PG",
"baremetal_voltage_ndc_pg":
"Status of the NDC PG",
"baremetal_voltage_ps_pg_fail":
"Status of the PS_ PG FAIL",
"baremetal_voltage_vccio_pg":
"Status of the VCCIO PG",
"baremetal_voltage_vsb_sw_pg":
"Status of the VSB__ SW PG",
"baremetal_voltage_mem_vddq_pg":
"Status of the MEM___ VDDQ PG",
"baremetal_voltage_bp_pg":
"Status of the BP PG",
"baremetal_voltage_a_pg":
"Status of the A PG",
"baremetal_voltage_fivr_pg":
"Status of the FIVR PG",
"baremetal_voltage_pvnn_sw_pg":
"Status of the PVNN SW PG",
"baremetal_voltage_mem_vpp_pg":
"Status of the MEM___ VPP PG",
"baremetal_voltage_pfault_fail_safe":
"Status of the Pfault Fail Safe",
"baremetal_voltage_b_pg":
"Status of the B PG"
}

View File

@ -0,0 +1,4 @@
---
features:
- |
Adds a description for each IPMI metric.