Browse Source

Add Redfish metrics support

Added a parsing module to process Redfish-originated metrics
and submit them to Prometheus.

Change-Id: I1c751041488366304c92d4df07cb8a62dcb371fa
changes/31/680731/4
Ilya Etingof 2 weeks ago
parent
commit
9959cf36bc

+ 17
- 7
ironic_prometheus_exporter/messaging.py View File

@@ -15,6 +15,7 @@ import os
15 15
 
16 16
 from ironic_prometheus_exporter.parsers import ipmi
17 17
 from ironic_prometheus_exporter.parsers import header
18
+from ironic_prometheus_exporter.parsers import redfish
18 19
 from oslo_config import cfg
19 20
 from oslo_messaging.notify import notifier
20 21
 from prometheus_client import write_to_textfile, CollectorRegistry
@@ -43,14 +44,23 @@ class PrometheusFileDriver(notifier.Driver):
43 44
 
44 45
     def notify(self, ctxt, message, priority, retry):
45 46
         try:
46
-            if message['event_type'] == 'hardware.ipmi.metrics':
47
-                registry = CollectorRegistry()
48
-                node_message = message['payload']
49
-                header.timestamp_registry(node_message, registry)
47
+            registry = CollectorRegistry()
48
+
49
+            event_type = message['event_type']
50
+            node_message = message['payload']
51
+            header.timestamp_registry(node_message, registry)
52
+
53
+            if event_type == 'hardware.ipmi.metrics':
50 54
                 ipmi.category_registry(node_message, registry)
51
-                nodeFile = os.path.join(self.location,
52
-                                        node_message['node_name'])
53
-                write_to_textfile(nodeFile, registry)
55
+
56
+            elif event_type == 'hardware.redfish.metrics':
57
+                redfish.category_registry(node_message, registry)
58
+
59
+            nodeFile = os.path.join(
60
+                self.location,
61
+                node_message['node_name'] + '-' + event_type)
62
+            write_to_textfile(nodeFile, registry)
63
+
54 64
         except Exception as e:
55 65
             LOG.error(e)
56 66
 

+ 256
- 0
ironic_prometheus_exporter/parsers/redfish.py View File

@@ -0,0 +1,256 @@
1
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
2
+#    not use this file except in compliance with the License. You may obtain
3
+#    a copy of the License at
4
+#
5
+#         http://www.apache.org/licenses/LICENSE-2.0
6
+#
7
+#    Unless required by applicable law or agreed to in writing, software
8
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10
+#    License for the specific language governing permissions and limitations
11
+#    under the License.
12
+
13
+import collections
14
+import logging
15
+
16
+from prometheus_client import Gauge
17
+
18
+
19
+LOG = logging.getLogger(__name__)
20
+
21
+
22
+def _build_labels(node_message):
23
+    return {
24
+        k: node_message[k]
25
+        for k in ('node_name', 'node_uuid', 'instance_uuid')
26
+    }
27
+
28
+
29
+def build_temperature_metrics(node_message):
30
+    """Build Prometheus temperature metrics from Oslo message.
31
+
32
+    Takes Oslo notification message carrying Redfish sensor data and
33
+    produces a data structure suitable for submitting to Prometheus.
34
+
35
+    :param node_message: Oslo notification message
36
+
37
+    Examples::
38
+
39
+        .. code-block:: python
40
+
41
+        {
42
+            # metric name
43
+            'baremetal_temp_cpu_celsius':
44
+                [
45
+                    # metric value
46
+                    42,
47
+                    # metric instance in form of Prometheus labels
48
+                    {
49
+                        'node_name': 'kninode',
50
+                        'node_uuid': 'XXX-YYY-ZZZ',
51
+                        'instance_uuid': 'ZZZ-YYY-XXX',
52
+                        'entity_id': 'CPU',
53
+                        'sensor_id': '1'
54
+                    }
55
+                ]
56
+            ]
57
+        }
58
+    """
59
+    payload = node_message
60
+
61
+    for key in ('payload', 'Temperature'):
62
+        payload = payload.get(key, {})
63
+
64
+    metrics = collections.defaultdict(list)
65
+
66
+    for sensor_id, sensor_data in payload.items():
67
+        metric = 'baremetal_temp_%s_celsius' % (
68
+            sensor_data['physical_context'].lower())
69
+
70
+        labels = _build_labels(node_message)
71
+
72
+        labels['entity_id'] = sensor_data['physical_context']
73
+        labels['sensor_id'] = sensor_data['sensor_number']
74
+
75
+        value = sensor_data['reading_celsius']
76
+
77
+        metrics[metric].append((value, labels))
78
+
79
+    return metrics
80
+
81
+
82
+def build_power_metrics(node_message):
83
+    """Build Prometheus power metrics from Oslo message.
84
+
85
+    Takes Oslo notification message carrying Redfish sensor data and
86
+    produces a data structure suitable for submitting to Prometheus.
87
+
88
+    :param node_message: Oslo notification message
89
+
90
+    Examples::
91
+
92
+        .. code-block:: python
93
+
94
+        {
95
+            # metric name
96
+            'baremetal_power_status':
97
+                [
98
+                    # metric value (0 - OK, 1 - on fire)
99
+                    0,
100
+                    # metric instance in form of Prometheus labels
101
+                    {
102
+                        'node_name': 'kninode',
103
+                        'node_uuid': 'XXX-YYY-ZZZ',
104
+                        'instance_uuid': 'ZZZ-YYY-XXX',
105
+                        'entity_id': 'PSU',
106
+                        'sensor_id': '0:Power@ZZZ-YYY-XXX'
107
+                    }
108
+                ]
109
+            ]
110
+        }
111
+    """
112
+    payload = node_message
113
+
114
+    for key in ('payload', 'Power'):
115
+        payload = payload.get(key, {})
116
+
117
+    metrics = collections.defaultdict(list)
118
+
119
+    for sensor_id, sensor_data in payload.items():
120
+        metric = 'baremetal_power_status'
121
+
122
+        labels = _build_labels(node_message)
123
+
124
+        labels['entity_id'] = 'PSU'
125
+        labels['sensor_id'] = sensor_id
126
+
127
+        value = sensor_data['health'] != 'OK' and 1 or 0
128
+
129
+        metrics[metric].append((value, labels))
130
+
131
+    return metrics
132
+
133
+
134
+def build_fan_metrics(node_message):
135
+    """Build Prometheus fan metrics from Oslo message.
136
+
137
+    Takes Oslo notification message carrying Redfish sensor data and
138
+    produces a data structure suitable for submitting to Prometheus.
139
+
140
+    :param node_message: Oslo notification message
141
+
142
+    Examples::
143
+
144
+        .. code-block:: python
145
+
146
+        {
147
+            # metric name
148
+            'baremetal_fan_status':
149
+                [
150
+                    # metric value (0 - OK, 1 - on fire)
151
+                    0,
152
+                    # metric instance in form of Prometheus labels
153
+                    {
154
+                        'node_name': 'kninode',
155
+                        'node_uuid': 'XXX-YYY-ZZZ',
156
+                        'instance_uuid': 'ZZZ-YYY-XXX',
157
+                        'entity_id': 'CPU',
158
+                        'sensor_id': 'XXX-YYY-ZZZ'
159
+                    }
160
+                ]
161
+            ]
162
+        }
163
+    """
164
+    payload = node_message
165
+
166
+    for key in ('payload', 'Fan'):
167
+        payload = payload.get(key, {})
168
+
169
+    metrics = collections.defaultdict(list)
170
+
171
+    for sensor_id, sensor_data in payload.items():
172
+        metric = 'baremetal_fan_status'
173
+
174
+        labels = _build_labels(node_message)
175
+
176
+        labels['entity_id'] = sensor_data['physical_context']
177
+        labels['sensor_id'] = sensor_data['identity']
178
+
179
+        value = sensor_data['health'] != 'OK' and 1 or 0
180
+
181
+        metrics[metric].append((value, labels))
182
+
183
+    return metrics
184
+
185
+
186
+def build_drive_metrics(node_message):
187
+    """Build Prometheus drive metrics from Oslo message.
188
+
189
+    Takes Oslo notification message carrying Redfish sensor data and
190
+    produces a data structure suitable for submitting to Prometheus.
191
+
192
+    :param node_message: Oslo notification message
193
+
194
+    Examples::
195
+
196
+        .. code-block:: python
197
+
198
+        {
199
+            # metric name
200
+            'baremetal_drive_status':
201
+                [
202
+                    # metric value (0 - OK, 1 - on fire)
203
+                    0,
204
+                    # metric instance in form of Prometheus labels
205
+                    {
206
+                        'node_name': 'kninode',
207
+                        'node_uuid': 'XXX-YYY-ZZZ',
208
+                        'instance_uuid': 'ZZZ-YYY-XXX',
209
+                        'entity_id': 'HDD',
210
+                        'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX'
211
+                    }
212
+                ]
213
+            ]
214
+        }
215
+    """
216
+    payload = node_message
217
+
218
+    for key in ('payload', 'Drive'):
219
+        payload = payload.get(key, {})
220
+
221
+    metrics = collections.defaultdict(list)
222
+
223
+    for sensor_id, sensor_data in payload.items():
224
+        metric = 'baremetal_drive_status'
225
+
226
+        labels = _build_labels(node_message)
227
+
228
+        labels['entity_id'] = 'HDD'
229
+        labels['sensor_id'] = sensor_id
230
+
231
+        value = sensor_data['health'] != 'OK' and 1 or 0
232
+
233
+        metrics[metric].append((value, labels))
234
+
235
+    return metrics
236
+
237
+
238
+def category_registry(node_message, metrics_registry):
239
+    """Parse Redfish metrics and submit them to Prometheus
240
+
241
+    :param node_message: Oslo notification message
242
+    :param metrics_registry: Prometheus registry
243
+    """
244
+    metrics = build_temperature_metrics(node_message)
245
+    metrics.update(build_power_metrics(node_message))
246
+    metrics.update(build_fan_metrics(node_message))
247
+    metrics.update(build_drive_metrics(node_message))
248
+
249
+    for metric, details in metrics.items():
250
+
251
+        for value, labels in details:
252
+
253
+            gauge = Gauge(metric, '', labelnames=labels,
254
+                          registry=metrics_registry)
255
+
256
+            gauge.labels(**labels).set(value)

+ 67
- 0
ironic_prometheus_exporter/tests/json_samples/notification-redfish.json View File

@@ -0,0 +1,67 @@
1
+{
2
+    "priority": "INFO",
3
+    "event_type": "hardware.redfish.metrics",
4
+    "timestamp": "2019-03-29 20:12:26.885347",
5
+    "publisher_id": "None.localhost.localdomain",
6
+    "payload": {
7
+        "instance_uuid": "ac2aa2fd-6e1a-41c8-a114-2084c8705228",
8
+        "node_uuid": "ac2aa2fd-6e1a-41c8-a114-2084c8705228",
9
+        "event_type": "hardware.redfish.metrics.update",
10
+        "timestamp": "2019-03-29T20:12:22.989020",
11
+        "node_name": "knilab-master-u9",
12
+        "message_id": "85d6b2c8-fe57-432d-868a-330e0e28cf34",
13
+        "payload": {
14
+            "Temperature": {
15
+                "XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
16
+                    "identity": "XXX-YYY-ZZZ",
17
+                    "max_reading_range_temp": 120,
18
+                    "min_reading_range_temp": 0,
19
+                    "physical_context": "CPU",
20
+                    "reading_celsius": 62,
21
+                    "sensor_number": 1,
22
+                    "health": "OK",
23
+                    "state": "enabled"
24
+                }
25
+            },
26
+            "Power": {
27
+                "0:Power@ZZZ-YYY-XXX": {
28
+                    "health": "OK",
29
+                    "last_power_output_watts": 650,
30
+                    "line_input_voltage": 220,
31
+                    "maximum_frequency_hz": 63,
32
+                    "maximum_voltage": 250,
33
+                    "minimum_frequency_hz": 47,
34
+                    "minimum_voltage": 185,
35
+                    "output_wattage": 1450,
36
+                    "power_capacity_watts": 1450,
37
+                    "serial_number": "SN010203040506",
38
+                    "state": "enabled"
39
+                }
40
+            },
41
+            "Fan": {
42
+                "XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
43
+                    "identity": "XXX-YYY-ZZZ",
44
+                    "max_reading_range": 10000,
45
+                    "min_reading_range": 0,
46
+                    "physical_context": "CPU",
47
+                    "reading": 6000,
48
+                    "reading_units": "RPM",
49
+                    "serial_number": "SN010203040506",
50
+                    "health": "OK",
51
+                    "state": "enabled"
52
+                }
53
+            },
54
+            "Drive": {
55
+                "32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX": {
56
+                    "capacity_bytes": 3750000000,
57
+                    "failure_predicted": true,
58
+                    "health": "OK",
59
+                    "identity": "32ADF365C6C1B7BD",
60
+                    "model": "IBM 350A",
61
+                    "state": "enabled"
62
+                }
63
+            }
64
+        }
65
+    },
66
+    "message_id": "2c0da1e8-1958-484f-9bdd-9117d717f7fa"
67
+}

+ 35
- 5
ironic_prometheus_exporter/tests/test_driver.py View File

@@ -25,7 +25,7 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
25 25
     def setUp(self):
26 26
         super(TestPrometheusFileNotifier, self).setUp()
27 27
 
28
-    def test_instanciate(self):
28
+    def test_instantiate(self):
29 29
         temp_dir = self.useFixture(fixtures.TempDir()).path
30 30
         self.config(location=temp_dir,
31 31
                     group='oslo_messaging_notifications')
@@ -72,8 +72,8 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
72 72
                      if os.path.isfile(os.path.join(DIR, name))]
73 73
         self.assertEqual(node1, node2)
74 74
         self.assertEqual(len(all_files), 1)
75
-        self.assertIn(node1, all_files)
76
-        self.assertIn(node2, all_files)
75
+        self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
76
+        self.assertIn(node2 + '-hardware.ipmi.metrics', all_files)
77 77
 
78 78
     def test_messages_from_different_nodes(self):
79 79
         temp_dir = self.useFixture(fixtures.TempDir()).path
@@ -104,5 +104,35 @@ class TestPrometheusFileNotifier(test_utils.BaseTestCase):
104 104
         all_files = [name for name in os.listdir(DIR)
105 105
                      if os.path.isfile(os.path.join(DIR, name))]
106 106
         self.assertEqual(len(all_files), 2)
107
-        self.assertIn(node1, all_files)
108
-        self.assertIn(node2, all_files)
107
+        self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
108
+        self.assertIn(node2 + '-hardware.ipmi.metrics', all_files)
109
+
110
+    def test_messages_of_different_types(self):
111
+        temp_dir = self.useFixture(fixtures.TempDir()).path
112
+        self.config(location=temp_dir,
113
+                    group='oslo_messaging_notifications')
114
+        transport = oslo_messaging.get_notification_transport(self.conf)
115
+        driver = PrometheusFileDriver(self.conf, None, transport)
116
+
117
+        sample_file_1 = os.path.join(
118
+            os.path.dirname(ironic_prometheus_exporter.__file__),
119
+            'tests', 'json_samples', 'notification-ipmi-1.json')
120
+
121
+        sample_file_2 = os.path.join(
122
+            os.path.dirname(ironic_prometheus_exporter.__file__),
123
+            'tests', 'json_samples', 'notification-redfish.json')
124
+
125
+        msg1 = json.load(open(sample_file_1))
126
+        node1 = msg1['payload']['node_name']
127
+        msg2 = json.load(open(sample_file_2))
128
+        node2 = msg2['payload']['node_name']
129
+
130
+        driver.notify(None, msg1, 'info', 0)
131
+        driver.notify(None, msg2, 'info', 0)
132
+
133
+        DIR = self.conf.oslo_messaging_notifications.location
134
+        all_files = [name for name in os.listdir(DIR)
135
+                     if os.path.isfile(os.path.join(DIR, name))]
136
+        self.assertEqual(len(all_files), 2)
137
+        self.assertIn(node1 + '-hardware.ipmi.metrics', all_files)
138
+        self.assertIn(node2 + '-hardware.redfish.metrics', all_files)

+ 121
- 0
ironic_prometheus_exporter/tests/test_redfish_parser.py View File

@@ -0,0 +1,121 @@
1
+import json
2
+import os
3
+import unittest
4
+
5
+import ironic_prometheus_exporter
6
+from ironic_prometheus_exporter.parsers import redfish
7
+from prometheus_client import CollectorRegistry
8
+
9
+
10
+sample_file = os.path.join(
11
+    os.path.dirname(ironic_prometheus_exporter.__file__),
12
+    'tests', 'json_samples', 'notification-redfish.json')
13
+
14
+DATA = json.load(open(sample_file))
15
+
16
+
17
+class TestPayloadsParser(unittest.TestCase):
18
+
19
+    def setUp(self):
20
+        self.node_message = DATA['payload']
21
+        self.node_name = DATA['payload']['node_name']
22
+        self.node_uuid = DATA['payload']['node_uuid']
23
+        self.instance_uuid = DATA['payload']['instance_uuid']
24
+
25
+    def test_build_temperature_metrics(self):
26
+        metrics = redfish.build_temperature_metrics(self.node_message)
27
+
28
+        expected_metric = 'baremetal_temp_cpu_celsius'
29
+
30
+        self.assertIn(expected_metric, metrics)
31
+
32
+        self.assertEqual(62, metrics[expected_metric][0][0])
33
+
34
+        expected_labels = {
35
+            'entity_id': 'CPU',
36
+            'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
37
+            'node_name': 'knilab-master-u9',
38
+            'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
39
+            'sensor_id': 1
40
+        }
41
+
42
+        self.assertEqual(
43
+            expected_labels, metrics[expected_metric][0][1])
44
+
45
+    def test_build_power_metrics(self):
46
+        metrics = redfish.build_power_metrics(self.node_message)
47
+
48
+        expected_metric = 'baremetal_power_status'
49
+
50
+        self.assertIn(expected_metric, metrics)
51
+
52
+        self.assertEqual(0, metrics[expected_metric][0][0])
53
+
54
+        expected_labels = {
55
+            'entity_id': 'PSU',
56
+            'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
57
+            'node_name': 'knilab-master-u9',
58
+            'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
59
+            'sensor_id': '0:Power@ZZZ-YYY-XXX'
60
+        }
61
+
62
+        self.assertEqual(
63
+            expected_labels, metrics[expected_metric][0][1])
64
+
65
+    def test_build_fan_metrics(self):
66
+        metrics = redfish.build_fan_metrics(self.node_message)
67
+
68
+        expected_metric = 'baremetal_fan_status'
69
+
70
+        self.assertIn(expected_metric, metrics)
71
+
72
+        self.assertEqual(0, metrics[expected_metric][0][0])
73
+
74
+        expected_labels = {
75
+            'entity_id': 'CPU',
76
+            'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
77
+            'node_name': 'knilab-master-u9',
78
+            'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
79
+            'sensor_id': 'XXX-YYY-ZZZ'
80
+        }
81
+
82
+        self.assertEqual(
83
+            expected_labels, metrics[expected_metric][0][1])
84
+
85
+    def test_build_drive_metrics(self):
86
+        metrics = redfish.build_drive_metrics(self.node_message)
87
+
88
+        expected_metric = 'baremetal_drive_status'
89
+
90
+        self.assertIn(expected_metric, metrics)
91
+
92
+        self.assertEqual(0, metrics[expected_metric][0][0])
93
+
94
+        expected_labels = {
95
+            'entity_id': 'HDD',
96
+            'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
97
+            'node_name': 'knilab-master-u9',
98
+            'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
99
+            'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX'
100
+        }
101
+
102
+        self.assertEqual(
103
+            expected_labels, metrics[expected_metric][0][1])
104
+
105
+    def test_category_registry(self):
106
+        metrics_registry = CollectorRegistry()
107
+
108
+        redfish.category_registry(self.node_message, metrics_registry)
109
+
110
+        label = {
111
+            'entity_id': 'HDD',
112
+            'instance_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
113
+            'node_name': 'knilab-master-u9',
114
+            'node_uuid': 'ac2aa2fd-6e1a-41c8-a114-2084c8705228',
115
+            'sensor_id': '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX'
116
+        }
117
+
118
+        sensor_value = metrics_registry.get_sample_value(
119
+            'baremetal_drive_status', label)
120
+
121
+        self.assertEqual(0, sensor_value)

+ 5
- 0
releasenotes/notes/add-redfish-parser-af5b3b01a4e5d02d.yaml View File

@@ -0,0 +1,5 @@
1
+---
2
+features:
3
+  - |
4
+    Adds support for handling Redfish-originated metrics alongside
5
+    IPMI ones.

Loading…
Cancel
Save