Merge "Add Infiniband metrics plugin"

This commit is contained in:
Zuul 2020-01-16 08:25:26 +00:00 committed by Gerrit Code Review
commit ebd42eb5de
5 changed files with 288 additions and 0 deletions

View File

@ -50,6 +50,7 @@
- [Host Alive](#host-alive)
- [HTTP (endpoint status)](#http-endpoint-status)
- [HTTP Metrics](#http-metrics)
- [Infiniband](#infiniband)
- [InfluxDB](#influxdb)
- [InfluxDB-Relay](#influxdb-relay)
- [IIS](#iis)
@ -343,6 +344,7 @@ These are the detection plugins included with the Monasca Agent. See [Customiza
| heat | ServicePlugin |
| host_alive | ArgsPlugin |
| http_check | ArgsPlugin |
| ib_network | Plugin |
| ironic | ServicePlugin |
| kafka_consumer | Plugin |
| keystone | ServicePlugin |
@ -1537,6 +1539,31 @@ instances:
type: gauge
```
## Infiniband
This section describes the Infiniband metrics check (IBNetwork) that can be performed by the agent.
The plugin reads network traffic counters for all IB devices from /sys/class/infiniband/.
Auto-detection for this plugin yields a basic configuration file, with no configuration
options:
ib_network.yaml:
```yaml
init_config:
instances:
- built_by: IBNetworkDetect
name: ib_network_stats
```
The plugin submits one rate metric named `ibnet.<counter>` for every counter file found under `/sys/class/infiniband/<device>/ports/1/counters/`, for example:
| Metric Name | Dimensions | Semantics |
| ----------- | ---------- | --------- |
| ibnet.port_rcv_data | device | Number of bytes received per second |
| ibnet.port_xmit_data | device | Number of bytes sent per second |
## InfluxDB
Auto-detection for InfluxDB plugin comes with two checks enabled:

View File

@ -0,0 +1,87 @@
# Copyright (c) 2017 StackHPC Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import monasca_agent.collector.checks as checks
log = logging.getLogger(__name__)

# sysfs directory holding one sub-directory per Infiniband device.
_IB_DEVICE_PATH = "/sys/class/infiniband/"
# Location of the counter files relative to a device directory; only port 1
# is read.
_IB_COUNTER_PATH = "ports/1/counters/"

# Metric names are built as "<prefix>.<counter file name>".
_METRIC_NAME_PREFIX = "ibnet"

# According to https://community.mellanox.com/docs/DOC-2572 the counters
# below are reported divided by the number of lanes, so they have to be
# multiplied by the lane count to be valid for the link as a whole.
_FIELDS_TO_MULTIPLY_BY_LANE_COUNT = {'port_xmit_data', 'port_rcv_data'}
class IBNetwork(checks.AgentCheck):
    """Agent check that publishes Infiniband port counters as rate metrics.

    For every device directory below ``_IB_DEVICE_PATH`` each file in
    ``_IB_COUNTER_PATH`` is read, parsed as an integer and submitted through
    ``self.rate`` as ``<_METRIC_NAME_PREFIX>.<counter file name>``.
    """

    def __init__(self, name, init_config, agent_config):
        super(IBNetwork, self).__init__(name, init_config, agent_config)

    @staticmethod
    def _get_lane_count():
        """Return the number of lanes assumed for every link."""
        # It is possible that we could get the number of lanes from the
        # driver, for example:
        #
        # # cat /sys/class/infiniband/mlx5_0/ports/1/rate
        # 100 Gb/sec (4X EDR)
        #
        # However, according to the following PR this isn't expected to
        # change: https://github.com/prometheus/node_exporter/pull/579 so
        # hard code it for now.
        return 4

    def _normalise_counter(self, field, counter):
        """Scale ``counter`` by the lane count when ``field`` requires it.

        :param field: name of the counter file the value was read from
        :param counter: integer value read from that file
        :returns: the value valid for the link as a whole
        """
        if field in _FIELDS_TO_MULTIPLY_BY_LANE_COUNT:
            counter *= self._get_lane_count()
        return counter

    def _read_counter(self, device, field):
        """Read a single counter of ``device`` and return it normalised.

        :raises IOError, OSError: if the counter file cannot be read
        :raises ValueError: if its content is not an integer
        """
        counter_path = os.path.join(
            _IB_DEVICE_PATH, device, _IB_COUNTER_PATH, field)
        with open(counter_path) as f:
            raw_value = f.read()
        return self._normalise_counter(field, int(raw_value.rstrip()))

    @staticmethod
    def _get_devices():
        """Return the names of all entries below ``_IB_DEVICE_PATH``."""
        return os.listdir(_IB_DEVICE_PATH)

    @staticmethod
    def _get_fields(device):
        """Return the names of the counter files available for ``device``."""
        return os.listdir(os.path.join(
            _IB_DEVICE_PATH, device, _IB_COUNTER_PATH))

    def check(self, instance):
        """Collect every readable counter of every device as a rate metric.

        A counter that cannot be read or parsed, or a device whose counter
        directory cannot be listed, is logged and skipped so that one bad
        entry does not discard the metrics of all the others.
        """
        dimensions = self._set_dimensions(None, instance)

        for device in self._get_devices():
            try:
                fields = self._get_fields(device)
            except OSError as exc:
                log.warning('Unable to list counters for Infiniband device '
                            '%s: %s', device, exc)
                continue

            for field in fields:
                try:
                    counter = self._read_counter(device, field)
                except (IOError, OSError, ValueError) as exc:
                    log.warning('Unable to read counter %s for Infiniband '
                                'device %s: %s', field, device, exc)
                    continue
                metric_name = '{0}.{1}'.format(_METRIC_NAME_PREFIX, field)
                self.rate(metric_name,
                          counter,
                          device_name=device,
                          dimensions=dimensions)
            log.debug('Collected network interface status for device %s',
                      device)

View File

@ -0,0 +1,43 @@
# Copyright (c) 2018 StackHPC Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import monasca_setup.agent_config
import monasca_setup.detection
# Directory whose existence is used as the indicator that Infiniband
# hardware is present on this host.
_IB_DEVICE_PATH = "/sys/class/infiniband/"

LOG = logging.getLogger(__name__)
class IBNetworkDetect(monasca_setup.detection.Plugin):
    """Detects and configures Infiniband plugin."""

    def _detect(self):
        """Set ``self.available`` to whether Infiniband was detected."""
        self.available = False
        if not self._detect_infiniband():
            # The two literals are concatenated, so the separating space
            # must be part of one of them.
            LOG.info('Infiniband hardware was not detected: ib_network '
                     'plugin will not be loaded.')
            return
        self.available = True

    def build_config(self):
        """Return the plugin configuration enabling the ib_network check.

        The configuration has no ``init_config`` and a single instance
        named ``ib_network_stats``.
        """
        config = monasca_setup.agent_config.Plugins()
        config['ib_network'] = {'init_config': None,
                                'instances': [{'name': 'ib_network_stats'}]}
        return config

    def _detect_infiniband(self):
        """Return True if the ``_IB_DEVICE_PATH`` directory exists."""
        return os.path.isdir(_IB_DEVICE_PATH)

View File

@ -0,0 +1,83 @@
# Copyright (c) 2018 StackHPC Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import unittest
import mock
import monasca_agent.collector.checks_d.ib_network as ib_network
class MockIBNetworkPlugin(ib_network.IBNetwork):
    """IBNetwork with canned dimensions, devices and counter names."""

    def __init__(self):
        """Skip the base class constructor on purpose."""

    @staticmethod
    def _set_dimensions(dimensions, instance=None):
        """Return fixed dimensions regardless of the arguments."""
        return dict(hostname='dummy_hostname')

    @staticmethod
    def _get_devices():
        """Pretend that two devices are present."""
        fake_devices = ['mlx5_0', 'mlx5_1']
        return fake_devices

    @staticmethod
    def _get_fields(device):
        """Pretend that every device exposes the same two counters."""
        fake_fields = ['port_rcv_data', 'port_rcv_pkts']
        return fake_fields
class TestIBNetwork(unittest.TestCase):
    """Tests for the IBNetwork check using canned devices and counters."""

    def setUp(self):
        self.ib_network = MockIBNetworkPlugin()

    @mock.patch('monasca_agent.collector.checks_d.ib_network.open',
                mock.mock_open(read_data='1024'))
    @mock.patch('monasca_agent.collector.checks.AgentCheck.rate',
                autospec=True)
    def test_check(self, mock_rate):
        """Every counter of every device is submitted exactly once."""
        self.ib_network.check(None)

        # Every counter file yields '1024'. The count for port_rcv_data
        # should be multiplied by the lane count.
        expected_values = (('port_rcv_data', 4096), ('port_rcv_pkts', 1024))

        # For each of the two dummy devices we expect to collect two dummy
        # fields.
        calls = [
            mock.call(
                mock.ANY,
                ib_network._METRIC_NAME_PREFIX + '.' + field,
                value,
                device_name=device,
                dimensions={'hostname': 'dummy_hostname'}
            )
            for device in ('mlx5_0', 'mlx5_1')
            for field, value in expected_values
        ]
        mock_rate.assert_has_calls(calls, any_order=True)
        # assert_has_calls accepts additional calls, so also pin the total
        # to catch duplicated or unexpected metrics.
        self.assertEqual(len(calls), mock_rate.call_count)

View File

@ -0,0 +1,48 @@
# Copyright (c) 2018 StackHPC Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import unittest
import mock
import monasca_setup.detection.plugins.ib_network as ib_network
class MockIBNetworkDetectPlugin(ib_network.IBNetworkDetect):
    """IBNetworkDetect that can be instantiated without any arguments."""

    def __init__(self):
        """Skip the base class constructor on purpose."""
class TestIBNetworkDetect(unittest.TestCase):
    """Tests for the Infiniband detection plugin."""

    def setUp(self):
        self.ib_network = MockIBNetworkDetectPlugin()

    def test_build_config(self):
        """The generated configuration has an ib_network entry."""
        self.assertIn('ib_network', self.ib_network.build_config())

    @mock.patch('os.path.isdir', return_value=True)
    def test__detect_ok(self, mock_isdir):
        """The plugin is available when the device directory exists."""
        self.ib_network._detect()

        mock_isdir.assert_called_once_with(ib_network._IB_DEVICE_PATH)
        self.assertTrue(self.ib_network.available)

    @mock.patch('os.path.isdir', return_value=False)
    def test__detect_no_infiniband(self, mock_isdir):
        """The plugin is unavailable when the device directory is absent."""
        self.ib_network._detect()

        mock_isdir.assert_called_once_with(ib_network._IB_DEVICE_PATH)
        self.assertFalse(self.ib_network.available)