Merge "Add Infiniband metrics plugin"
This commit is contained in:
commit
ebd42eb5de
@ -50,6 +50,7 @@
|
||||
- [Host Alive](#host-alive)
|
||||
- [HTTP (endpoint status)](#http-endpoint-status)
|
||||
- [HTTP Metrics](#http-metrics)
|
||||
- [Infiniband](#infiniband)
|
||||
- [InfluxDB](#influxdb)
|
||||
- [InfluxDB-Relay](#influxdb-relay)
|
||||
- [IIS](#iis)
|
||||
@ -343,6 +344,7 @@ These are the detection plugins included with the Monasca Agent. See [Customiza
|
||||
| heat | ServicePlugin |
|
||||
| host_alive | ArgsPlugin |
|
||||
| http_check | ArgsPlugin |
|
||||
| ib_network | Plugin |
|
||||
| ironic | ServicePlugin |
|
||||
| kafka_consumer | Plugin |
|
||||
| keystone | ServicePlugin |
|
||||
@ -1537,6 +1539,31 @@ instances:
|
||||
type: gauge
|
||||
```
|
||||
|
||||
## Infiniband
|
||||
|
||||
This section describes the Infiniband metrics check (IBNetwork) that can be performed by the agent.
|
||||
|
||||
The plugin reads network traffic counters for all IB devices from /sys/class/infiniband/.
|
||||
|
||||
Auto-detection for this plugin yields a basic configuration file, with no configuration
|
||||
options:
|
||||
|
||||
ib_network.yaml:
|
||||
```yaml
|
||||
init_config:
|
||||
|
||||
instances:
|
||||
- built_by: IBNetworkDetect
|
||||
name: ib_network_stats
|
||||
```
|
||||
|
||||
The following metrics are provided:
|
||||
|
||||
| Metric Name | Dimensions | Semantics |
|
||||
| ----------- | ---------- | --------- |
|
||||
| net.in_bytes_sec | device | Number of network bytes received per second |
|
||||
| net.out_bytes_sec | device | Number of network bytes sent per second |
|
||||
|
||||
## InfluxDB
|
||||
|
||||
Auto-detection for InfluxDB plugin comes with two checks enabled:
|
||||
|
87
monasca_agent/collector/checks_d/ib_network.py
Normal file
87
monasca_agent/collector/checks_d/ib_network.py
Normal file
@ -0,0 +1,87 @@
|
||||
# Copyright (c) 2017 StackHPC Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import monasca_agent.collector.checks as checks
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Counters which, according to https://community.mellanox.com/docs/DOC-2572,
# are reported divided by the number of lanes. They must be multiplied by the
# lane count to obtain a value that is valid for the link as a whole.
_FIELDS_TO_MULTIPLY_BY_LANE_COUNT = {'port_rcv_data', 'port_xmit_data'}

# Prefix joined with a counter name to build the metric name.
_METRIC_NAME_PREFIX = "ibnet"
# Directory that is listed to enumerate the Infiniband devices.
_IB_DEVICE_PATH = "/sys/class/infiniband/"
# Location of the counter files, relative to a device directory.
_IB_COUNTER_PATH = "ports/1/counters/"
|
||||
|
||||
|
||||
class IBNetwork(checks.AgentCheck):
    """Agent check that reports Infiniband port counters as rate metrics.

    Every counter file found below
    ``<_IB_DEVICE_PATH>/<device>/<_IB_COUNTER_PATH>`` is read and submitted
    through ``self.rate`` under the name ``<_METRIC_NAME_PREFIX>.<counter>``.
    """

    def __init__(self, name, init_config, agent_config):
        super(IBNetwork, self).__init__(name, init_config, agent_config)

    @staticmethod
    def _get_lane_count():
        """Return the number of lanes assumed for every link."""
        # It is possible that we could get the number of lanes from the
        # driver, for example:
        #
        # # cat /sys/class/infiniband/mlx5_0/ports/1/rate
        # 100 Gb/sec (4X EDR)
        #
        # However, according to the following PR this isn't expected to
        # change: https://github.com/prometheus/node_exporter/pull/579 so
        # hard code it for now.
        return 4

    def _normalise_counter(self, field, counter):
        """Scale per-lane counters so they describe the link as a whole.

        :param field: name of the counter file the value was read from
        :param counter: integer value read from that file
        :returns: ``counter`` multiplied by the lane count when ``field`` is
                  in ``_FIELDS_TO_MULTIPLY_BY_LANE_COUNT``, else unchanged
        """
        if field in _FIELDS_TO_MULTIPLY_BY_LANE_COUNT:
            counter *= self._get_lane_count()
        return counter

    def _read_counter(self, device, field):
        """Read one counter file of ``device`` and return its value.

        :param device: device directory name below ``_IB_DEVICE_PATH``
        :param field: counter file name below ``_IB_COUNTER_PATH``
        :returns: the normalised counter as an integer
        :raises IOError, OSError: if the file cannot be opened or read
        :raises ValueError: if the file content is not an integer
        """
        counter_path = os.path.join(
            _IB_DEVICE_PATH, device, _IB_COUNTER_PATH, field)
        with open(counter_path) as f:
            counter = f.read()
        counter = int(counter.rstrip())
        return self._normalise_counter(field, counter)

    @staticmethod
    def _get_devices():
        """Return the entries of the Infiniband device directory."""
        return os.listdir(_IB_DEVICE_PATH)

    @staticmethod
    def _get_fields(device):
        """Return the names of the counter files available for ``device``."""
        return os.listdir(os.path.join(
            _IB_DEVICE_PATH, device, _IB_COUNTER_PATH))

    def check(self, instance):
        """Submit a rate metric for every counter of every device.

        A device whose counter directory cannot be listed, or a counter that
        cannot be read or parsed, is logged and skipped so that one bad entry
        does not prevent the remaining counters from being collected.

        :param instance: instance configuration, passed on to
                         ``self._set_dimensions``
        """
        dimensions = self._set_dimensions(None, instance)

        for device in self._get_devices():
            try:
                fields = self._get_fields(device)
            except OSError as err:
                log.warning('Unable to list counters for device %s: %s',
                            device, err)
                continue

            for field in fields:
                try:
                    counter = self._read_counter(device, field)
                except (IOError, OSError, ValueError) as err:
                    log.warning('Unable to read counter %s for device %s: %s',
                                field, device, err)
                    continue
                metric_name = '{0}.{1}'.format(_METRIC_NAME_PREFIX, field)
                self.rate(metric_name,
                          counter,
                          device_name=device,
                          dimensions=dimensions)
            # Lazy logger arguments: the message is only built when debug
            # logging is enabled.
            log.debug('Collected network interface status for device %s',
                      device)
|
43
monasca_setup/detection/plugins/ib_network.py
Normal file
43
monasca_setup/detection/plugins/ib_network.py
Normal file
@ -0,0 +1,43 @@
|
||||
# Copyright (c) 2018 StackHPC Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import monasca_setup.agent_config
|
||||
import monasca_setup.detection
|
||||
|
||||
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# Directory whose existence is tested to decide whether Infiniband is present.
_IB_DEVICE_PATH = "/sys/class/infiniband/"
|
||||
|
||||
|
||||
class IBNetworkDetect(monasca_setup.detection.Plugin):
    """Detects and configures Infiniband plugin."""

    def _detect(self):
        """Set ``self.available`` to whether the Infiniband path exists."""
        self.available = False
        if not self._detect_infiniband():
            # The trailing space keeps the two adjacent literals from being
            # joined as "pluginwill".
            LOG.info('Infiniband hardware was not detected: ib_network plugin '
                     'will not be loaded.')
            return
        self.available = True

    def build_config(self):
        """Return the plugin configuration for the ``ib_network`` check.

        :returns: a ``monasca_setup.agent_config.Plugins`` object holding a
                  single instance named ``ib_network_stats`` and no
                  ``init_config``
        """
        config = monasca_setup.agent_config.Plugins()
        config['ib_network'] = {'init_config': None,
                                'instances': [{'name': 'ib_network_stats'}]}
        return config

    def _detect_infiniband(self):
        """Return True if ``_IB_DEVICE_PATH`` is an existing directory."""
        return os.path.isdir(_IB_DEVICE_PATH)
|
83
tests/checks_d/test_ib_network.py
Normal file
83
tests/checks_d/test_ib_network.py
Normal file
@ -0,0 +1,83 @@
|
||||
# Copyright (c) 2018 StackHPC Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
import mock
|
||||
|
||||
import monasca_agent.collector.checks_d.ib_network as ib_network
|
||||
|
||||
|
||||
class MockIBNetworkPlugin(ib_network.IBNetwork):
    """IBNetwork variant returning canned dimensions, devices and fields."""

    def __init__(self):
        """Do nothing: the base class constructor is deliberately skipped."""

    @staticmethod
    def _set_dimensions(dimensions, instance=None):
        """Return a fixed dimensions dictionary, ignoring the arguments."""
        return dict(hostname='dummy_hostname')

    @staticmethod
    def _get_devices():
        """Return two dummy device names."""
        return list(('mlx5_0', 'mlx5_1'))

    @staticmethod
    def _get_fields(device):
        """Return two dummy counter names, whatever the device."""
        return list(('port_rcv_data', 'port_rcv_pkts'))
|
||||
|
||||
|
||||
class TestIBNetwork(unittest.TestCase):
    """Unit tests for the IBNetwork check."""

    def setUp(self):
        self.ib_network = MockIBNetworkPlugin()

    @mock.patch('monasca_agent.collector.checks_d.ib_network.open',
                mock.mock_open(read_data='1024'))
    @mock.patch('monasca_agent.collector.checks.AgentCheck.rate',
                autospec=True)
    def test_check(self, mock_rate):
        """Each dummy field of each dummy device is submitted as a rate."""
        self.ib_network.check(None)

        # Every counter file reads as 1024; port_rcv_data is expected to be
        # multiplied by the lane count, port_rcv_pkts is reported as read.
        expected_counters = (('port_rcv_data', 4096),
                             ('port_rcv_pkts', 1024))
        expected_dimensions = {'hostname': 'dummy_hostname'}
        expected_calls = [
            mock.call(mock.ANY,
                      ib_network._METRIC_NAME_PREFIX + '.' + field,
                      value,
                      device_name=device,
                      dimensions=expected_dimensions)
            for device in ('mlx5_0', 'mlx5_1')
            for field, value in expected_counters
        ]
        mock_rate.assert_has_calls(expected_calls, any_order=True)
|
48
tests/detection/test_ib_network.py
Normal file
48
tests/detection/test_ib_network.py
Normal file
@ -0,0 +1,48 @@
|
||||
# Copyright (c) 2018 StackHPC Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
import mock
|
||||
|
||||
import monasca_setup.detection.plugins.ib_network as ib_network
|
||||
|
||||
|
||||
class MockIBNetworkDetectPlugin(ib_network.IBNetworkDetect):
    """IBNetworkDetect variant whose constructor skips the base class one."""

    def __init__(self):
        """Do nothing: the base class constructor is deliberately skipped."""
|
||||
|
||||
|
||||
class TestIBNetworkDetect(unittest.TestCase):
    """Unit tests for the Infiniband detection plugin."""

    def setUp(self):
        self.ib_network = MockIBNetworkDetectPlugin()

    def test_build_config(self):
        """The generated configuration contains an ib_network entry."""
        self.assertIn('ib_network', self.ib_network.build_config())

    @mock.patch('os.path.isdir', return_value=True)
    def test__detect_ok(self, mock_isdir):
        """The plugin is available when the device path is a directory."""
        self.ib_network._detect()

        mock_isdir.assert_called_once_with(ib_network._IB_DEVICE_PATH)
        self.assertTrue(self.ib_network.available)

    @mock.patch('os.path.isdir', return_value=False)
    def test__detect_no_infiniband(self, mock_isdir):
        """The plugin is unavailable when the device path is missing."""
        self.ib_network._detect()

        mock_isdir.assert_called_once_with(ib_network._IB_DEVICE_PATH)
        self.assertFalse(self.ib_network.available)
|
Loading…
x
Reference in New Issue
Block a user