Collect sensor data in `redfish` hardware type

Adds sensor data collector to ``redfish`` management interface.
Temperature, power, cooling and drive health metrics are collected.

Change-Id: I8accdcc73c7e0261579d753633f9dfc02a868115
Story: 2005878
Task: 33692
This commit is contained in:
Ilya Etingof 2019-06-13 19:01:18 +02:00
parent 9a0bd8a774
commit 82abc0beac
4 changed files with 348 additions and 8 deletions

View File

@ -16,7 +16,7 @@ python-xclarityclient>=0.1.6
ImcSdk>=0.7.2 ImcSdk>=0.7.2
# The Redfish hardware type uses the Sushy library # The Redfish hardware type uses the Sushy library
sushy>=1.6.0 sushy>=1.9.0
# Ansible-deploy interface # Ansible-deploy interface
ansible>=2.4 ansible>=2.4

View File

@ -13,6 +13,8 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import collections
from oslo_log import log from oslo_log import log
from oslo_utils import importutils from oslo_utils import importutils
@ -222,14 +224,155 @@ class RedfishManagement(base.ManagementInterface):
return BOOT_MODE_MAP.get(system.boot.get('mode')) return BOOT_MODE_MAP.get(system.boot.get('mode'))
@staticmethod
def _sensor2dict(resource, *fields):
return {field: getattr(resource, field)
for field in fields
if hasattr(resource, field)}
@classmethod
def _get_sensors_fan(cls, chassis):
"""Get fan sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for fan in chassis.thermal.fans.get_members():
sensor = cls._sensor2dict(
fan, 'identity', 'max_reading_range',
'min_reading_range', 'reading', 'reading_units',
'serial_number', 'physical_context')
sensor.update(cls._sensor2dict(fan.status, 'state', 'health'))
unique_name = '%s@%s' % (fan.identity, chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensors_temperatures(cls, chassis):
"""Get temperature sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for temps in chassis.thermal.temperatures.get_members():
sensor = cls._sensor2dict(
temps, 'identity', 'max_reading_range_temp',
'min_reading_range_temp', 'reading_celsius',
'physical_context', 'sensor_number')
sensor.update(cls._sensor2dict(temps.status, 'state', 'health'))
unique_name = '%s@%s' % (temps.identity, chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensors_power(cls, chassis):
"""Get power supply sensors reading.
:param chassis: Redfish `chassis` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for power in chassis.power.power_supplies:
sensor = cls._sensor2dict(
power, 'power_capacity_watts',
'line_input_voltage', 'last_power_output_watts',
'serial_number')
sensor.update(cls._sensor2dict(power.status, 'state', 'health'))
sensor.update(cls._sensor2dict(
power.input_ranges, 'minimum_voltage',
'maximum_voltage', 'minimum_frequency_hz',
'maximum_frequency_hz', 'output_wattage'))
unique_name = '%s:%s@%s' % (
power.member_id, chassis.power.identity,
chassis.identity)
sensors[unique_name] = sensor
return sensors
@classmethod
def _get_sensors_drive(cls, system):
"""Get storage drive sensors reading.
:param chassis: Redfish `system` object
:returns: returns a dict of sensor data.
"""
sensors = {}
for storage in system.simple_storage.get_members():
for drive in storage.devices:
sensor = cls._sensor2dict(
drive, 'identity', 'model', 'capacity_bytes',
'failure_predicted')
sensor.update(
cls._sensor2dict(drive.status, 'state', 'health'))
unique_name = '%s:%s@%s' % (
drive.identity, system.simple_storage.identity,
system.identity)
sensors[unique_name] = sensor
return sensors
def get_sensors_data(self, task):
    """Get sensors data.

    Fan, temperature and power readings are gathered from every chassis
    of the node's system; drive readings are gathered from the system
    itself. A sushy error while reading one group is logged at debug
    level and that group is skipped, so the remaining groups are still
    returned.

    :param task: a TaskManager instance.
    :raises: FailedToGetSensorData when getting the sensor data fails.
    :raises: FailedToParseSensorData when parsing sensor data fails.
    :raises: InvalidParameterValue if required parameters
             are missing.
    :raises: MissingParameterValue if a required parameter is missing.
    :returns: returns a dict of sensor data grouped by sensor type.
    """
    node = task.node

    # A group key only appears once its collector has returned, because
    # the defaultdict entry is created by the `.update()` call.
    sensors = collections.defaultdict(dict)

    system = redfish_utils.get_system(node)

    for chassis in system.chassis:
        try:
            sensors['Fan'].update(self._get_sensors_fan(chassis))

        except sushy.exceptions.SushyError as exc:
            LOG.debug("Failed reading fan information for node "
                      "%(node)s: %(error)s", {'node': node.uuid,
                                              'error': exc})

        try:
            sensors['Temperature'].update(
                self._get_sensors_temperatures(chassis))

        except sushy.exceptions.SushyError as exc:
            LOG.debug("Failed reading temperature information for node "
                      "%(node)s: %(error)s", {'node': node.uuid,
                                              'error': exc})

        try:
            sensors['Power'].update(self._get_sensors_power(chassis))

        except sushy.exceptions.SushyError as exc:
            LOG.debug("Failed reading power information for node "
                      "%(node)s: %(error)s", {'node': node.uuid,
                                              'error': exc})

    # Drive data depends only on the system, not on any chassis, so it is
    # collected once, outside the chassis loop.
    try:
        sensors['Drive'].update(self._get_sensors_drive(system))

    except sushy.exceptions.SushyError as exc:
        LOG.debug("Failed reading drive information for node "
                  "%(node)s: %(error)s", {'node': node.uuid,
                                          'error': exc})

    LOG.debug("Gathered sensor data: %(sensors)s", {'sensors': sensors})

    return sensors
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def inject_nmi(self, task): def inject_nmi(self, task):

View File

@ -215,11 +215,203 @@ class RedfishManagementTestCase(db_base.DbTestCase):
expected = boot_modes.LEGACY_BIOS expected = boot_modes.LEGACY_BIOS
self.assertEqual(expected, response) self.assertEqual(expected, response)
def test__get_sensors_fan(self):
    """Fan attributes and status are flattened under fan@chassis key."""
    attributes = {
        "identity": "XXX-YYY-ZZZ",
        "name": "CPU Fan",
        "status": {
            "state": "enabled",
            "health": "OK"
        },
        "reading": 6000,
        "reading_units": "RPM",
        "lower_threshold_fatal": 2000,
        "min_reading_range": 0,
        "max_reading_range": 10000,
        "serial_number": "SN010203040506",
        "physical_context": "CPU"
    }

    mock_chassis = mock.MagicMock(identity='ZZZ-YYY-XXX')

    mock_fans = mock_chassis.thermal.fans
    mock_fan = mock.MagicMock(**attributes)
    # `name` given to the Mock constructor names the mock itself, so the
    # attribute has to be assigned after construction.
    mock_fan.name = attributes['name']
    mock_fan.status = mock.MagicMock(**attributes['status'])
    mock_fans.get_members.return_value = [mock_fan]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        sensors = task.driver.management._get_sensors_fan(mock_chassis)

    expected = {
        'XXX-YYY-ZZZ@ZZZ-YYY-XXX': {
            'identity': 'XXX-YYY-ZZZ',
            'max_reading_range': 10000,
            'min_reading_range': 0,
            'physical_context': 'CPU',
            'reading': 6000,
            'reading_units': 'RPM',
            'serial_number': 'SN010203040506',
            'health': 'OK',
            'state': 'enabled'
        }
    }

    self.assertEqual(expected, sensors)
def test__get_sensors_temperatures(self):
    """Temperature attributes and status are keyed by sensor@chassis."""
    status = {
        "state": "enabled",
        "health": "OK"
    }
    attributes = {
        "identity": "XXX-YYY-ZZZ",
        "name": "CPU Temp",
        "status": status,
        "reading_celsius": 62,
        "upper_threshold_non_critical": 75,
        "upper_threshold_critical": 90,
        "upperThresholdFatal": 95,
        "min_reading_range_temp": 0,
        "max_reading_range_temp": 120,
        "physical_context": "CPU",
        "sensor_number": 1
    }

    temperature = mock.MagicMock(**attributes)
    # `name` given to the Mock constructor names the mock itself, so the
    # attribute has to be assigned after construction.
    temperature.name = attributes['name']
    temperature.status = mock.MagicMock(**status)

    chassis = mock.MagicMock(identity='ZZZ-YYY-XXX')
    chassis.thermal.temperatures.get_members.return_value = [temperature]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        management = task.driver.management
        sensors = management._get_sensors_temperatures(chassis)

    self.assertEqual(
        {
            'XXX-YYY-ZZZ@ZZZ-YYY-XXX': {
                'identity': 'XXX-YYY-ZZZ',
                'max_reading_range_temp': 120,
                'min_reading_range_temp': 0,
                'physical_context': 'CPU',
                'reading_celsius': 62,
                'sensor_number': 1,
                'health': 'OK',
                'state': 'enabled'
            }
        },
        sensors)
def test__get_sensors_power(self):
    """Supply, status and input-range fields merge into one entry."""
    status = {
        "state": "enabled",
        "health": "OK"
    }
    input_ranges = {
        'minimum_voltage': 185,
        'maximum_voltage': 250,
        'minimum_frequency_hz': 47,
        'maximum_frequency_hz': 63,
        'output_wattage': 1450
    }
    attributes = {
        'member_id': 0,
        'name': 'Power Supply 0',
        'power_capacity_watts': 1450,
        'last_power_output_watts': 650,
        'line_input_voltage': 220,
        'input_ranges': input_ranges,
        'serial_number': 'SN010203040506',
        "status": status
    }

    supply = mock.MagicMock(**attributes)
    # `name` given to the Mock constructor names the mock itself, so the
    # attribute has to be assigned after construction.
    supply.name = attributes['name']
    supply.status = mock.MagicMock(**status)
    supply.input_ranges = mock.MagicMock(**input_ranges)

    chassis = mock.MagicMock(identity='ZZZ-YYY-XXX')
    chassis.power.identity = 'Power'
    chassis.power.power_supplies = [supply]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        sensors = task.driver.management._get_sensors_power(chassis)

    self.assertEqual(
        {
            '0:Power@ZZZ-YYY-XXX': {
                'health': 'OK',
                'last_power_output_watts': 650,
                'line_input_voltage': 220,
                'maximum_frequency_hz': 63,
                'maximum_voltage': 250,
                'minimum_frequency_hz': 47,
                'minimum_voltage': 185,
                'output_wattage': 1450,
                'power_capacity_watts': 1450,
                'serial_number': 'SN010203040506',
                'state': 'enabled'
            }
        },
        sensors)
def test__get_sensors_data_drive(self):
    """Drive entries are keyed by drive:simple_storage@system identity."""
    status = {
        'health': 'OK',
        'state': 'enabled'
    }
    attributes = {
        'identity': '32ADF365C6C1B7BD',
        'model': 'IBM 350A',
        'capacity_bytes': 3750000000,
        'failure_predicted': True,
        'serial_number': 'SN010203040506',
        'status': status
    }

    drive = mock.MagicMock(**attributes)
    drive.status = mock.MagicMock(**status)

    storage = mock.MagicMock()
    storage.devices = [drive]

    system = mock.MagicMock(identity='ZZZ-YYY-XXX')
    system.simple_storage.identity = 'XXX-YYY-ZZZ'
    system.simple_storage.get_members.return_value = [storage]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        sensors = task.driver.management._get_sensors_drive(system)

    self.assertEqual(
        {
            '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX': {
                'capacity_bytes': 3750000000,
                'failure_predicted': True,
                'health': 'OK',
                'identity': '32ADF365C6C1B7BD',
                'model': 'IBM 350A',
                'state': 'enabled'
            }
        },
        sensors)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
def test_get_sensors_data(self, mock_system):
    """Every sensor group is reported, empty, for a bare mocked chassis."""
    mock_system.return_value.chassis = [mock.MagicMock()]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        sensors = task.driver.management.get_sensors_data(task)

    self.assertEqual(
        {
            'Fan': {},
            'Temperature': {},
            'Power': {},
            'Drive': {}
        },
        sensors)
@mock.patch.object(redfish_utils, 'get_system', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True)
def test_inject_nmi(self, mock_get_system): def test_inject_nmi(self, mock_get_system):

View File

@ -0,0 +1,5 @@
---
features:
- |
Adds sensor data collector to ``redfish`` management interface.
Temperature, power, cooling and drive health metrics are collected.