From 601f6d51f693c5b0e6b570234db415f12f881257 Mon Sep 17 00:00:00 2001
From: DhuldevValekar3
Date: Tue, 21 Apr 2020 07:15:02 -0500
Subject: [PATCH] Add iDRAC management via Redfish to idrac HW type

This change adds support for managing an iDRAC -- reset, clear job
queue, and reset to known good state -- via the Redfish out-of-band
(OOB) management protocol to the idrac hardware type. This is offered by
new idrac-redfish management hardware interface implementation cleaning
steps: reset_idrac, clear_job_queue, and known_good_state.
known_good_state both resets an iDRAC and clears its job queue.

Story: 2007617
Task: 39628

Depends-On: https://review.opendev.org/c/x/sushy-oem-idrac/+/782254
Change-Id: Iad69c8d7cf3a373f5cfcc619a479a106efa2e4d4
---
 driver-requirements.txt                       |   2 +-
 ironic/drivers/modules/drac/management.py     | 118 +++++++++++++++++-
 ironic/drivers/modules/redfish/utils.py       |  26 ++++
 .../drivers/modules/drac/test_management.py   |  45 ++++++-
 ...ish_mgmt_clean_steps-c983a8858835046d.yaml |   9 ++
 5 files changed, 197 insertions(+), 3 deletions(-)
 create mode 100644 releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml

diff --git a/driver-requirements.txt b/driver-requirements.txt
index 921c2a6490..457040ed48 100644
--- a/driver-requirements.txt
+++ b/driver-requirements.txt
@@ -20,4 +20,4 @@ ansible>=2.7
 python-ibmcclient>=0.2.2,<0.3.0
 
 # Dell EMC iDRAC sushy OEM extension
-sushy-oem-idrac>=2.0.0,<3.0.0
+sushy-oem-idrac>=2.1.0,<3.0.0
diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py
index e069e086c4..ee8fc37796 100644
--- a/ironic/drivers/modules/drac/management.py
+++ b/ironic/drivers/modules/drac/management.py
@@ -2,7 +2,7 @@
 #
 # Copyright 2014 Red Hat, Inc.
 # All Rights Reserved.
-# Copyright (c) 2017-2020 Dell Inc. or its subsidiaries.
+# Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -637,6 +637,122 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
         driver_internal_info.pop('export_configuration_location', None)
         node.driver_internal_info = driver_internal_info
 
+    @METRICS.timer('DracRedfishManagement.clear_job_queue')
+    @base.clean_step(priority=0)
+    def clear_job_queue(self, task):
+        """Clear iDRAC job queue.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        system = redfish_utils.get_system(task.node)
+        for manager in system.managers:
+            try:
+                oem_manager = manager.get_oem_extension('Dell')
+            except sushy.exceptions.OEMExtensionNotFoundError as e:
+                error_msg = (_("Search for Sushy OEM extension Python package "
+                               "'sushy-oem-idrac' failed for node %(node)s. "
+                               "Ensure it is installed. Error: %(error)s") %
+                             {'node': task.node.uuid, 'error': e})
+                LOG.error(error_msg)
+                raise exception.RedfishError(error=error_msg)
+            try:
+                oem_manager.job_service.delete_jobs(job_ids=['JID_CLEARALL'])
+            except sushy.exceptions.SushyError as e:
+                error_msg = ('Failed to clear iDRAC job queue with system '
+                             '%(system)s manager %(manager)s for node '
+                             '%(node)s. Will try next manager, if available. '
+                             'Error: %(error)s' %
+                             {'system': system.uuid if system.uuid else
+                              system.identity,
+                              'manager': manager.uuid if manager.uuid else
+                              manager.identity,
+                              'node': task.node.uuid,
+                              'error': e})
+                LOG.debug(error_msg)
+                continue
+            LOG.info('Cleared iDRAC job queue for node %(node)s',
+                     {'node': task.node.uuid})
+            break
+        else:
+            error_msg = (_('iDRAC Redfish clear job queue failed for node '
+                           '%(node)s, because system %(system)s has no '
+                           'manager%(no_manager)s.') %
+                         {'node': task.node.uuid,
+                          'system': system.uuid if system.uuid else
+                          system.identity,
+                          'no_manager': '' if not system.managers else
+                          ' which could clear the job queue'})
+            LOG.error(error_msg)
+            raise exception.RedfishError(error=error_msg)
+
+    @METRICS.timer('DracRedfishManagement.reset_idrac')
+    @base.clean_step(priority=0)
+    def reset_idrac(self, task):
+        """Reset the iDRAC.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        system = redfish_utils.get_system(task.node)
+        for manager in system.managers:
+            try:
+                oem_manager = manager.get_oem_extension('Dell')
+            except sushy.exceptions.OEMExtensionNotFoundError as e:
+                error_msg = (_("Search for Sushy OEM extension Python package "
+                               "'sushy-oem-idrac' failed for node %(node)s. "
+                               "Ensure it is installed. Error: %(error)s") %
+                             {'node': task.node.uuid, 'error': e})
+                LOG.error(error_msg)
+                raise exception.RedfishError(error=error_msg)
+            try:
+                oem_manager.reset_idrac()
+            except sushy.exceptions.SushyError as e:
+                error_msg = ('Failed to reset iDRAC with system %(system)s '
+                             'manager %(manager)s for node %(node)s. Will try '
+                             'next manager, if available. Error: %(error)s' %
+                             {'system': system.uuid if system.uuid else
+                              system.identity,
+                              'manager': manager.uuid if manager.uuid else
+                              manager.identity,
+                              'node': task.node.uuid,
+                              'error': e})
+                LOG.debug(error_msg)
+                continue
+            redfish_utils.wait_until_get_system_ready(task.node)
+            LOG.info('Reset iDRAC for node %(node)s', {'node': task.node.uuid})
+            break
+        else:
+            error_msg = (_('iDRAC Redfish reset iDRAC failed for node '
+                           '%(node)s, because system %(system)s has no '
+                           'manager%(no_manager)s.') %
+                         {'node': task.node.uuid,
+                          'system': system.uuid if system.uuid else
+                          system.identity,
+                          'no_manager': '' if not system.managers else
+                          ' which could reset the iDRAC'})
+            LOG.error(error_msg)
+            raise exception.RedfishError(error=error_msg)
+
+    @METRICS.timer('DracRedfishManagement.known_good_state')
+    @base.clean_step(priority=0)
+    def known_good_state(self, task):
+        """Reset iDRAC to known good state.
+
+        An iDRAC is reset to a known good state by resetting it and
+        clearing its job queue.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        self.reset_idrac(task)
+        self.clear_job_queue(task)
+        LOG.info('Reset iDRAC to known good state for node %(node)s',
+                 {'node': task.node.uuid})
+
 
 class DracWSManManagement(base.ManagementInterface):
 
diff --git a/ironic/drivers/modules/redfish/utils.py b/ironic/drivers/modules/redfish/utils.py
index 49e7062333..63259a87b9 100644
--- a/ironic/drivers/modules/redfish/utils.py
+++ b/ironic/drivers/modules/redfish/utils.py
@@ -1,5 +1,6 @@
 # Copyright 2017 Red Hat, Inc.
 # All Rights Reserved.
+# Copyright (c) 2020-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License.
You may obtain
@@ -372,3 +373,28 @@ def get_enabled_macs(task, system):
     else:
         LOG.debug("No ethernet interface information is available "
                   "for node %(node)s", {'node': task.node.uuid})
+
+
+@tenacity.retry(
+    retry=tenacity.retry_if_exception_type(
+        exception.RedfishConnectionError),
+    stop=tenacity.stop_after_attempt(CONF.redfish.connection_attempts),
+    wait=tenacity.wait_fixed(CONF.redfish.connection_retry_interval),
+    reraise=True)
+def wait_until_get_system_ready(node):
+    """Wait until Redfish system is ready.
+
+    :param node: an Ironic node object
+    :returns: the system object returned by the Redfish connection.
+    :raises: RedfishConnectionError on time out.
+    """
+    driver_info = parse_driver_info(node)
+    system_id = driver_info['system_id']
+    try:
+        with SessionCache(driver_info) as conn:
+            return conn.get_system(system_id)
+    except sushy.exceptions.BadRequestError as e:
+        LOG.warning("System is not ready for node %(node)s, with error "
+                    "%(error)s, so retrying it",
+                    {'node': node.uuid, 'error': e})
+        raise exception.RedfishConnectionError(node=node.uuid, error=e)
diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py
index 227d5cd5f7..d2a290525a 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_management.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_management.py
@@ -2,7 +2,7 @@
 #
 # Copyright 2014 Red Hat, Inc.
 # All Rights Reserved.
-# Copyright (c) 2017-2018 Dell Inc. or its subsidiaries.
+# Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License.
You may obtain
@@ -839,6 +839,10 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
                                                driver_info=INFO_DICT)
         self.management = drac_mgmt.DracRedfishManagement()
 
+        self.config(enabled_hardware_types=['idrac'],
+                    enabled_power_interfaces=['idrac-redfish'],
+                    enabled_management_interfaces=['idrac-redfish'])
+
     def test_export_configuration_name_missing(self):
         task = mock.Mock(node=self.node, context=self.context)
         self.assertRaises(exception.MissingParameterValue,
@@ -1423,3 +1427,42 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
 
         mock_deploy_handler.assert_called_once_with(
             task, 'error', 'log message')
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_clear_job_queue(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.clear_job_queue(task)
+            mock_manager_oem.job_service.delete_jobs.assert_called_once_with(
+                job_ids=['JID_CLEARALL'])
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_reset_idrac(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.reset_idrac(task)
+            mock_manager_oem.reset_idrac.assert_called_once_with()
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_known_good_state(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.known_good_state(task)
+            mock_manager_oem.job_service.delete_jobs.assert_called_once_with(
+                job_ids=['JID_CLEARALL'])
+            mock_manager_oem.reset_idrac.assert_called_once_with()
diff --git a/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml b/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml
new file mode 100644
index 0000000000..af4a531215
--- /dev/null
+++ b/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml
@@ -0,0 +1,9 @@
+---
+features:
+  - |
+    Adds support for managing an iDRAC -- reset, clear job queue, and reset to
+    known good state -- via the Redfish out-of-band (OOB) management protocol
+    to the ``idrac`` hardware type. This is offered by new ``idrac-redfish``
+    management hardware interface implementation cleaning steps:
+    ``reset_idrac``, ``clear_job_queue``, and ``known_good_state``.
+    ``known_good_state`` both resets an iDRAC and clears its job queue.