From e4f3b7b813007b90a579b7d3f6c4990a6affeb39 Mon Sep 17 00:00:00 2001 From: Rachit7194 Date: Fri, 6 Sep 2019 07:01:07 -0400 Subject: [PATCH] DRAC: Fix a bug for delete_config with multiple controllers This fix a bug where a race condition can occur on a host that has a mix of controllers where some supports realtime mode and some do not. The approach is to use only realtime mode if all controllers supports realtime. This removes the race condition. Story: #2006502 Task: #36480 Change-Id: I18aa79774c0b562bf1e8b973db3d7860a01367e7 --- ironic/drivers/modules/drac/raid.py | 94 +++++++++++-------- .../unit/drivers/modules/drac/test_raid.py | 68 +++++++++++++- ...multiple-controllers-06fc3fca94ba870f.yaml | 9 ++ 3 files changed, 132 insertions(+), 39 deletions(-) create mode 100644 releasenotes/notes/fix-delete_configuration-with-multiple-controllers-06fc3fca94ba870f.yaml diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py index bcddee5108..c513334320 100644 --- a/ironic/drivers/modules/drac/raid.py +++ b/ironic/drivers/modules/drac/raid.py @@ -754,6 +754,25 @@ def _filter_logical_disks(logical_disks, include_root_volume, return filtered_disks +def _create_config_job(node, controller, reboot=False, realtime=False, + raid_config_job_ids=[], + raid_config_parameters=[]): + job_id = commit_config(node, raid_controller=controller, + reboot=reboot, realtime=realtime) + + raid_config_job_ids.append(job_id) + if controller not in raid_config_parameters: + raid_config_parameters.append(controller) + + LOG.info('Change has been committed to RAID controller ' + '%(controller)s on node %(node)s. ' + 'DRAC job id: %(job_id)s', + {'controller': controller, 'node': node.uuid, + 'job_id': job_id}) + return {'raid_config_job_ids': raid_config_job_ids, + 'raid_config_parameters': raid_config_parameters} + + def _commit_to_controllers(node, controllers, substep="completed"): """Commit changes to RAID controllers on the node. @@ -776,9 +795,18 @@ def _commit_to_controllers(node, controllers, substep="completed"): configuration is in progress asynchronously or None if it is completed. """ + # remove controller which does not require configuration job + controllers = [controller for controller in controllers + if controller['is_commit_required']] + if not controllers: LOG.debug('No changes on any of the controllers on node %s', node.uuid) + driver_internal_info = node.driver_internal_info + driver_internal_info['raid_config_substep'] = substep + driver_internal_info['raid_config_parameters'] = [] + node.driver_internal_info = driver_internal_info + node.save() return driver_internal_info = node.driver_internal_info @@ -788,43 +816,35 @@ def _commit_to_controllers(node, controllers, substep="completed"): if 'raid_config_job_ids' not in driver_internal_info: driver_internal_info['raid_config_job_ids'] = [] - # remove controller which does not require configuration job - controllers = [controller for controller in controllers - if controller['is_commit_required']] - - all_realtime = True optional = drac_constants.RebootRequired.optional - for controller in controllers: - raid_controller = controller['raid_controller'] + all_realtime = all(cntlr['is_reboot_required'] == optional + for cntlr in controllers) + raid_config_job_ids = [] + raid_config_parameters = [] + if all_realtime: + for controller in controllers: + realtime_controller = controller['raid_controller'] + job_details = _create_config_job( + node, controller=realtime_controller, + reboot=False, realtime=True, + raid_config_job_ids=raid_config_job_ids, + raid_config_parameters=raid_config_parameters) - # Commit the configuration - # The logic below will reboot the node if there is at least one - # controller without real time support. In that case the reboot - # is triggered when the configuration is committed to the last - # controller. - realtime = controller['is_reboot_required'] == optional - all_realtime = all_realtime and realtime - if controller == controllers[-1]: - job_id = commit_config(node, raid_controller=raid_controller, - reboot=not all_realtime, - realtime=realtime) - else: - job_id = commit_config(node, raid_controller=raid_controller, - reboot=False, - realtime=realtime) + else: + for controller in controllers: + mix_controller = controller['raid_controller'] + reboot = True if controller == controllers[-1] else False + job_details = _create_config_job( + node, controller=mix_controller, + reboot=reboot, realtime=False, + raid_config_job_ids=raid_config_job_ids, + raid_config_parameters=raid_config_parameters) - LOG.info('Change has been committed to RAID controller ' - '%(controller)s on node %(node)s. ' - 'DRAC job id: %(job_id)s', - {'controller': controller, 'node': node.uuid, - 'job_id': job_id}) + driver_internal_info['raid_config_job_ids'] = job_details[ + 'raid_config_job_ids'] - driver_internal_info['raid_config_job_ids'].append(job_id) - - if raid_controller not in driver_internal_info[ - 'raid_config_parameters']: - driver_internal_info['raid_config_parameters'].append( - raid_controller) + driver_internal_info['raid_config_parameters'] = job_details[ + 'raid_config_parameters'] node.driver_internal_info = driver_internal_info @@ -1094,10 +1114,10 @@ class DracWSManRAID(base.RAIDInterface): 'raid_config_parameters']: controller_cap = clear_foreign_config( node, controller_id) - controller = {'raid_controller': controller_id, - 'is_reboot_required': - controller_cap[ - 'is_reboot_required']} + controller = { + 'raid_controller': controller_id, + 'is_reboot_required': controller_cap['is_reboot_required'], + 'is_commit_required': controller_cap['is_commit_required']} controllers.append(controller) jobs_required = jobs_required or controller_cap[ 'is_commit_required'] diff --git a/ironic/tests/unit/drivers/modules/drac/test_raid.py b/ironic/tests/unit/drivers/modules/drac/test_raid.py index d0246f3829..2c9603c91c 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_raid.py +++ b/ironic/tests/unit/drivers/modules/drac/test_raid.py @@ -362,8 +362,7 @@ class DracManageVirtualDisksTestCase(test_utils.BaseDracTest): substep=substep) self.assertEqual(0, mock_commit_config.call_count) - self.assertEqual([], - self.node.driver_internal_info['raid_config_job_ids']) + self.assertNotIn('raid_config_job_ids', self.node.driver_internal_info) self.assertEqual(substep, self.node.driver_internal_info['raid_config_substep']) @@ -1702,6 +1701,71 @@ class DracRaidInterfaceTestCase(test_utils.BaseDracTest): self.node.save() self._test_delete_configuration(states.DEPLOYWAIT) + @mock.patch.object(drac_common, 'get_drac_client', spec_set=True, + autospec=True) + @mock.patch.object(drac_raid, 'list_raid_controllers', autospec=True) + @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + autospec=True) + @mock.patch.object(drac_raid, 'commit_config', spec_set=True, + autospec=True) + @mock.patch.object(drac_raid, '_reset_raid_config', spec_set=True, + autospec=True) + def test_delete_configuration_with_non_realtime_controller( + self, mock__reset_raid_config, mock_commit_config, + mock_validate_job_queue, mock_list_raid_controllers, + mock_get_drac_client): + mock_client = mock.Mock() + mock_get_drac_client.return_value = mock_client + expected_raid_config_params = ['AHCI.Slot.3-1', 'RAID.Integrated.1-1'] + mix_controllers = [{'id': 'AHCI.Slot.3-1', + 'description': 'AHCI controller in slot 3', + 'manufacturer': 'DELL', + 'model': 'BOSS-S1', + 'primary_status': 'unknown', + 'firmware_version': '2.5.13.3016', + 'bus': '5E', + 'supports_realtime': False}, + {'id': 'RAID.Integrated.1-1', + 'description': 'Integrated RAID Controller 1', + 'manufacturer': 'DELL', + 'model': 'PERC H740 Mini', + 'primary_status': 'unknown', + 'firmware_version': '50.5.0-1750', + 'bus': '3C', + 'supports_realtime': True}] + + mock_list_raid_controllers.return_value = [ + test_utils.make_raid_controller(controller) for + controller in mix_controllers] + + mock_commit_config.side_effect = ['42', '12'] + mock__reset_raid_config.side_effect = [{ + 'is_reboot_required': constants.RebootRequired.optional, + 'is_commit_required': True + }, { + 'is_reboot_required': constants.RebootRequired.true, + 'is_commit_required': True + }] + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + return_value = task.driver.raid.delete_configuration(task) + + mock_commit_config.assert_has_calls( + [mock.call(mock.ANY, raid_controller='AHCI.Slot.3-1', + reboot=False, realtime=False), + mock.call(mock.ANY, raid_controller='RAID.Integrated.1-1', + reboot=True, realtime=False)], + any_order=True) + + self.assertEqual(states.CLEANWAIT, return_value) + self.node.refresh() + self.assertEqual(expected_raid_config_params, + self.node.driver_internal_info[ + 'raid_config_parameters']) + self.assertEqual(['42', '12'], + self.node.driver_internal_info['raid_config_job_ids']) + @mock.patch.object(drac_common, 'get_drac_client', spec_set=True, autospec=True) @mock.patch.object(drac_raid, 'list_raid_controllers', autospec=True) diff --git a/releasenotes/notes/fix-delete_configuration-with-multiple-controllers-06fc3fca94ba870f.yaml b/releasenotes/notes/fix-delete_configuration-with-multiple-controllers-06fc3fca94ba870f.yaml new file mode 100644 index 0000000000..38c637df4c --- /dev/null +++ b/releasenotes/notes/fix-delete_configuration-with-multiple-controllers-06fc3fca94ba870f.yaml @@ -0,0 +1,9 @@ +fixes: + - | + Fixes a bug in the ``idrac`` hardware type where a race condition + can occur on a host that has a mix of controllers where some support + realtime mode and some do not. The approach is to use only realtime + mode if all controllers support realtime. This removes the race + condition. + See bug `2006502 https://storyboard.openstack.org/#!/story/2006502` + for details