DRAC: Fix a bug for delete_config with multiple controllers

This fix a bug where a race condition can occur on a host that
has a mix of controllers where some supports realtime mode and
some do not. The approach is to use only realtime mode if all
controllers supports realtime. This removes the race condition.

Story: #2006502
Task: #36480

Change-Id: I18aa79774c0b562bf1e8b973db3d7860a01367e7
This commit is contained in:
Rachit7194 2019-09-06 07:01:07 -04:00
parent 2408987175
commit e4f3b7b813
3 changed files with 132 additions and 39 deletions

View File

@ -754,6 +754,25 @@ def _filter_logical_disks(logical_disks, include_root_volume,
return filtered_disks
def _create_config_job(node, controller, reboot=False, realtime=False,
raid_config_job_ids=[],
raid_config_parameters=[]):
job_id = commit_config(node, raid_controller=controller,
reboot=reboot, realtime=realtime)
raid_config_job_ids.append(job_id)
if controller not in raid_config_parameters:
raid_config_parameters.append(controller)
LOG.info('Change has been committed to RAID controller '
'%(controller)s on node %(node)s. '
'DRAC job id: %(job_id)s',
{'controller': controller, 'node': node.uuid,
'job_id': job_id})
return {'raid_config_job_ids': raid_config_job_ids,
'raid_config_parameters': raid_config_parameters}
def _commit_to_controllers(node, controllers, substep="completed"):
"""Commit changes to RAID controllers on the node.
@ -776,9 +795,18 @@ def _commit_to_controllers(node, controllers, substep="completed"):
configuration is in progress asynchronously or None if it is
completed.
"""
# remove controller which does not require configuration job
controllers = [controller for controller in controllers
if controller['is_commit_required']]
if not controllers:
LOG.debug('No changes on any of the controllers on node %s',
node.uuid)
driver_internal_info = node.driver_internal_info
driver_internal_info['raid_config_substep'] = substep
driver_internal_info['raid_config_parameters'] = []
node.driver_internal_info = driver_internal_info
node.save()
return
driver_internal_info = node.driver_internal_info
@ -788,43 +816,35 @@ def _commit_to_controllers(node, controllers, substep="completed"):
if 'raid_config_job_ids' not in driver_internal_info:
driver_internal_info['raid_config_job_ids'] = []
# remove controller which does not require configuration job
controllers = [controller for controller in controllers
if controller['is_commit_required']]
all_realtime = True
optional = drac_constants.RebootRequired.optional
for controller in controllers:
raid_controller = controller['raid_controller']
all_realtime = all(cntlr['is_reboot_required'] == optional
for cntlr in controllers)
raid_config_job_ids = []
raid_config_parameters = []
if all_realtime:
for controller in controllers:
realtime_controller = controller['raid_controller']
job_details = _create_config_job(
node, controller=realtime_controller,
reboot=False, realtime=True,
raid_config_job_ids=raid_config_job_ids,
raid_config_parameters=raid_config_parameters)
# Commit the configuration
# The logic below will reboot the node if there is at least one
# controller without real time support. In that case the reboot
# is triggered when the configuration is committed to the last
# controller.
realtime = controller['is_reboot_required'] == optional
all_realtime = all_realtime and realtime
if controller == controllers[-1]:
job_id = commit_config(node, raid_controller=raid_controller,
reboot=not all_realtime,
realtime=realtime)
else:
job_id = commit_config(node, raid_controller=raid_controller,
reboot=False,
realtime=realtime)
else:
for controller in controllers:
mix_controller = controller['raid_controller']
reboot = True if controller == controllers[-1] else False
job_details = _create_config_job(
node, controller=mix_controller,
reboot=reboot, realtime=False,
raid_config_job_ids=raid_config_job_ids,
raid_config_parameters=raid_config_parameters)
LOG.info('Change has been committed to RAID controller '
'%(controller)s on node %(node)s. '
'DRAC job id: %(job_id)s',
{'controller': controller, 'node': node.uuid,
'job_id': job_id})
driver_internal_info['raid_config_job_ids'] = job_details[
'raid_config_job_ids']
driver_internal_info['raid_config_job_ids'].append(job_id)
if raid_controller not in driver_internal_info[
'raid_config_parameters']:
driver_internal_info['raid_config_parameters'].append(
raid_controller)
driver_internal_info['raid_config_parameters'] = job_details[
'raid_config_parameters']
node.driver_internal_info = driver_internal_info
@ -1094,10 +1114,10 @@ class DracWSManRAID(base.RAIDInterface):
'raid_config_parameters']:
controller_cap = clear_foreign_config(
node, controller_id)
controller = {'raid_controller': controller_id,
'is_reboot_required':
controller_cap[
'is_reboot_required']}
controller = {
'raid_controller': controller_id,
'is_reboot_required': controller_cap['is_reboot_required'],
'is_commit_required': controller_cap['is_commit_required']}
controllers.append(controller)
jobs_required = jobs_required or controller_cap[
'is_commit_required']

View File

@ -362,8 +362,7 @@ class DracManageVirtualDisksTestCase(test_utils.BaseDracTest):
substep=substep)
self.assertEqual(0, mock_commit_config.call_count)
self.assertEqual([],
self.node.driver_internal_info['raid_config_job_ids'])
self.assertNotIn('raid_config_job_ids', self.node.driver_internal_info)
self.assertEqual(substep,
self.node.driver_internal_info['raid_config_substep'])
@ -1702,6 +1701,71 @@ class DracRaidInterfaceTestCase(test_utils.BaseDracTest):
self.node.save()
self._test_delete_configuration(states.DEPLOYWAIT)
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
autospec=True)
@mock.patch.object(drac_raid, 'list_raid_controllers', autospec=True)
@mock.patch.object(drac_job, 'validate_job_queue', spec_set=True,
autospec=True)
@mock.patch.object(drac_raid, 'commit_config', spec_set=True,
autospec=True)
@mock.patch.object(drac_raid, '_reset_raid_config', spec_set=True,
autospec=True)
def test_delete_configuration_with_non_realtime_controller(
self, mock__reset_raid_config, mock_commit_config,
mock_validate_job_queue, mock_list_raid_controllers,
mock_get_drac_client):
mock_client = mock.Mock()
mock_get_drac_client.return_value = mock_client
expected_raid_config_params = ['AHCI.Slot.3-1', 'RAID.Integrated.1-1']
mix_controllers = [{'id': 'AHCI.Slot.3-1',
'description': 'AHCI controller in slot 3',
'manufacturer': 'DELL',
'model': 'BOSS-S1',
'primary_status': 'unknown',
'firmware_version': '2.5.13.3016',
'bus': '5E',
'supports_realtime': False},
{'id': 'RAID.Integrated.1-1',
'description': 'Integrated RAID Controller 1',
'manufacturer': 'DELL',
'model': 'PERC H740 Mini',
'primary_status': 'unknown',
'firmware_version': '50.5.0-1750',
'bus': '3C',
'supports_realtime': True}]
mock_list_raid_controllers.return_value = [
test_utils.make_raid_controller(controller) for
controller in mix_controllers]
mock_commit_config.side_effect = ['42', '12']
mock__reset_raid_config.side_effect = [{
'is_reboot_required': constants.RebootRequired.optional,
'is_commit_required': True
}, {
'is_reboot_required': constants.RebootRequired.true,
'is_commit_required': True
}]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
return_value = task.driver.raid.delete_configuration(task)
mock_commit_config.assert_has_calls(
[mock.call(mock.ANY, raid_controller='AHCI.Slot.3-1',
reboot=False, realtime=False),
mock.call(mock.ANY, raid_controller='RAID.Integrated.1-1',
reboot=True, realtime=False)],
any_order=True)
self.assertEqual(states.CLEANWAIT, return_value)
self.node.refresh()
self.assertEqual(expected_raid_config_params,
self.node.driver_internal_info[
'raid_config_parameters'])
self.assertEqual(['42', '12'],
self.node.driver_internal_info['raid_config_job_ids'])
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
autospec=True)
@mock.patch.object(drac_raid, 'list_raid_controllers', autospec=True)

View File

@ -0,0 +1,9 @@
fixes:
- |
Fixes a bug in the ``idrac`` hardware type where a race condition
can occur on a host that has a mix of controllers where some support
realtime mode and some do not. The approach is to use only realtime
mode if all controllers support realtime. This removes the race
condition.
See bug `2006502 https://storyboard.openstack.org/#!/story/2006502`
for details