Fix idrac-wsman having Completed with Errors jobs

iDRAC jobs can finish in the 'Completed', 'Failed' or
'Completed with Errors' state. This fix handles
'Completed with Errors' as a finished, failed job. Otherwise the
node stays in the wait state, because such jobs are never
considered finished.

Change-Id: I5018bf8ef6c86c6d303258f1497fa83d33b3cb76
This commit is contained in:
Aija Jauntēva 2021-09-17 10:17:25 -04:00
parent de50ff2df5
commit 6e0c0e7fd0
5 changed files with 73 additions and 2 deletions

View File

@ -292,7 +292,8 @@ class DracWSManBIOS(base.BIOSInterface):
if config_job is None or config_job.status == 'Completed': if config_job is None or config_job.status == 'Completed':
finished_job_ids.append(config_job_id) finished_job_ids.append(config_job_id)
elif config_job.status == 'Failed': elif (config_job.status == 'Failed'
or config_job.status == 'Completed with Errors'):
finished_job_ids.append(config_job_id) finished_job_ids.append(config_job_id)
job_failed = True job_failed = True

View File

@ -1815,7 +1815,8 @@ class DracWSManRAID(base.RAIDInterface):
if config_job is None or config_job.status == 'Completed': if config_job is None or config_job.status == 'Completed':
finished_job_ids.append(config_job_id) finished_job_ids.append(config_job_id)
elif config_job.status == 'Failed': elif (config_job.status == 'Failed'
or config_job.status == 'Completed with Errors'):
finished_job_ids.append(config_job_id) finished_job_ids.append(config_job_id)
self._set_raid_config_job_failure(node) self._set_raid_config_job_failure(node)

View File

@ -329,6 +329,34 @@ class DracWSManBIOSConfigurationTestCase(test_utils.BaseDracTest):
mock_cleaning_error_handler.assert_called_once_with( mock_cleaning_error_handler.assert_called_once_with(
task, mock.ANY, "Failed config job: 123. Message: 'Invalid'.") task, mock.ANY, "Failed config job: 123. Message: 'Invalid'.")
# Test: a BIOS config job whose status is 'Completed with Errors' is removed
# from 'bios_config_job_ids' and reported through cleaning_error_handler.
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(drac_job, 'get_job', spec_set=True,
autospec=True)
def test__check_node_bios_jobs_completed_with_errors(
self, mock_get_job, mock_cleaning_error_handler):
# Stand-in job object handed back by the patched drac_job.get_job.
mock_job = mock.Mock()
mock_job.status = 'Completed with Errors'
mock_job.id = '123'
mock_job.message = 'PR31: Completed with Errors'
mock_get_job.return_value = mock_job
with task_manager.acquire(self.context, self.node.uuid) as task:
# Register job '123' as pending on the node; the dict is read, modified
# and assigned back to the node before saving.
driver_internal_info = task.node.driver_internal_info
driver_internal_info['bios_config_job_ids'] = ['123']
task.node.driver_internal_info = driver_internal_info
# A clean step is set so the failure is routed to the cleaning handler
# (presumably the deploy handler would be used otherwise -- confirm
# against _check_node_bios_jobs).
task.node.clean_step = {'priority': 100, 'interface': 'bios',
'step': 'factory_reset', 'argsinfo': {}}
task.node.save()
task.driver.bios._check_node_bios_jobs(task)
# The job must no longer be listed as pending.
self.assertEqual([],
task.node.driver_internal_info.get(
'bios_config_job_ids'))
# The expected error text carries both the job id and the job message.
mock_cleaning_error_handler.assert_called_once_with(
task, mock.ANY, "Failed config job: 123. Message: "
"'PR31: Completed with Errors'.")
def test__check_last_system_inventory_changed_different_inventory_time( def test__check_last_system_inventory_changed_different_inventory_time(
self): self):
with task_manager.acquire(self.context, self.node.uuid, with task_manager.acquire(self.context, self.node.uuid,

View File

@ -236,6 +236,39 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
self.assertEqual({}, self.node.raid_config) self.assertEqual({}, self.node.raid_config)
mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY) mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
# Test: a RAID config job whose status is 'Completed with Errors' is removed
# from 'raid_config_job_ids', leaves raid_config empty, and triggers
# cleaning_error_handler.
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
autospec=True)
def test__check_node_raid_jobs_with_completed_with_errors_job(
self, mock_get_drac_client, mock_cleaning_error_handler):
# mock node.driver_internal_info and node.clean_step
driver_internal_info = {'raid_config_job_ids': ['42']}
self.node.driver_internal_info = driver_internal_info
self.node.clean_step = {'foo': 'bar'}
self.node.save()
# mock task
task = mock.Mock(node=self.node, context=self.context)
# mock dracclient.get_job
# NOTE(review): self.job and self.virtual_disk are presumably fixtures
# prepared in setUp, which is outside this view -- confirm.
self.job['status'] = 'Completed with Errors'
self.job['message'] = 'PR31: Completed with Errors'
mock_client = mock.Mock()
mock_get_drac_client.return_value = mock_client
mock_client.get_job.return_value = test_utils.dict_to_namedtuple(
values=self.job)
# mock dracclient.list_virtual_disks
mock_client.list_virtual_disks.return_value = [
test_utils.dict_to_namedtuple(values=self.virtual_disk)]
self.raid._check_node_raid_jobs(task)
# The job is looked up exactly once, by its id.
mock_client.get_job.assert_called_once_with('42')
# Virtual disks are stubbed above but must not be queried for a failed job.
self.assertEqual(0, mock_client.list_virtual_disks.call_count)
# Reload the node to check the values that were saved.
self.node.refresh()
self.assertEqual([],
self.node.driver_internal_info['raid_config_job_ids'])
self.assertEqual({}, self.node.raid_config)
mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
@mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True) @mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True)
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True, @mock.patch.object(drac_common, 'get_drac_client', spec_set=True,

View File

@ -0,0 +1,8 @@
---
fixes:
- |
Fixes the ``idrac-wsman`` BIOS and RAID interface steps to correctly check
the status of an iDRAC job that completed with errors. Such jobs are now
treated as failures. Before this fix, the node stayed in the wait state
because only the "Completed" and "Failed" job statuses were checked, not
"Completed with Errors".