From dd9c5fcedfe4acdeaf8dd525ddc41a62229d4d53 Mon Sep 17 00:00:00 2001 From: Pradip Kadam Date: Thu, 1 Aug 2019 08:53:16 -0400 Subject: [PATCH] DRAC : clear_job_queue clean step to fix pending bios config jobs If pending bios config job for changing the boot mode is in job queue on an overcloud node, then ironic will abandon the introspection attempt for the node, which will cause overall introspection to fail. Added clear_job_queue cleaning step in the iDRAC driver that clears the Lifecycle Controller job queue even if there is pending bios config job or any other pending job in job queue. Change-Id: I35e4c61dd44384429224dee6de9827d8debe9cf9 Story: 2006322 Task: 36053 --- ironic/drivers/modules/drac/management.py | 36 +++- .../drivers/modules/drac/test_management.py | 178 ++++++++++++++---- ...step_clear_job_queue-7b774d8d0e36d1b2.yaml | 11 ++ 3 files changed, 185 insertions(+), 40 deletions(-) create mode 100644 releasenotes/notes/add_clean_step_clear_job_queue-7b774d8d0e36d1b2.yaml diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py index 339f3268d2..ee4767b669 100644 --- a/ironic/drivers/modules/drac/management.py +++ b/ironic/drivers/modules/drac/management.py @@ -80,6 +80,9 @@ _NON_PERSISTENT_BOOT_MODE = 'OneTime' # Clear job id's constant _CLEAR_JOB_IDS = 'JID_CLEARALL_FORCE' +# BIOS pending job constant +_BIOS_JOB_NAME = 'BIOS.Setup.1-1' + def _get_boot_device(node, drac_boot_devices=None): client = drac_common.get_drac_client(node) @@ -187,10 +190,19 @@ def set_boot_device(node, device, persistent=False): :raises: DracOperationError on an error from python-dracclient. """ - drac_job.validate_job_queue(node) - client = drac_common.get_drac_client(node) + # If pending BIOS job found in job queue, we need to clear that job + # before executing cleaning step of management interface. + # Otherwise, pending BIOS config job can cause creating new config jobs + # to fail. + unfinished_jobs = drac_job.list_unfinished_jobs(node) + if unfinished_jobs: + unfinished_bios_jobs = [job.id for job in unfinished_jobs if + _BIOS_JOB_NAME in job.name] + if unfinished_bios_jobs: + client.delete_jobs(job_ids=unfinished_bios_jobs) + try: drac_boot_devices = client.list_boot_devices() @@ -414,3 +426,23 @@ class DracManagement(base.ManagementInterface): client = drac_common.get_drac_client(node) client.reset_idrac(force=True, wait=True) client.delete_jobs(job_ids=[_CLEAR_JOB_IDS]) + + @METRICS.timer('DracManagement.clear_job_queue') + @base.clean_step(priority=0) + def clear_job_queue(self, task): + """Clear the job queue. + + :param task: a TaskManager instance containing the node to act on. + :returns: None if it is completed. + :raises: DracOperationError on an error from python-dracclient. + """ + try: + node = task.node + + client = drac_common.get_drac_client(node) + client.delete_jobs(job_ids=[_CLEAR_JOB_IDS]) + except drac_exceptions.BaseClientException as exc: + LOG.error('DRAC driver failed to clear the job queue for node ' + '%(node_uuid)s. Reason: %(error)s.', + {'node_uuid': node.uuid, 'error': exc}) + raise exception.DracOperationError(error=exc) diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py index 447d64e95a..c16662e3c4 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_management.py +++ b/ironic/tests/unit/drivers/modules/drac/test_management.py @@ -248,15 +248,17 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) - def test_set_boot_device(self, mock_validate_job_queue, + def test_set_boot_device(self, mock_list_unfinished_jobs, mock__get_boot_device, mock__get_next_persistent_boot_mode, mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client mock_client.list_boot_devices.return_value = self.boot_devices['IPL'] + mock_list_unfinished_jobs.return_value = [] + mock_job = mock.Mock() mock_job.status = "Scheduled" mock_client.get_job.return_value = mock_job @@ -269,7 +271,7 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): boot_device = drac_mgmt.set_boot_device( self.node, ironic.common.boot_devices.PXE, persistent=False) - mock_validate_job_queue.assert_called_once_with(self.node) + mock_list_unfinished_jobs.assert_called_once_with(self.node) mock_client.change_boot_device_order.assert_called_once_with( 'OneTime', 'BIOS.Setup.1-1#BootSeq#NIC.Embedded.1-1-1') self.assertEqual(0, mock_client.set_bios_settings.call_count) @@ -279,10 +281,10 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) def test_set_boot_device_called_with_no_change( - self, mock_validate_job_queue, mock__get_boot_device, + self, mock_list_unfinished_jobs, mock__get_boot_device, mock__get_next_persistent_boot_mode, mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client @@ -291,11 +293,12 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): 'persistent': True} mock__get_boot_device.return_value = boot_device mock__get_next_persistent_boot_mode.return_value = 'IPL' + mock_list_unfinished_jobs.return_value = [] boot_device = drac_mgmt.set_boot_device( self.node, ironic.common.boot_devices.PXE, persistent=True) - mock_validate_job_queue.assert_called_once_with(self.node) + mock_list_unfinished_jobs.assert_called_once_with(self.node) self.assertEqual(0, mock_client.change_boot_device_order.call_count) self.assertEqual(0, mock_client.set_bios_settings.call_count) self.assertEqual(0, mock_client.commit_pending_bios_changes.call_count) @@ -308,17 +311,18 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) def test_set_boot_device_called_with_no_drac_boot_device( - self, mock_validate_job_queue, mock__get_boot_device, - mock__get_next_persistent_boot_mode, + self, mock_list_unfinished_jobs, + mock__get_boot_device, mock__get_next_persistent_boot_mode, mock__is_boot_order_flexibly_programmable, mock__flexibly_program_boot_order, mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client mock_client.list_boot_devices.return_value = self.boot_devices['UEFI'] + mock_list_unfinished_jobs.return_value = [] mock_job = mock.Mock() mock_job.status = "Scheduled" @@ -354,7 +358,7 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): drac_mgmt.set_boot_device(self.node, ironic.common.boot_devices.DISK, persistent=True) - mock_validate_job_queue.assert_called_once_with(self.node) + mock_list_unfinished_jobs.assert_called_once_with(self.node) self.assertEqual(0, mock_client.change_boot_device_order.call_count) mock_client.set_bios_settings.assert_called_once_with( flexibly_program_settings) @@ -366,15 +370,16 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) def test_set_boot_device_called_with_not_flexibly_programmable( - self, mock_validate_job_queue, mock__get_boot_device, - mock__get_next_persistent_boot_mode, + self, mock_list_unfinished_jobs, + mock__get_boot_device, mock__get_next_persistent_boot_mode, mock__is_boot_order_flexibly_programmable, mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client + mock_list_unfinished_jobs.return_value = [] mock_client.list_boot_devices.return_value = self.boot_devices['UEFI'] boot_device = {'boot_device': ironic.common.boot_devices.PXE, 'persistent': False} @@ -386,7 +391,7 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): drac_mgmt.set_boot_device, self.node, ironic.common.boot_devices.CDROM, persistent=False) - mock_validate_job_queue.assert_called_once_with(self.node) + mock_list_unfinished_jobs.assert_called_once_with(self.node) self.assertEqual(0, mock_client.change_boot_device_order.call_count) self.assertEqual(0, mock_client.set_bios_settings.call_count) self.assertEqual(0, mock_client.commit_pending_bios_changes.call_count) @@ -397,15 +402,16 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) def test_set_boot_device_called_with_unknown_boot_mode( - self, mock_validate_job_queue, mock__get_boot_device, + self, mock_list_unfinished_jobs, mock__get_boot_device, mock__get_next_persistent_boot_mode, mock__is_boot_order_flexibly_programmable, mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client + mock_client.list_boot_devices.return_value = self.boot_devices['UEFI'] boot_device = {'boot_device': ironic.common.boot_devices.PXE, 'persistent': False} @@ -426,29 +432,11 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): values=s) for s in settings} mock_client.list_bios_settings.return_value = bios_settings mock__is_boot_order_flexibly_programmable.return_value = True - + mock_list_unfinished_jobs.return_value = [] self.assertRaises(exception.DracOperationError, drac_mgmt.set_boot_device, self.node, ironic.common.boot_devices.DISK, persistent=True) - - mock_validate_job_queue.assert_called_once_with(self.node) - self.assertEqual(0, mock_client.change_boot_device_order.call_count) - self.assertEqual(0, mock_client.set_bios_settings.call_count) - self.assertEqual(0, mock_client.commit_pending_bios_changes.call_count) - - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, - autospec=True) - def test_set_boot_device_with_invalid_job_queue( - self, mock_validate_job_queue, mock_get_drac_client): - mock_client = mock.Mock() - mock_get_drac_client.return_value = mock_client - mock_validate_job_queue.side_effect = exception.DracOperationError( - 'boom') - - self.assertRaises(exception.DracOperationError, - drac_mgmt.set_boot_device, self.node, - ironic.common.boot_devices.PXE, persistent=True) - + mock_list_unfinished_jobs.assert_called_once_with(self.node) self.assertEqual(0, mock_client.change_boot_device_order.call_count) self.assertEqual(0, mock_client.set_bios_settings.call_count) self.assertEqual(0, mock_client.commit_pending_bios_changes.call_count) @@ -459,11 +447,11 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): spec_set=True, autospec=True) @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, autospec=True) - @mock.patch.object(drac_job, 'validate_job_queue', spec_set=True, + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, autospec=True) def test_set_boot_device_job_not_scheduled( self, - mock_validate_job_queue, + mock_list_unfinished_jobs, mock__get_boot_device, mock__get_next_persistent_boot_mode, mock_sleep, @@ -471,6 +459,7 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): mock_get_drac_client): mock_client = mock.Mock() mock_get_drac_client.return_value = mock_client + mock_list_unfinished_jobs.return_value = [] mock_client.list_boot_devices.return_value = self.boot_devices['IPL'] mock_job = mock.Mock() mock_job.status = "New" @@ -486,6 +475,107 @@ class DracManagementInternalMethodsTestCase(test_utils.BaseDracTest): drac_mgmt.set_boot_device, self.node, ironic.common.boot_devices.PXE, persistent=True) + mock_list_unfinished_jobs.assert_called_once_with(self.node) + + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, + autospec=True) + def test_set_boot_device_with_list_unfinished_jobs_fail( + self, mock_list_unfinished_jobs, mock_get_drac_client): + mock_client = mock.Mock() + mock_get_drac_client.return_value = mock_client + + mock_list_unfinished_jobs.side_effect = exception.DracOperationError( + 'boom') + + self.assertRaises(exception.DracOperationError, + drac_mgmt.set_boot_device, self.node, + ironic.common.boot_devices.PXE, persistent=True) + + self.assertEqual(0, mock_client.change_boot_device_order.call_count) + self.assertEqual(0, mock_client.set_bios_settings.call_count) + self.assertEqual(0, mock_client.commit_pending_bios_changes.call_count) + + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, + autospec=True) + @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, + autospec=True) + @mock.patch.object(drac_mgmt, '_get_next_persistent_boot_mode', + spec_set=True, autospec=True) + def test_set_boot_device_with_list_unfinished_jobs( + self, mock__get_next_persistent_boot_mode, mock__get_boot_device, + mock_list_unfinished_jobs, mock_get_drac_client): + mock_client = mock.Mock() + mock_get_drac_client.return_value = mock_client + + bios_job_dict = { + 'id': 'JID_602553293345', + 'name': 'ConfigBIOS:BIOS.Setup.1-1', + 'start_time': 'TIME_NOW', + 'until_time': 'TIME_NA', + 'message': 'Task successfully scheduled.', + 'status': 'Scheduled', + 'percent_complete': 0} + bios_job = test_utils.make_job(bios_job_dict) + + mock_list_unfinished_jobs.return_value = [bios_job] + mock_client.list_boot_devices.return_value = self.boot_devices['IPL'] + boot_device = {'boot_device': ironic.common.boot_devices.DISK, + 'persistent': True} + + mock__get_boot_device.return_value = boot_device + mock__get_next_persistent_boot_mode.return_value = 'IPL' + + drac_mgmt.set_boot_device(self.node, ironic.common.boot_devices.DISK, + persistent=True) + mock_list_unfinished_jobs.assert_called_once_with(self.node) + mock_client.delete_jobs.assert_called_once_with( + job_ids=['JID_602553293345']) + + @mock.patch.object(drac_job, 'list_unfinished_jobs', spec_set=True, + autospec=True) + @mock.patch.object(drac_mgmt, '_get_boot_device', spec_set=True, + autospec=True) + @mock.patch.object(drac_mgmt, '_get_next_persistent_boot_mode', + spec_set=True, autospec=True) + def test_set_boot_device_with_multiple_unfinished_jobs( + self, mock__get_next_persistent_boot_mode, mock__get_boot_device, + mock_list_unfinished_jobs, mock_get_drac_client): + mock_client = mock.Mock() + mock_get_drac_client.return_value = mock_client + + job_dict = { + 'id': 'JID_602553293345', + 'name': 'Config:RAID:RAID.Integrated.1-1', + 'start_time': 'TIME_NOW', + 'until_time': 'TIME_NA', + 'message': 'Task successfully scheduled.', + 'status': 'Scheduled', + 'percent_complete': 0} + job = test_utils.make_job(job_dict) + + bios_job_dict = { + 'id': 'JID_602553293346', + 'name': 'ConfigBIOS:BIOS.Setup.1-1', + 'start_time': 'TIME_NOW', + 'until_time': 'TIME_NA', + 'message': 'Task successfully scheduled.', + 'status': 'Scheduled', + 'percent_complete': 0} + bios_job = test_utils.make_job(bios_job_dict) + + mock_list_unfinished_jobs.return_value = [job, bios_job] + mock_client.list_boot_devices.return_value = self.boot_devices['IPL'] + boot_device = {'boot_device': ironic.common.boot_devices.DISK, + 'persistent': True} + + mock__get_boot_device.return_value = boot_device + mock__get_next_persistent_boot_mode.return_value = 'IPL' + + drac_mgmt.set_boot_device(self.node, ironic.common.boot_devices.DISK, + persistent=True) + mock_list_unfinished_jobs.assert_called_once_with(self.node) + mock_client.delete_jobs.assert_called_once_with( + job_ids=['JID_602553293346']) @mock.patch.object(drac_common, 'get_drac_client', spec_set=True, @@ -600,3 +690,15 @@ class DracManagementTestCase(test_utils.BaseDracTest): job_ids=['JID_CLEARALL_FORCE']) self.assertIsNone(return_value) + + def test_clear_job_queue(self, mock_get_drac_client): + mock_client = mock.Mock() + mock_get_drac_client.return_value = mock_client + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + return_value = task.driver.management.clear_job_queue(task) + mock_client.delete_jobs.assert_called_once_with( + job_ids=['JID_CLEARALL_FORCE']) + + self.assertIsNone(return_value) diff --git a/releasenotes/notes/add_clean_step_clear_job_queue-7b774d8d0e36d1b2.yaml b/releasenotes/notes/add_clean_step_clear_job_queue-7b774d8d0e36d1b2.yaml new file mode 100644 index 0000000000..0232fc7ffc --- /dev/null +++ b/releasenotes/notes/add_clean_step_clear_job_queue-7b774d8d0e36d1b2.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Adds ``clear_job_queue`` cleaning step to hardware type ``idrac``. + ``clear_job_queue`` cleaning step clears the Lifecycle Controller + job queue including any pending jobs. +fixes: + - | + Fixes an issue where if there is a pending bios config job in job queue, + then ironic will abandon the introspection attempt for the node, + which will cause overall introspection to fail.