diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index be81483d77..adbfb7b44b 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -3985,7 +3985,8 @@ def _do_next_deploy_step(task, step_index, conductor_id): LOG.info('Deploy step %(step)s on node %(node)s being ' 'executed asynchronously, waiting for driver.', {'node': node.uuid, 'step': step}) - task.process_event('wait') + if task.node.provision_state != states.DEPLOYWAIT: + task.process_event('wait') return elif result is not None: # NOTE(rloo): This is an internal/dev error; shouldn't happen. diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 9e1a067a06..9c6f693680 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -184,17 +184,13 @@ class AgentDeployMixin(agent_base_vendor.AgentDeployMixin): if not commands: return False - last_command = commands[-1] - - if last_command['command_name'] != 'prepare_image': - # catches race condition where prepare_image is still processing - # so deploy hasn't started yet + try: + last_command = next(cmd for cmd in reversed(commands) + if cmd['command_name'] == 'prepare_image') + except StopIteration: return False - - if last_command['command_status'] != 'RUNNING': - return True - - return False + else: + return last_command['command_status'] != 'RUNNING' @METRICS.timer('AgentDeployMixin.continue_deploy') @task_manager.require_exclusive_lock @@ -462,6 +458,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface): # the state machine state going from DEPLOYWAIT -> DEPLOYING task.process_event('wait') self.continue_deploy(task) + return states.DEPLOYWAIT elif task.driver.storage.should_write_image(task): # Check if the driver has already performed a reboot in a previous # deploy step. 
diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 15d5b675a2..65fbc049ca 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -377,7 +377,7 @@ class TestAgentDeploy(db_base.DbTestCase): self.node.save() with task_manager.acquire( self.context, self.node['uuid'], shared=False) as task: - self.driver.deploy(task) + self.assertEqual(states.DEPLOYWAIT, self.driver.deploy(task)) self.assertFalse(power_mock.called) self.assertFalse(mock_pxe_instance.called) task.node.refresh() @@ -1529,6 +1529,27 @@ class TestAgentDeploy(db_base.DbTestCase): 'command_status': 'RUNNING'}] self.assertFalse(task.driver.deploy.deploy_is_done(task)) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_deploy_is_done_several_results(self, mock_get_cmd): + with task_manager.acquire(self.context, self.node.uuid) as task: + mock_get_cmd.return_value = [ + {'command_name': 'prepare_image', 'command_status': 'SUCCESS'}, + {'command_name': 'other_command', 'command_status': 'SUCCESS'}, + {'command_name': 'prepare_image', 'command_status': 'RUNNING'}, + ] + self.assertFalse(task.driver.deploy.deploy_is_done(task)) + + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_deploy_is_done_not_the_last(self, mock_get_cmd): + with task_manager.acquire(self.context, self.node.uuid) as task: + mock_get_cmd.return_value = [ + {'command_name': 'prepare_image', 'command_status': 'SUCCESS'}, + {'command_name': 'other_command', 'command_status': 'SUCCESS'}, + ] + self.assertTrue(task.driver.deploy.deploy_is_done(task)) + @mock.patch.object(manager_utils, 'restore_power_state_if_needed', autospec=True) @mock.patch.object(manager_utils, 'power_on_node_if_needed', diff --git a/releasenotes/notes/direct-fast-track-d0f43850b6e80751.yaml b/releasenotes/notes/direct-fast-track-d0f43850b6e80751.yaml new 
file mode 100644 index 0000000000..fee9738a9d --- /dev/null +++ b/releasenotes/notes/direct-fast-track-d0f43850b6e80751.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes fast-track deployments with the ``direct`` deploy interface that + previously hung.