diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index 6608e5c9ad..e146a26d60 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -236,22 +236,29 @@ def do_next_deploy_step(task, step_index): to execute. """ node = task.node - if step_index is None: - steps = [] - else: - steps = node.driver_internal_info['deploy_steps'][step_index:] - LOG.info('Executing %(state)s on node %(node)s, remaining steps: ' - '%(steps)s', {'node': node.uuid, 'steps': steps, - 'state': node.provision_state}) + def _iter_steps(): + if step_index is None: + return # short-circuit to the end + idx = step_index + # The list can change in-flight, do not cache it! + while idx < len(node.driver_internal_info['deploy_steps']): + yield idx, node.driver_internal_info['deploy_steps'][idx] + idx += 1 - # Execute each step until we hit an async step or run out of steps - for ind, step in enumerate(steps): + # Execute each step until we hit an async step or run out of steps, keeping + # in mind that the steps list can be modified in-flight. + for idx, step in _iter_steps(): + LOG.info('Deploying on node %(node)s, remaining steps: ' + '%(steps)s', { + 'node': node.uuid, + 'steps': node.driver_internal_info['deploy_steps'][idx:], + }) # Save which step we're about to start so we can restart # if necessary node.deploy_step = step driver_internal_info = node.driver_internal_info - driver_internal_info['deploy_step_index'] = step_index + ind + driver_internal_info['deploy_step_index'] = idx node.driver_internal_info = driver_internal_info node.save() interface = getattr(task.driver, step.get('interface')) @@ -346,6 +353,19 @@ def do_next_deploy_step(task, step_index): {'node': node.uuid, 'instance': node.instance_uuid}) +@task_manager.require_exclusive_lock +def validate_deploy_steps(task): + """Validate the deploy steps after the ramdisk learns about them.""" + conductor_steps.validate_user_deploy_steps_and_templates(task) + conductor_steps.set_node_deployment_steps( + task, reset_current=False) + + info = task.node.driver_internal_info + info['steps_validated'] = True + task.node.driver_internal_info = info + task.node.save() + + @utils.fail_on_error(utils.deploying_error_handler, _("Unexpected error when processing next deploy step")) @task_manager.require_exclusive_lock @@ -361,22 +381,15 @@ def continue_node_deploy(task): node = task.node # Agent is now running, we're ready to validate the remaining steps - if not node.driver_internal_info.get('steps_validated'): + if not task.node.driver_internal_info.get('steps_validated'): try: - conductor_steps.validate_user_deploy_steps_and_templates(task) - conductor_steps.set_node_deployment_steps( - task, reset_current=False) + validate_deploy_steps(task) except exception.IronicException as exc: msg = _('Failed to validate the final deploy steps list ' 'for node %(node)s: %(exc)s') % {'node': node.uuid, 'exc': exc} return utils.deploying_error_handler(task, msg) - info = node.driver_internal_info - info['steps_validated'] = True - node.driver_internal_info = info - node.save() - next_step_index = utils.update_next_step_index(task, 'deploy') do_next_deploy_step(task, next_step_index) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 48779b963e..bcef5495e1 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -26,6 +26,7 @@ from ironic.common import images from ironic.common import raid from ironic.common import states from ironic.common import utils +from ironic.conductor import deployments from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils from ironic.conf import CONF @@ -519,6 +520,7 @@ class AgentDeploy(AgentDeployMixin, agent_base.AgentBaseMixin, # NOTE(dtantsur): while the node is up and heartbeating, we don't # necessary have the deploy steps cached. Force a refresh here. self.refresh_steps(task, 'deploy') + deployments.validate_deploy_steps(task) elif task.driver.storage.should_write_image(task): # Check if the driver has already performed a reboot in a previous # deploy step. diff --git a/ironic/tests/unit/conductor/test_deployments.py b/ironic/tests/unit/conductor/test_deployments.py index f0d09952b9..f0e8944611 100644 --- a/ironic/tests/unit/conductor/test_deployments.py +++ b/ironic/tests/unit/conductor/test_deployments.py @@ -435,8 +435,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, self.in_band_step = { 'step': 'deploy_middle', 'priority': 30, 'interface': 'deploy'} - @mock.patch.object(deployments, 'LOG', autospec=True) - def test__do_next_deploy_step_none(self, mock_log): + def test__do_next_deploy_step_none(self): self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware') task = task_manager.TaskManager(self.context, node.uuid) @@ -446,7 +445,6 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, node.refresh() self.assertEqual(states.ACTIVE, node.provision_state) - self.assertEqual(2, mock_log.info.call_count) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True) @@ -616,6 +614,47 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, self.assertNotIn('agent_url', node.driver_internal_info) self.assertNotIn('agent_secret_token', node.driver_internal_info) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', + autospec=True) + def test__do_next_deploy_step_dynamic(self, mock_execute): + # Simulate adding more steps in runtime. + driver_internal_info = {'deploy_step_index': None, + 'deploy_steps': self.deploy_steps[:1], + 'agent_url': 'url', + 'agent_secret_token': 'token'} + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + driver_internal_info=driver_internal_info, + deploy_step={}) + + def _fake_execute(iface, task, step): + dii = task.node.driver_internal_info + dii['deploy_steps'] = self.deploy_steps + task.node.driver_internal_info = dii + task.node.save() + + mock_execute.side_effect = _fake_execute + + task = task_manager.TaskManager(self.context, node.uuid) + task.process_event('deploy') + + deployments.do_next_deploy_step(task, 0) + + # Deploying should be complete + node.refresh() + self.assertIsNone(node.last_error) + self.assertEqual(states.ACTIVE, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual({}, node.deploy_step) + self.assertNotIn('deploy_step_index', node.driver_internal_info) + self.assertIsNone(node.driver_internal_info['deploy_steps']) + mock_execute.assert_has_calls( + [mock.call(task.driver.deploy, task, self.deploy_steps[0]), + mock.call(task.driver.deploy, task, self.deploy_steps[1])]) + self.assertNotIn('agent_url', node.driver_internal_info) + self.assertNotIn('agent_secret_token', node.driver_internal_info) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True) def test__do_next_deploy_step_fast_track(self, mock_execute): diff --git a/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml b/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml new file mode 100644 index 0000000000..dea755aa24 --- /dev/null +++ b/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Correctly processes in-band deploy steps on fast-track deployment.