From 9afa9b86d1c5fd11bd34a321708237d26ebeedfc Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Tue, 13 Apr 2021 13:43:39 +0200 Subject: [PATCH] Process in-band deploy steps on fast-track Currently we're only recording them, but do not validate and take into account. This change fixes it. The deployment code has been updated to account for the fact that deploy steps can change in run time. Change-Id: I01bd9e3a11fed68213bb392c04aa1d33bbe16b30 --- ironic/conductor/deployments.py | 51 ++++++++++++------- ironic/drivers/modules/agent.py | 2 + .../tests/unit/conductor/test_deployments.py | 45 ++++++++++++++-- .../fast-track-validate-723f27986a012ffe.yaml | 4 ++ 4 files changed, 80 insertions(+), 22 deletions(-) create mode 100644 releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index 6608e5c9ad..e146a26d60 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -236,22 +236,29 @@ def do_next_deploy_step(task, step_index): to execute. """ node = task.node - if step_index is None: - steps = [] - else: - steps = node.driver_internal_info['deploy_steps'][step_index:] - LOG.info('Executing %(state)s on node %(node)s, remaining steps: ' - '%(steps)s', {'node': node.uuid, 'steps': steps, - 'state': node.provision_state}) + def _iter_steps(): + if step_index is None: + return # short-circuit to the end + idx = step_index + # The list can change in-flight, do not cache it! + while idx < len(node.driver_internal_info['deploy_steps']): + yield idx, node.driver_internal_info['deploy_steps'][idx] + idx += 1 - # Execute each step until we hit an async step or run out of steps - for ind, step in enumerate(steps): + # Execute each step until we hit an async step or run out of steps, keeping + # in mind that the steps list can be modified in-flight. + for idx, step in _iter_steps(): + LOG.info('Deploying on node %(node)s, remaining steps: ' + '%(steps)s', { + 'node': node.uuid, + 'steps': node.driver_internal_info['deploy_steps'][idx:], + }) # Save which step we're about to start so we can restart # if necessary node.deploy_step = step driver_internal_info = node.driver_internal_info - driver_internal_info['deploy_step_index'] = step_index + ind + driver_internal_info['deploy_step_index'] = idx node.driver_internal_info = driver_internal_info node.save() interface = getattr(task.driver, step.get('interface')) @@ -346,6 +353,19 @@ def do_next_deploy_step(task, step_index): {'node': node.uuid, 'instance': node.instance_uuid}) +@task_manager.require_exclusive_lock +def validate_deploy_steps(task): + """Validate the deploy steps after the ramdisk learns about them.""" + conductor_steps.validate_user_deploy_steps_and_templates(task) + conductor_steps.set_node_deployment_steps( + task, reset_current=False) + + info = task.node.driver_internal_info + info['steps_validated'] = True + task.node.driver_internal_info = info + task.node.save() + + @utils.fail_on_error(utils.deploying_error_handler, _("Unexpected error when processing next deploy step")) @task_manager.require_exclusive_lock @@ -361,22 +381,15 @@ def continue_node_deploy(task): node = task.node # Agent is now running, we're ready to validate the remaining steps - if not node.driver_internal_info.get('steps_validated'): + if not task.node.driver_internal_info.get('steps_validated'): try: - conductor_steps.validate_user_deploy_steps_and_templates(task) - conductor_steps.set_node_deployment_steps( - task, reset_current=False) + validate_deploy_steps(task) except exception.IronicException as exc: msg = _('Failed to validate the final deploy steps list ' 'for node %(node)s: %(exc)s') % {'node': node.uuid, 'exc': exc} return utils.deploying_error_handler(task, msg) - info = node.driver_internal_info - info['steps_validated'] = True - node.driver_internal_info = info - node.save() - next_step_index = utils.update_next_step_index(task, 'deploy') do_next_deploy_step(task, next_step_index) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 48779b963e..bcef5495e1 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -26,6 +26,7 @@ from ironic.common import images from ironic.common import raid from ironic.common import states from ironic.common import utils +from ironic.conductor import deployments from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils from ironic.conf import CONF @@ -519,6 +520,7 @@ class AgentDeploy(AgentDeployMixin, agent_base.AgentBaseMixin, # NOTE(dtantsur): while the node is up and heartbeating, we don't # necessary have the deploy steps cached. Force a refresh here. self.refresh_steps(task, 'deploy') + deployments.validate_deploy_steps(task) elif task.driver.storage.should_write_image(task): # Check if the driver has already performed a reboot in a previous # deploy step. diff --git a/ironic/tests/unit/conductor/test_deployments.py b/ironic/tests/unit/conductor/test_deployments.py index f0d09952b9..f0e8944611 100644 --- a/ironic/tests/unit/conductor/test_deployments.py +++ b/ironic/tests/unit/conductor/test_deployments.py @@ -435,8 +435,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, self.in_band_step = { 'step': 'deploy_middle', 'priority': 30, 'interface': 'deploy'} - @mock.patch.object(deployments, 'LOG', autospec=True) - def test__do_next_deploy_step_none(self, mock_log): + def test__do_next_deploy_step_none(self): self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware') task = task_manager.TaskManager(self.context, node.uuid) @@ -446,7 +445,6 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, node.refresh() self.assertEqual(states.ACTIVE, node.provision_state) - self.assertEqual(2, mock_log.info.call_count) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True) @@ -616,6 +614,47 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, self.assertNotIn('agent_url', node.driver_internal_info) self.assertNotIn('agent_secret_token', node.driver_internal_info) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', + autospec=True) + def test__do_next_deploy_step_dynamic(self, mock_execute): + # Simulate adding more steps in runtime. + driver_internal_info = {'deploy_step_index': None, + 'deploy_steps': self.deploy_steps[:1], + 'agent_url': 'url', + 'agent_secret_token': 'token'} + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + driver_internal_info=driver_internal_info, + deploy_step={}) + + def _fake_execute(iface, task, step): + dii = task.node.driver_internal_info + dii['deploy_steps'] = self.deploy_steps + task.node.driver_internal_info = dii + task.node.save() + + mock_execute.side_effect = _fake_execute + + task = task_manager.TaskManager(self.context, node.uuid) + task.process_event('deploy') + + deployments.do_next_deploy_step(task, 0) + + # Deploying should be complete + node.refresh() + self.assertIsNone(node.last_error) + self.assertEqual(states.ACTIVE, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual({}, node.deploy_step) + self.assertNotIn('deploy_step_index', node.driver_internal_info) + self.assertIsNone(node.driver_internal_info['deploy_steps']) + mock_execute.assert_has_calls( + [mock.call(task.driver.deploy, task, self.deploy_steps[0]), + mock.call(task.driver.deploy, task, self.deploy_steps[1])]) + self.assertNotIn('agent_url', node.driver_internal_info) + self.assertNotIn('agent_secret_token', node.driver_internal_info) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', autospec=True) def test__do_next_deploy_step_fast_track(self, mock_execute): diff --git a/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml b/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml new file mode 100644 index 0000000000..dea755aa24 --- /dev/null +++ b/releasenotes/notes/fast-track-validate-723f27986a012ffe.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Correctly processes in-band deploy steps on fast-track deployment.