Ensure in-band deploy steps are present in time for fast-track deployments

Currently we load in-band deploy steps on the first heartbeat in DEPLOYWAIT.
With fast-track, however, this heartbeat may happen way too late, because
the node is up and heartbeating before it's moved to DEPLOYWAIT. This
results in in-band deploy steps not loaded. This change forces a refresh
of deploy steps in deploy.deploy if fast-track is used.

Also make sure that cached steps are cleared on reboot or power off since
they may become outdated next time the agent boots.

Change-Id: I003543452717183b9b3359e368757bcd824a5ce7
This commit is contained in:
Dmitry Tantsur 2020-08-21 12:38:45 +02:00
parent 1e25135bde
commit 7dd611dc5e
7 changed files with 42 additions and 6 deletions

View File

@ -284,14 +284,13 @@ def node_power_action(task, new_state, timeout=None):
driver_internal_info = node.driver_internal_info
driver_internal_info['last_power_state_change'] = str(
timeutils.utcnow().isoformat())
node.driver_internal_info = driver_internal_info
# NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in
# fast-track (or an accidentally booted agent) will cause subsequent
# actions to fail.
if target_state in (states.POWER_OFF, states.SOFT_POWER_OFF,
states.REBOOT, states.SOFT_REBOOT):
if not is_agent_token_pregenerated(node):
driver_internal_info.pop('agent_secret_token', False)
node.driver_internal_info = driver_internal_info
wipe_internal_info_on_power_off(node)
node.save()
# take power action
@ -451,6 +450,19 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
task.process_event('fail', target_state=target_state)
def wipe_internal_info_on_power_off(node):
"""Wipe information that should not survive reboot/power off."""
driver_internal_info = node.driver_internal_info
if not is_agent_token_pregenerated(node):
# Wipe the token if it's not pre-generated, otherwise we'll refuse to
# generate it again for the newly booted agent.
driver_internal_info.pop('agent_secret_token', False)
# Wipe cached steps since they may change after reboot.
driver_internal_info.pop('agent_cached_deploy_steps', None)
driver_internal_info.pop('agent_cached_clean_steps', None)
node.driver_internal_info = driver_internal_info
def wipe_token_and_url(task):
"""Remove agent URL and token from the task."""
info = task.node.driver_internal_info

View File

@ -489,6 +489,9 @@ class AgentDeploy(AgentDeployMixin, agent_base.AgentBaseMixin,
# immediately to the next deploy step.
LOG.debug('Performing a fast track deployment for %(node)s.',
{'node': task.node.uuid})
# NOTE(dtantsur): while the node is up and heartbeating, we don't
# necessary have the deploy steps cached. Force a refresh here.
self.refresh_steps(task, 'deploy')
elif task.driver.storage.should_write_image(task):
# Check if the driver has already performed a reboot in a previous
# deploy step.

View File

@ -655,6 +655,9 @@ class ISCSIDeploy(agent_base.AgentDeployMixin, agent_base.AgentBaseMixin,
{'node': task.node.uuid})
deploy_utils.cache_instance_image(task.context, node)
check_image_size(task)
# NOTE(dtantsur): while the node is up and heartbeating, we don't
# necessary have the deploy steps cached. Force a refresh here.
self.refresh_steps(task, 'deploy')
elif task.driver.storage.should_write_image(task):
# Standard deploy process
deploy_utils.cache_instance_image(task.context, node)

View File

@ -225,7 +225,8 @@ class NodePowerActionTestCase(db_base.DbTestCase):
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
def test_node_power_action_power_off(self, get_power_mock):
"""Test node_power_action to turn node power off."""
dii = {'agent_secret_token': 'token'}
dii = {'agent_secret_token': 'token',
'agent_cached_deploy_steps': ['steps']}
node = obj_utils.create_test_node(self.context,
uuid=uuidutils.generate_uuid(),
driver='fake-hardware',
@ -243,6 +244,8 @@ class NodePowerActionTestCase(db_base.DbTestCase):
self.assertIsNone(node['target_power_state'])
self.assertIsNone(node['last_error'])
self.assertNotIn('agent_secret_token', node['driver_internal_info'])
self.assertNotIn('agent_cached_deploy_steps',
node['driver_internal_info'])
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
def test_node_power_action_power_off_pregenerated_token(self,

View File

@ -467,13 +467,15 @@ class TestAgentDeploy(db_base.DbTestCase):
self.assertIsNone(driver_return)
self.assertFalse(mock_power.called)
@mock.patch.object(agent.AgentDeploy, 'refresh_steps', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'prepare_image',
autospec=True)
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
mock_is_fast_track, prepare_image_mock):
mock_is_fast_track, prepare_image_mock,
refresh_mock):
mock_is_fast_track.return_value = True
self.node.target_provision_state = states.ACTIVE
self.node.provision_state = states.DEPLOYING
@ -488,6 +490,7 @@ class TestAgentDeploy(db_base.DbTestCase):
self.assertEqual(states.DEPLOYING, task.node.provision_state)
self.assertEqual(states.ACTIVE,
task.node.target_provision_state)
refresh_mock.assert_called_once_with(self.driver, task, 'deploy')
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
autospec=True)

View File

@ -855,6 +855,8 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
self.assertIsNone(ret)
self.assertFalse(mock_node_power_action.called)
@mock.patch.object(iscsi_deploy.ISCSIDeploy, 'refresh_steps',
autospec=True)
@mock.patch.object(iscsi_deploy, 'check_image_size', autospec=True)
@mock.patch.object(deploy_utils, 'cache_instance_image', autospec=True)
@mock.patch.object(iscsi_deploy.ISCSIDeploy, 'write_image',
@ -864,7 +866,8 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
mock_is_fast_track, write_image_mock,
cache_image_mock, check_image_size_mock):
cache_image_mock, check_image_size_mock,
refresh_mock):
mock_is_fast_track.return_value = True
self.node.target_provision_state = states.ACTIVE
self.node.provision_state = states.DEPLOYING
@ -885,6 +888,8 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
cache_image_mock.assert_called_with(mock.ANY, task.node)
check_image_size_mock.assert_called_with(task)
self.assertFalse(write_image_mock.called)
refresh_mock.assert_called_once_with(task.driver.deploy,
task, 'deploy')
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
autospec=True)

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes an issue that caused in-band deploy steps inserted before
``write_image`` to be skipped when fast-track is used.
- |
Makes sure in-band deploy and clean steps are not cached across reboots.