Fix fast-track with the direct deploy interface
Several small fixes: 1) Make sure the deploy.deploy step returns DEPLOYWAIT after running prepare_image; otherwise the conductor wrongly assumes that the deploy is done. 2) Handle the case when provision_state == DEPLOYWAIT when returning from an asynchronous deploy step. 3) Do not assume that prepare_image is always the last command to run; sometimes get_deploy_steps sneaks in after it. 4) Do not issue a deprecation warning when receiving "agent is busy" on get_deploy_steps; this is normal for fast-track. Change-Id: I19274c48bd36fca19961a7d78467ec8c29f85905
This commit is contained in:
parent
df439697ae
commit
f0803493de
@ -291,7 +291,8 @@ def do_next_deploy_step(task, step_index, conductor_id):
|
||||
LOG.info('Deploy step %(step)s on node %(node)s being '
|
||||
'executed asynchronously, waiting for driver.',
|
||||
{'node': node.uuid, 'step': step})
|
||||
task.process_event('wait')
|
||||
if task.node.provision_state != states.DEPLOYWAIT:
|
||||
task.process_event('wait')
|
||||
return
|
||||
elif result is not None:
|
||||
# NOTE(rloo): This is an internal/dev error; shouldn't happen.
|
||||
|
@ -186,17 +186,13 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
|
||||
if not commands:
|
||||
return False
|
||||
|
||||
last_command = commands[-1]
|
||||
|
||||
if last_command['command_name'] != 'prepare_image':
|
||||
# catches race condition where prepare_image is still processing
|
||||
# so deploy hasn't started yet
|
||||
try:
|
||||
last_command = next(cmd for cmd in reversed(commands)
|
||||
if cmd['command_name'] == 'prepare_image')
|
||||
except StopIteration:
|
||||
return False
|
||||
|
||||
if last_command['command_status'] != 'RUNNING':
|
||||
return True
|
||||
|
||||
return False
|
||||
else:
|
||||
return last_command['command_status'] != 'RUNNING'
|
||||
|
||||
@METRICS.timer('AgentDeployMixin.continue_deploy')
|
||||
@task_manager.require_exclusive_lock
|
||||
@ -487,6 +483,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
# the state machine state going from DEPLOYWAIT -> DEPLOYING
|
||||
task.process_event('wait')
|
||||
self.continue_deploy(task)
|
||||
return states.DEPLOYWAIT
|
||||
elif task.driver.storage.should_write_image(task):
|
||||
# Check if the driver has already performed a reboot in a previous
|
||||
# deploy step.
|
||||
|
@ -722,10 +722,15 @@ class AgentDeployMixin(HeartbeatMixin):
|
||||
'steps': previous_steps})
|
||||
|
||||
call = getattr(self._client, 'get_%s_steps' % step_type)
|
||||
# TODO(dtantsur): remove the error handling in the V release.
|
||||
try:
|
||||
agent_result = call(node, task.ports).get('command_result', {})
|
||||
except exception.AgentAPIError as exc:
|
||||
if 'agent is busy' in str(exc):
|
||||
LOG.debug('Agent is busy with a command, will refresh steps '
|
||||
'on the next heartbeat')
|
||||
return
|
||||
|
||||
# TODO(dtantsur): change to just 'raise'
|
||||
if step_type == 'clean':
|
||||
raise
|
||||
else:
|
||||
|
@ -411,6 +411,39 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
mock_execute.assert_called_once_with(mock.ANY, task,
|
||||
self.deploy_steps[0])
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
            autospec=True)
def test__do_next_deploy_step_in_deploywait(self, mock_execute):
    """A step that already moved the node to DEPLOYWAIT must not make
    the conductor fire a second 'wait' event (fast-track case).
    """
    self._start_service()
    node = obj_utils.create_test_node(
        self.context, driver='fake-hardware',
        driver_internal_info={'deploy_step_index': None,
                              'deploy_steps': self.deploy_steps},
        deploy_step={})

    def fake_execute(interface, task, step):
        # Simulate an asynchronous deploy step that transitions the
        # node to DEPLOYWAIT itself before returning.
        task.process_event('wait')
        return states.DEPLOYWAIT

    mock_execute.side_effect = fake_execute
    first_step = node.driver_internal_info['deploy_steps'][0]
    task = task_manager.TaskManager(self.context, node.uuid)
    task.process_event('deploy')

    deployments.do_next_deploy_step(task, 0, self.service.conductor.id)

    node.refresh()
    # No error, and the node stayed in DEPLOYWAIT on its way to ACTIVE.
    self.assertIsNone(node.last_error)
    self.assertEqual(states.DEPLOYWAIT, node.provision_state)
    self.assertEqual(states.ACTIVE, node.target_provision_state)
    self.assertEqual(first_step, node.deploy_step)
    self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
    self.assertEqual(self.service.conductor.id, node.conductor_affinity)
    mock_execute.assert_called_once_with(mock.ANY, task,
                                         self.deploy_steps[0])
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
|
||||
autospec=True)
|
||||
def test__do_next_deploy_step_continue_from_last_step(self, mock_execute):
|
||||
|
@ -492,7 +492,7 @@ class TestAgentDeploy(db_base.DbTestCase):
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node['uuid'], shared=False) as task:
|
||||
self.driver.deploy(task)
|
||||
self.assertEqual(states.DEPLOYWAIT, self.driver.deploy(task))
|
||||
self.assertFalse(power_mock.called)
|
||||
self.assertFalse(mock_pxe_instance.called)
|
||||
task.node.refresh()
|
||||
@ -1739,6 +1739,27 @@ class TestAgentDeploy(db_base.DbTestCase):
|
||||
'command_status': 'RUNNING'}]
|
||||
self.assertFalse(task.driver.deploy.deploy_is_done(task))
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_is_done_several_results(self, mock_get_cmd):
    """The newest prepare_image result wins, even when other commands
    (e.g. get_deploy_steps) are interleaved in the status list.
    """
    mock_get_cmd.return_value = [
        {'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
        {'command_name': 'other_command', 'command_status': 'SUCCESS'},
        {'command_name': 'prepare_image', 'command_status': 'RUNNING'},
    ]
    with task_manager.acquire(self.context, self.node.uuid) as task:
        # The most recent prepare_image is still RUNNING -> not done.
        self.assertFalse(task.driver.deploy.deploy_is_done(task))
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_is_done_not_the_last(self, mock_get_cmd):
    """deploy_is_done must not require prepare_image to be the very
    last command in the agent's status list.
    """
    mock_get_cmd.return_value = [
        {'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
        {'command_name': 'other_command', 'command_status': 'SUCCESS'},
    ]
    with task_manager.acquire(self.context, self.node.uuid) as task:
        # prepare_image succeeded; a trailing unrelated command is fine.
        self.assertTrue(task.driver.deploy.deploy_is_done(task))
|
||||
|
||||
@mock.patch.object(manager_utils, 'restore_power_state_if_needed',
|
||||
autospec=True)
|
||||
@mock.patch.object(manager_utils, 'power_on_node_if_needed',
|
||||
|
@ -2235,6 +2235,25 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
self.assertEqual([self.clean_steps['clean_steps'][
|
||||
'SpecificHardwareManager'][1]], steps['raid'])
|
||||
|
||||
@mock.patch.object(agent_base.LOG, 'warning', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_deploy_steps',
                   autospec=True)
def test_refresh_steps_busy(self, client_mock, log_mock):
    """'agent is busy' on get_deploy_steps is normal for fast-track:
    no warning is logged and nothing is cached on the node.
    """
    client_mock.side_effect = exception.AgentAPIError(
        node="node", status="500", error='agent is busy')

    with task_manager.acquire(
            self.context, self.node.uuid, shared=False) as task:
        self.deploy.refresh_steps(task, 'deploy')

        client_mock.assert_called_once_with(mock.ANY, task.node,
                                            task.ports)
        # The busy error means steps were not refreshed or cached.
        info = task.node.driver_internal_info
        self.assertNotIn('agent_cached_deploy_steps_refreshed', info)
        self.assertIsNone(info.get('agent_cached_deploy_steps'))
        # No deprecation warning for the (expected) busy case.
        self.assertFalse(log_mock.called)
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
|
||||
autospec=True)
|
||||
def test_refresh_steps_missing_steps(self, client_mock):
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
fixes:
  - |
    Fixes fast-track deployments with the ``direct`` deploy interface,
    which could previously hang.
|
Loading…
x
Reference in New Issue
Block a user