Browse Source

Fix fast-track with the direct deploy interface

Several small fixes:

1) Make sure the deploy.deploy step returns DEPLOYWAIT after running
   prepare_image; otherwise the conductor wrongly assumes that the
   deploy is done.
2) Handle the case when provision_state==DEPLOYWAIT when returning
   from an asynchronous deploy step.
3) Do not assume that prepare_image is always the last command to run;
   sometimes get_deploy_steps sneaks in.
4) Do not issue a deprecation warning when receiving "agent is busy"
   on get_deploy_steps; this is normal for fast-track.

Change-Id: I19274c48bd36fca19961a7d78467ec8c29f85905
(cherry picked from commit f0803493de)
changes/87/737687/1
Dmitry Tantsur 3 weeks ago
parent
commit
316a868da8
7 changed files with 94 additions and 13 deletions
  1. +2
    -1
      ironic/conductor/deployments.py
  2. +7
    -10
      ironic/drivers/modules/agent.py
  3. +6
    -1
      ironic/drivers/modules/agent_base.py
  4. +33
    -0
      ironic/tests/unit/conductor/test_deployments.py
  5. +22
    -1
      ironic/tests/unit/drivers/modules/test_agent.py
  6. +19
    -0
      ironic/tests/unit/drivers/modules/test_agent_base.py
  7. +5
    -0
      releasenotes/notes/direct-fast-track-d0f43850b6e80751.yaml

+ 2
- 1
ironic/conductor/deployments.py View File

@@ -286,7 +286,8 @@ def do_next_deploy_step(task, step_index, conductor_id):
LOG.info('Deploy step %(step)s on node %(node)s being '
'executed asynchronously, waiting for driver.',
{'node': node.uuid, 'step': step})
task.process_event('wait')
if task.node.provision_state != states.DEPLOYWAIT:
task.process_event('wait')
return
elif result is not None:
# NOTE(rloo): This is an internal/dev error; shouldn't happen.


+ 7
- 10
ironic/drivers/modules/agent.py View File

@@ -186,17 +186,13 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
if not commands:
return False

last_command = commands[-1]

if last_command['command_name'] != 'prepare_image':
# catches race condition where prepare_image is still processing
# so deploy hasn't started yet
try:
last_command = next(cmd for cmd in reversed(commands)
if cmd['command_name'] == 'prepare_image')
except StopIteration:
return False

if last_command['command_status'] != 'RUNNING':
return True

return False
else:
return last_command['command_status'] != 'RUNNING'

@METRICS.timer('AgentDeployMixin.continue_deploy')
@task_manager.require_exclusive_lock
@@ -478,6 +474,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
# the state machine state going from DEPLOYWAIT -> DEPLOYING
task.process_event('wait')
self.continue_deploy(task)
return states.DEPLOYWAIT
elif task.driver.storage.should_write_image(task):
# Check if the driver has already performed a reboot in a previous
# deploy step.


+ 6
- 1
ironic/drivers/modules/agent_base.py View File

@@ -722,10 +722,15 @@ class AgentDeployMixin(HeartbeatMixin):
'steps': previous_steps})

call = getattr(self._client, 'get_%s_steps' % step_type)
# TODO(dtantsur): remove the error handling in the V release.
try:
agent_result = call(node, task.ports).get('command_result', {})
except exception.AgentAPIError as exc:
if 'agent is busy' in str(exc):
LOG.debug('Agent is busy with a command, will refresh steps '
'on the next heartbeat')
return

# TODO(dtantsur): change to just 'raise'
if step_type == 'clean':
raise
else:


+ 33
- 0
ironic/tests/unit/conductor/test_deployments.py View File

@@ -423,6 +423,39 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
mock_execute.assert_called_once_with(mock.ANY, task,
self.deploy_steps[0])

# Checks do_next_deploy_step when the (mocked) deploy step has already fired
# the 'wait' event itself and then returns DEPLOYWAIT: the node must end up
# in DEPLOYWAIT with no error recorded.
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
autospec=True)
def test__do_next_deploy_step_in_deploywait(self, mock_execute):
# No step has been started yet (index is None); the steps come from the
# test case's own self.deploy_steps.
driver_internal_info = {'deploy_step_index': None,
'deploy_steps': self.deploy_steps}
self._start_service()
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
driver_internal_info=driver_internal_info,
deploy_step={})

def fake_execute(interface, task, step):
# A deploy step leaves the node in DEPLOYWAIT
task.process_event('wait')
return states.DEPLOYWAIT

mock_execute.side_effect = fake_execute
expected_first_step = node.driver_internal_info['deploy_steps'][0]
task = task_manager.TaskManager(self.context, node.uuid)
# Fire the 'deploy' event before running the first step.
task.process_event('deploy')

deployments.do_next_deploy_step(task, 0, self.service.conductor.id)

# Reload the node and verify the state left behind by the step.
node.refresh()
self.assertIsNone(node.last_error)
self.assertEqual(states.DEPLOYWAIT, node.provision_state)
self.assertEqual(states.ACTIVE, node.target_provision_state)
self.assertEqual(expected_first_step, node.deploy_step)
self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
self.assertEqual(self.service.conductor.id, node.conductor_affinity)
# The step must have been executed exactly once, with the first step.
mock_execute.assert_called_once_with(mock.ANY, task,
self.deploy_steps[0])

@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
autospec=True)
def test__do_next_deploy_step_continue_from_last_step(self, mock_execute):


+ 22
- 1
ironic/tests/unit/drivers/modules/test_agent.py View File

@@ -492,7 +492,7 @@ class TestAgentDeploy(db_base.DbTestCase):
self.node.save()
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
self.driver.deploy(task)
self.assertEqual(states.DEPLOYWAIT, self.driver.deploy(task))
self.assertFalse(power_mock.called)
self.assertFalse(mock_pxe_instance.called)
task.node.refresh()
@@ -1661,6 +1661,27 @@ class TestAgentDeploy(db_base.DbTestCase):
'command_status': 'RUNNING'}]
self.assertFalse(task.driver.deploy.deploy_is_done(task))

# deploy_is_done must report False when the last-listed prepare_image command
# is still RUNNING, even though an earlier prepare_image entry and another
# command have already succeeded.
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_is_done_several_results(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [
{'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
{'command_name': 'other_command', 'command_status': 'SUCCESS'},
{'command_name': 'prepare_image', 'command_status': 'RUNNING'},
]
self.assertFalse(task.driver.deploy.deploy_is_done(task))

# deploy_is_done must report True when prepare_image has succeeded but is not
# the final entry in the command list (another command follows it).
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_is_done_not_the_last(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [
{'command_name': 'prepare_image', 'command_status': 'SUCCESS'},
{'command_name': 'other_command', 'command_status': 'SUCCESS'},
]
self.assertTrue(task.driver.deploy.deploy_is_done(task))

@mock.patch.object(manager_utils, 'restore_power_state_if_needed',
autospec=True)
@mock.patch.object(manager_utils, 'power_on_node_if_needed',


+ 19
- 0
ironic/tests/unit/drivers/modules/test_agent_base.py View File

@@ -2192,6 +2192,25 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
self.assertEqual([self.clean_steps['clean_steps'][
'SpecificHardwareManager'][1]], steps['raid'])

# When get_deploy_steps fails with an "agent is busy" AgentAPIError,
# refresh_steps must return without caching any deploy steps and without
# logging a warning.
@mock.patch.object(agent_base.LOG, 'warning', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_deploy_steps',
autospec=True)
def test_refresh_steps_busy(self, client_mock, log_mock):
# Make the mocked agent client raise the "busy" error.
client_mock.side_effect = exception.AgentAPIError(
node="node", status="500", error='agent is busy')

with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.deploy.refresh_steps(task, 'deploy')

client_mock.assert_called_once_with(mock.ANY, task.node,
task.ports)
# Nothing may have been stored in driver_internal_info.
self.assertNotIn('agent_cached_deploy_steps_refreshed',
task.node.driver_internal_info)
self.assertIsNone(task.node.driver_internal_info.get(
'agent_cached_deploy_steps'))
# LOG.warning must not have been called.
self.assertFalse(log_mock.called)

@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
autospec=True)
def test_refresh_steps_missing_steps(self, client_mock):


+ 5
- 0
releasenotes/notes/direct-fast-track-d0f43850b6e80751.yaml View File

@@ -0,0 +1,5 @@
---
fixes:
- |
Fixes fast-track deployments with the ``direct`` deploy interface that
used to hang.

Loading…
Cancel
Save