Use DEPLOYWAIT while waiting for agent to write image

There is an assumption that anything in DEPLOYING state is locked; the
agent driver breaks that assumption. Fix it by using DEPLOYWAIT while
waiting for the agent to write an image.

Change-Id: I4957bd9608b1bc92177e69efef66ecb951181de1
Related-Bug: #1461937
This commit is contained in:
Jim Rollenhagen 2015-07-09 08:39:04 -07:00
parent 09433e4153
commit f1929f0155
4 changed files with 71 additions and 7 deletions

View File

@ -437,6 +437,15 @@ class AgentDeploy(base.DeployInterface):
class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
def deploy_has_started(self, task):
commands = self._client.get_commands_status(task.node)
for command in commands:
if command['command_name'] == 'prepare_image':
# deploy did start at some point
return True
return False
def deploy_is_done(self, task):
commands = self._client.get_commands_status(task.node)
if not commands:
@ -479,6 +488,8 @@ class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
LOG.debug('prepare_image got response %(res)s for node %(node)s',
{'res': res, 'node': node.uuid})
task.process_event('wait')
def check_deploy_success(self, node):
# should only ever be called after we've validated that
# the prepare_image command is complete
@ -487,6 +498,7 @@ class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
return command['command_error']
def reboot_to_instance(self, task, **kwargs):
task.process_event('resume')
node = task.node
LOG.debug('Preparing to reboot to instance for node %s',
node.uuid)

View File

@ -88,6 +88,13 @@ class BaseAgentVendor(base.VendorInterface):
"""
pass
def deploy_has_started(self, task):
"""Check if the deployment has started already.
:returns: True if the deploy has started, False otherwise.
"""
pass
def deploy_is_done(self, task):
"""Check if the deployment is already completed.
@ -245,10 +252,11 @@ class BaseAgentVendor(base.VendorInterface):
LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
'not taking any action.', {'node': node.uuid})
return
elif node.provision_state == states.DEPLOYWAIT:
elif (node.provision_state == states.DEPLOYWAIT and
not self.deploy_has_started(task)):
msg = _('Node failed to get image for deploy.')
self.continue_deploy(task, **kwargs)
elif (node.provision_state == states.DEPLOYING and
elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_is_done(task)):
msg = _('Node failed to move to active state.')
self.reboot_to_instance(task, **kwargs)

View File

@ -398,7 +398,7 @@ class TestAgentVendor(db_base.DbTestCase):
client_mock.prepare_image.assert_called_with(task.node,
expected_image_info)
self.assertEqual(states.DEPLOYING, task.node.provision_state)
self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
self.assertEqual(states.ACTIVE,
task.node.target_provision_state)
@ -424,7 +424,7 @@ class TestAgentVendor(db_base.DbTestCase):
client_mock.prepare_image.assert_called_with(task.node,
expected_image_info)
self.assertEqual(states.DEPLOYING, task.node.provision_state)
self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
self.assertEqual(states.ACTIVE,
task.node.target_provision_state)
@ -441,7 +441,7 @@ class TestAgentVendor(db_base.DbTestCase):
node_power_action_mock):
check_deploy_mock.return_value = None
self.node.provision_state = states.DEPLOYING
self.node.provision_state = states.DEPLOYWAIT
self.node.target_provision_state = states.ACTIVE
self.node.save()
@ -459,6 +459,47 @@ class TestAgentVendor(db_base.DbTestCase):
self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_has_started(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = []
self.assertFalse(self.passthru.deploy_has_started(task))
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_has_started_is_done(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [{'command_name': 'prepare_image',
'command_status': 'SUCCESS'}]
self.assertTrue(self.passthru.deploy_has_started(task))
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_has_started_did_start(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [{'command_name': 'prepare_image',
'command_status': 'RUNNING'}]
self.assertTrue(self.passthru.deploy_has_started(task))
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_has_started_multiple_commands(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [{'command_name': 'cache_image',
'command_status': 'SUCCESS'},
{'command_name': 'prepare_image',
'command_status': 'RUNNING'}]
self.assertTrue(self.passthru.deploy_has_started(task))
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_has_started_other_commands(self, mock_get_cmd):
with task_manager.acquire(self.context, self.node.uuid) as task:
mock_get_cmd.return_value = [{'command_name': 'cache_image',
'command_status': 'SUCCESS'}]
self.assertFalse(self.passthru.deploy_has_started(task))
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_deploy_is_done(self, mock_get_cmd):

View File

@ -272,19 +272,22 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertRaises(exception.MissingParameterValue,
self.passthru.heartbeat, task, **kwargs)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'deploy_has_started',
autospec=True)
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'deploy_is_done',
autospec=True)
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
def test_heartbeat_deploy_done_fails(self, log_mock, done_mock,
failed_mock):
failed_mock, deploy_started_mock):
deploy_started_mock.return_value = True
kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar'
}
done_mock.side_effect = iter([Exception('LlamaException')])
with task_manager.acquire(
self.context, self.node['uuid'], shared=True) as task:
task.node.provision_state = states.DEPLOYING
task.node.provision_state = states.DEPLOYWAIT
task.node.target_provision_state = states.ACTIVE
self.passthru.heartbeat(task, **kwargs)
failed_mock.assert_called_once_with(task, mock.ANY)