Use DEPLOYWAIT while waiting for agent to write image

There is an assumption that anything in DEPLOYING state is locked; the
agent driver breaks that assumption. Fix it by using DEPLOYWAIT while
waiting for the agent to write an image.

Change-Id: I4957bd9608b1bc92177e69efef66ecb951181de1
Related-Bug: #1461937
This commit is contained in:
Jim Rollenhagen 2015-07-09 08:39:04 -07:00
parent 09433e4153
commit f1929f0155
4 changed files with 71 additions and 7 deletions

View File

@ -437,6 +437,15 @@ class AgentDeploy(base.DeployInterface):
class AgentVendorInterface(agent_base_vendor.BaseAgentVendor): class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
def deploy_has_started(self, task):
    """Check whether the agent was ever asked to write the image.

    Asks the agent client for the command list of ``task.node`` and looks
    for a ``prepare_image`` entry, regardless of that command's status.

    :param task: a task object; only ``task.node`` is read here.
    :returns: True if a ``prepare_image`` command is reported, False
        otherwise (including when no commands are reported at all).
    """
    commands = self._client.get_commands_status(task.node)
    # Any prepare_image entry means the deploy did start at some point.
    # An empty or missing command list is treated as "not started",
    # matching the ``if not commands`` guard used by deploy_is_done.
    return any(command['command_name'] == 'prepare_image'
               for command in commands or ())
def deploy_is_done(self, task): def deploy_is_done(self, task):
commands = self._client.get_commands_status(task.node) commands = self._client.get_commands_status(task.node)
if not commands: if not commands:
@ -479,6 +488,8 @@ class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
LOG.debug('prepare_image got response %(res)s for node %(node)s', LOG.debug('prepare_image got response %(res)s for node %(node)s',
{'res': res, 'node': node.uuid}) {'res': res, 'node': node.uuid})
task.process_event('wait')
def check_deploy_success(self, node): def check_deploy_success(self, node):
# should only ever be called after we've validated that # should only ever be called after we've validated that
# the prepare_image command is complete # the prepare_image command is complete
@ -487,6 +498,7 @@ class AgentVendorInterface(agent_base_vendor.BaseAgentVendor):
return command['command_error'] return command['command_error']
def reboot_to_instance(self, task, **kwargs): def reboot_to_instance(self, task, **kwargs):
task.process_event('resume')
node = task.node node = task.node
LOG.debug('Preparing to reboot to instance for node %s', LOG.debug('Preparing to reboot to instance for node %s',
node.uuid) node.uuid)

View File

@ -88,6 +88,13 @@ class BaseAgentVendor(base.VendorInterface):
""" """
pass pass
def deploy_has_started(self, task):
    """Report whether the deployment has already begun.

    This base implementation is an empty hook: it performs no work and
    returns None.

    :param task: a task object (not used by this base implementation).
    :returns: True if the deploy has started, False otherwise. This
        stub itself returns None.
    """
    return None
def deploy_is_done(self, task): def deploy_is_done(self, task):
"""Check if the deployment is already completed. """Check if the deployment is already completed.
@ -245,10 +252,11 @@ class BaseAgentVendor(base.VendorInterface):
LOG.debug('Heartbeat from node %(node)s in maintenance mode; ' LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
'not taking any action.', {'node': node.uuid}) 'not taking any action.', {'node': node.uuid})
return return
elif node.provision_state == states.DEPLOYWAIT: elif (node.provision_state == states.DEPLOYWAIT and
not self.deploy_has_started(task)):
msg = _('Node failed to get image for deploy.') msg = _('Node failed to get image for deploy.')
self.continue_deploy(task, **kwargs) self.continue_deploy(task, **kwargs)
elif (node.provision_state == states.DEPLOYING and elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_is_done(task)): self.deploy_is_done(task)):
msg = _('Node failed to move to active state.') msg = _('Node failed to move to active state.')
self.reboot_to_instance(task, **kwargs) self.reboot_to_instance(task, **kwargs)

View File

@ -398,7 +398,7 @@ class TestAgentVendor(db_base.DbTestCase):
client_mock.prepare_image.assert_called_with(task.node, client_mock.prepare_image.assert_called_with(task.node,
expected_image_info) expected_image_info)
self.assertEqual(states.DEPLOYING, task.node.provision_state) self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
self.assertEqual(states.ACTIVE, self.assertEqual(states.ACTIVE,
task.node.target_provision_state) task.node.target_provision_state)
@ -424,7 +424,7 @@ class TestAgentVendor(db_base.DbTestCase):
client_mock.prepare_image.assert_called_with(task.node, client_mock.prepare_image.assert_called_with(task.node,
expected_image_info) expected_image_info)
self.assertEqual(states.DEPLOYING, task.node.provision_state) self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
self.assertEqual(states.ACTIVE, self.assertEqual(states.ACTIVE,
task.node.target_provision_state) task.node.target_provision_state)
@ -441,7 +441,7 @@ class TestAgentVendor(db_base.DbTestCase):
node_power_action_mock): node_power_action_mock):
check_deploy_mock.return_value = None check_deploy_mock.return_value = None
self.node.provision_state = states.DEPLOYING self.node.provision_state = states.DEPLOYWAIT
self.node.target_provision_state = states.ACTIVE self.node.target_provision_state = states.ACTIVE
self.node.save() self.node.save()
@ -459,6 +459,47 @@ class TestAgentVendor(db_base.DbTestCase):
self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.provision_state)
self.assertEqual(states.NOSTATE, task.node.target_provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_has_started(self, mock_get_cmd):
    """An empty command list means the deploy has not started."""
    no_commands = []
    with task_manager.acquire(self.context, self.node.uuid) as acquired:
        mock_get_cmd.return_value = no_commands
        started = self.passthru.deploy_has_started(acquired)
        self.assertFalse(started)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_has_started_is_done(self, mock_get_cmd):
    """A prepare_image command with SUCCESS status counts as started."""
    finished = {'command_name': 'prepare_image',
                'command_status': 'SUCCESS'}
    with task_manager.acquire(self.context, self.node.uuid) as acquired:
        mock_get_cmd.return_value = [finished]
        started = self.passthru.deploy_has_started(acquired)
        self.assertTrue(started)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_has_started_did_start(self, mock_get_cmd):
    """A prepare_image command with RUNNING status counts as started."""
    in_progress = {'command_name': 'prepare_image',
                   'command_status': 'RUNNING'}
    with task_manager.acquire(self.context, self.node.uuid) as acquired:
        mock_get_cmd.return_value = [in_progress]
        started = self.passthru.deploy_has_started(acquired)
        self.assertTrue(started)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_has_started_multiple_commands(self, mock_get_cmd):
    """prepare_image is detected even when it follows another command."""
    cached = {'command_name': 'cache_image',
              'command_status': 'SUCCESS'}
    in_progress = {'command_name': 'prepare_image',
                   'command_status': 'RUNNING'}
    with task_manager.acquire(self.context, self.node.uuid) as acquired:
        mock_get_cmd.return_value = [cached, in_progress]
        started = self.passthru.deploy_has_started(acquired)
        self.assertTrue(started)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
                   autospec=True)
def test_deploy_has_started_other_commands(self, mock_get_cmd):
    """Commands other than prepare_image do not count as started."""
    cached = {'command_name': 'cache_image',
              'command_status': 'SUCCESS'}
    with task_manager.acquire(self.context, self.node.uuid) as acquired:
        mock_get_cmd.return_value = [cached]
        started = self.passthru.deploy_has_started(acquired)
        self.assertFalse(started)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status', @mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True) autospec=True)
def test_deploy_is_done(self, mock_get_cmd): def test_deploy_is_done(self, mock_get_cmd):

View File

@ -272,19 +272,22 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertRaises(exception.MissingParameterValue, self.assertRaises(exception.MissingParameterValue,
self.passthru.heartbeat, task, **kwargs) self.passthru.heartbeat, task, **kwargs)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'deploy_has_started',
autospec=True)
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True) @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'deploy_is_done', @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'deploy_is_done',
autospec=True) autospec=True)
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True) @mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
def test_heartbeat_deploy_done_fails(self, log_mock, done_mock, def test_heartbeat_deploy_done_fails(self, log_mock, done_mock,
failed_mock): failed_mock, deploy_started_mock):
deploy_started_mock.return_value = True
kwargs = { kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar' 'agent_url': 'http://127.0.0.1:9999/bar'
} }
done_mock.side_effect = iter([Exception('LlamaException')]) done_mock.side_effect = iter([Exception('LlamaException')])
with task_manager.acquire( with task_manager.acquire(
self.context, self.node['uuid'], shared=True) as task: self.context, self.node['uuid'], shared=True) as task:
task.node.provision_state = states.DEPLOYING task.node.provision_state = states.DEPLOYWAIT
task.node.target_provision_state = states.ACTIVE task.node.target_provision_state = states.ACTIVE
self.passthru.heartbeat(task, **kwargs) self.passthru.heartbeat(task, **kwargs)
failed_mock.assert_called_once_with(task, mock.ANY) failed_mock.assert_called_once_with(task, mock.ANY)