Do not assume that prepare_image is the last command to run

The get_deploy_steps command can be run after it breaking deploy.

Change-Id: I8e641a521a574462010a95a19e8a64ac36d4e52d
This commit is contained in:
Dmitry Tantsur 2020-09-04 11:33:31 +02:00
parent b605ab585a
commit ce46cc461d
4 changed files with 84 additions and 32 deletions

View File

@ -292,25 +292,33 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
# TODO(dtantsur): remove in W
def _get_uuid_from_result(self, task, type_uuid):
command = self._client.get_commands_status(task.node)[-1]
command = self._client.get_last_command_status(task.node,
'prepare_image')
if (not command
or not command.get('command_result', {}).get('result')):
msg = _('Unexpected response from the agent for node %s: the '
'running command list does not include prepare_image '
'or its result is malformed') % task.node.uuid
LOG.error(msg)
deploy_utils.set_failed_state(task, msg)
return
if command['command_result'] is not None:
words = command['command_result']['result'].split()
for word in words:
if type_uuid in word:
result = word.split('=')[1]
if not result:
msg = (_('Command result did not return %(type_uuid)s '
'for node %(node)s. The version of the IPA '
'ramdisk used in the deployment might not '
'have support for provisioning of '
'partition images.') %
{'type_uuid': type_uuid,
'node': task.node.uuid})
LOG.error(msg)
deploy_utils.set_failed_state(task, msg)
return
return result
words = command['command_result']['result'].split()
for word in words:
if type_uuid in word:
result = word.split('=')[1]
if not result:
msg = (_('Command result did not return %(type_uuid)s '
'for node %(node)s. The version of the IPA '
'ramdisk used in the deployment might not '
'have support for provisioning of '
'partition images.') %
{'type_uuid': type_uuid,
'node': task.node.uuid})
LOG.error(msg)
deploy_utils.set_failed_state(task, msg)
return
return result
@METRICS.timer('AgentDeployMixin.prepare_instance_boot')
@base.deploy_step(priority=60)

View File

@ -102,23 +102,11 @@ class AgentClient(object):
:param method: A string represents the command executed by agent.
:raises: AgentCommandTimeout if timeout is reached.
"""
try:
method = method.split('.', 1)[1]
except IndexError:
pass
# NOTE(dtantsur): this function uses AgentCommandTimeout on every
# failure, but unless the timeout is reached, the exception is caught
# and retried by the @retry decorator above.
commands = self.get_commands_status(node)
try:
result = next(c for c in reversed(commands)
if c.get('command_name') == method)
except StopIteration:
LOG.debug('Command %(cmd)s is not in the executing commands list '
'for node %(node)s',
{'cmd': method, 'node': node.uuid})
result = self.get_last_command_status(node, method)
if result is None:
raise exception.AgentCommandTimeout(command=method, node=node.uuid)
if result.get('command_status') == 'RUNNING':
@ -312,6 +300,29 @@ class AgentClient(object):
{'node': node.uuid, 'status': status})
return result
def get_last_command_status(self, node, method):
"""Get the last status for the given command.
:param node: A Node object.
:param method: Command name.
:returns: A dict containing command status from agent or None
if the command was not found.
"""
try:
method = method.split('.', 1)[1]
except IndexError:
pass
commands = self.get_commands_status(node)
try:
return next(c for c in reversed(commands)
if c.get('command_name') == method)
except StopIteration:
LOG.debug('Command %(cmd)s is not in the executing commands list '
'for node %(node)s',
{'cmd': method, 'node': node.uuid})
return None
@METRICS.timer('AgentClient.prepare_image')
def prepare_image(self, node, image_info, wait=False):
"""Call the `prepare_image` method on the node.

View File

@ -1649,6 +1649,34 @@ class TestAgentDeploy(db_base.DbTestCase):
self.node.refresh()
self.assertEqual('bar', self.node.instance_info['foo'])
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_get_uuid_from_result(self, mock_statuses):
mock_statuses.return_value = [
{'command_name': 'banana', 'command_result': None},
{'command_name': 'prepare_image',
'command_result': {'result': 'okay root_uuid=abcd'}},
{'command_name': 'get_deploy_steps',
'command_result': {'deploy_steps': []}}
]
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
result = self.driver._get_uuid_from_result(task, 'root_uuid')
self.assertEqual('abcd', result)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_get_uuid_from_result_fails(self, mock_statuses):
mock_statuses.return_value = [
{'command_name': 'banana', 'command_result': None},
{'command_name': 'get_deploy_steps',
'command_result': {'deploy_steps': []}}
]
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
result = self.driver._get_uuid_from_result(task, 'root_uuid')
self.assertIsNone(result)
@mock.patch.object(manager_utils, 'restore_power_state_if_needed',
autospec=True)
@mock.patch.object(manager_utils, 'power_on_node_if_needed',

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Fixes the deployment failure with Ussuri (and older) ramdisks that happens
when another IPA command runs after ``prepare_image``.