Do not assume that prepare_image is the last command to run

The get_deploy_steps command can run after it, which breaks the deploy.

Change-Id: I8e641a521a574462010a95a19e8a64ac36d4e52d
This commit is contained in:
Dmitry Tantsur 2020-09-04 11:33:31 +02:00
parent b605ab585a
commit ce46cc461d
4 changed files with 84 additions and 32 deletions

View File

@ -292,9 +292,17 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
# TODO(dtantsur): remove in W # TODO(dtantsur): remove in W
def _get_uuid_from_result(self, task, type_uuid): def _get_uuid_from_result(self, task, type_uuid):
command = self._client.get_commands_status(task.node)[-1] command = self._client.get_last_command_status(task.node,
'prepare_image')
if (not command
or not command.get('command_result', {}).get('result')):
msg = _('Unexpected response from the agent for node %s: the '
'running command list does not include prepare_image '
'or its result is malformed') % task.node.uuid
LOG.error(msg)
deploy_utils.set_failed_state(task, msg)
return
if command['command_result'] is not None:
words = command['command_result']['result'].split() words = command['command_result']['result'].split()
for word in words: for word in words:
if type_uuid in word: if type_uuid in word:

View File

@ -102,23 +102,11 @@ class AgentClient(object):
:param method: A string represents the command executed by agent. :param method: A string represents the command executed by agent.
:raises: AgentCommandTimeout if timeout is reached. :raises: AgentCommandTimeout if timeout is reached.
""" """
try:
method = method.split('.', 1)[1]
except IndexError:
pass
# NOTE(dtantsur): this function uses AgentCommandTimeout on every # NOTE(dtantsur): this function uses AgentCommandTimeout on every
# failure, but unless the timeout is reached, the exception is caught # failure, but unless the timeout is reached, the exception is caught
# and retried by the @retry decorator above. # and retried by the @retry decorator above.
result = self.get_last_command_status(node, method)
commands = self.get_commands_status(node) if result is None:
try:
result = next(c for c in reversed(commands)
if c.get('command_name') == method)
except StopIteration:
LOG.debug('Command %(cmd)s is not in the executing commands list '
'for node %(node)s',
{'cmd': method, 'node': node.uuid})
raise exception.AgentCommandTimeout(command=method, node=node.uuid) raise exception.AgentCommandTimeout(command=method, node=node.uuid)
if result.get('command_status') == 'RUNNING': if result.get('command_status') == 'RUNNING':
@ -312,6 +300,29 @@ class AgentClient(object):
{'node': node.uuid, 'status': status}) {'node': node.uuid, 'status': status})
return result return result
def get_last_command_status(self, node, method):
    """Get the last status for the given command.

    :param node: A Node object.
    :param method: Command name. Anything up to and including the first
        dot is dropped before matching, so ``standby.prepare_image`` is
        looked up as ``prepare_image``.
    :returns: A dict containing command status from agent or None
        if the command was not found.
    """
    # Without a dot, split() yields a single element and [-1] keeps the
    # name unchanged; with a dot it selects the part after the first dot.
    method = method.split('.', 1)[-1]

    # Scan from the end of the list so that, when the same command
    # appears more than once, the entry closest to the end is returned.
    for command in reversed(self.get_commands_status(node)):
        if command.get('command_name') == method:
            return command

    LOG.debug('Command %(cmd)s is not in the executing commands list '
              'for node %(node)s',
              {'cmd': method, 'node': node.uuid})
    return None
@METRICS.timer('AgentClient.prepare_image') @METRICS.timer('AgentClient.prepare_image')
def prepare_image(self, node, image_info, wait=False): def prepare_image(self, node, image_info, wait=False):
"""Call the `prepare_image` method on the node. """Call the `prepare_image` method on the node.

View File

@ -1649,6 +1649,34 @@ class TestAgentDeploy(db_base.DbTestCase):
self.node.refresh() self.node.refresh()
self.assertEqual('bar', self.node.instance_info['foo']) self.assertEqual('bar', self.node.instance_info['foo'])
# Checks that _get_uuid_from_result(task, 'root_uuid') returns 'abcd' when
# the patched get_commands_status reports a prepare_image entry that is
# followed by a get_deploy_steps entry.
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_get_uuid_from_result(self, mock_statuses):
# prepare_image is placed in the middle of the list, not at the end.
mock_statuses.return_value = [
{'command_name': 'banana', 'command_result': None},
{'command_name': 'prepare_image',
'command_result': {'result': 'okay root_uuid=abcd'}},
{'command_name': 'get_deploy_steps',
'command_result': {'deploy_steps': []}}
]
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
result = self.driver._get_uuid_from_result(task, 'root_uuid')
# 'abcd' is the value following 'root_uuid=' in the prepare_image result.
self.assertEqual('abcd', result)
# Checks that _get_uuid_from_result(task, 'root_uuid') returns None when
# the patched get_commands_status reports no prepare_image entry at all.
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_get_uuid_from_result_fails(self, mock_statuses):
# Neither entry in this list is named prepare_image.
mock_statuses.return_value = [
{'command_name': 'banana', 'command_result': None},
{'command_name': 'get_deploy_steps',
'command_result': {'deploy_steps': []}}
]
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
result = self.driver._get_uuid_from_result(task, 'root_uuid')
self.assertIsNone(result)
@mock.patch.object(manager_utils, 'restore_power_state_if_needed', @mock.patch.object(manager_utils, 'restore_power_state_if_needed',
autospec=True) autospec=True)
@mock.patch.object(manager_utils, 'power_on_node_if_needed', @mock.patch.object(manager_utils, 'power_on_node_if_needed',

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Fixes the deployment failure with Ussuri (and older) ramdisks that happens
when another IPA command runs after ``prepare_image``.