Merge "Handle agent still doing the prior command"

This commit is contained in:
Zuul 2020-10-30 16:10:10 +00:00 committed by Gerrit Code Review
commit 31277f2c95
10 changed files with 188 additions and 8 deletions

View File

@ -813,3 +813,8 @@ class UnknownAttribute(ClientSideError):
else:
self.fieldname = name
super(UnknownAttribute, self).__init__(self.msg)
class AgentInProgress(IronicException):
_msg_fmt = _('Node %(node)s command "%(command)s" failed. Agent is '
'presently executing a command. Error %(error)s')

View File

@ -179,6 +179,17 @@ def do_next_clean_step(task, step_index):
else None)
task.process_event('wait', target_state=target_state)
return
if isinstance(e, exception.AgentInProgress):
LOG.info('Conductor attempted to process clean step for '
'node %(node)s. Agent indicated it is presently '
'executing a command. Error: %(error)s',
{'node': task.node.uuid,
'error': e})
driver_internal_info['skip_current_clean_step'] = False
node.driver_internal_info = driver_internal_info
target_state = states.MANAGEABLE if manual_clean else None
task.process_event('wait', target_state=target_state)
return
msg = (_('Node %(node)s failed step %(step)s: '
'%(exc)s') %

View File

@ -257,6 +257,16 @@ def do_next_deploy_step(task, step_index):
node.driver_internal_info = driver_internal_info
task.process_event('wait')
return
if isinstance(e, exception.AgentInProgress):
LOG.info('Conductor attempted to process deploy step for '
'node %(node)s. Agent indicated it is presently '
'executing a command. Error: %(error)s',
{'node': task.node.uuid,
'error': e})
driver_internal_info['skip_current_deploy_step'] = False
node.driver_internal_info = driver_internal_info
task.process_event('wait')
return
log_msg = ('Node %(node)s failed deploy step %(step)s. Error: '
'%(err)s' %
{'node': node.uuid, 'step': node.deploy_step, 'err': e})

View File

@ -898,11 +898,11 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
call = getattr(self._client, 'get_%s_steps' % step_type)
try:
agent_result = call(node, task.ports).get('command_result', {})
except exception.AgentAPIError as exc:
if 'agent is busy' in str(exc):
LOG.debug('Agent is busy with a command, will refresh steps '
'on the next heartbeat')
return
except exception.AgentInProgress as exc:
LOG.debug('Agent for node %(node)s is busy with a command, '
'will refresh steps on the next heartbeat.',
{'node': task.node.uuid})
return
# TODO(dtantsur): change to just 'raise'
if step_type == 'clean':

View File

@ -153,6 +153,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command result from agent, see
get_commands_status for a sample.
"""
@ -223,6 +225,17 @@ class AgentClient(object):
'Expected 2xx HTTP status code, got %(code)d.',
{'method': method, 'node': node.uuid,
'code': response.status_code})
if (response.status_code == http_client.CONFLICT
or 'agent is busy' in faultstring.lower()):
# HTTP 409 check as an explicit check of if the agent
# is already busy.
# NOTE(TheJulia): The agent sends upper case A as of
# late victoria, but lower case the entire message
# for compatability with pre-late victoria agents
# which returns HTTP 409.
raise exception.AgentInProgress(node=node.uuid,
command=method,
error=faultstring)
raise exception.AgentAPIError(node=node.uuid,
status=response.status_code,
error=faultstring)
@ -337,6 +350,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command status from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -370,6 +385,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -400,6 +417,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -456,6 +475,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
The value of key command_result is in the form of:
@ -487,6 +508,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
The value of key command_result is in the form of:
@ -550,6 +573,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
The value of key command_result is in the form of:
@ -581,6 +606,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
"""
@ -597,6 +624,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -612,6 +641,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -627,6 +658,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -643,6 +676,8 @@ class AgentClient(object):
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
"""
@ -660,6 +695,8 @@ class AgentClient(object):
to issue the request, or there was a malformed response from
the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:raises: InstanceRescueFailure when the agent ramdisk is too old
to support transmission of the rescue password.
:returns: A dict containing command response from agent.

View File

@ -770,6 +770,37 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.clean_steps[0])
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def test_do_next_clean_step_agent_busy(self, mock_execute):
# When a clean step fails, go to CLEANWAIT
tgt_prov_state = states.MANAGEABLE
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=tgt_prov_state,
last_error=None,
driver_internal_info={'clean_steps': self.clean_steps,
'clean_step_index': None,
'cleaning_reboot': True},
clean_step={})
mock_execute.side_effect = exception.AgentInProgress(
reason='still meowing')
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
cleaning.do_next_clean_step(task, 0)
node.refresh()
# Make sure we go to CLEANWAIT
self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(tgt_prov_state, node.target_provision_state)
self.assertEqual(self.clean_steps[0], node.clean_step)
self.assertEqual(0, node.driver_internal_info['clean_step_index'])
self.assertFalse(node.driver_internal_info['skip_current_clean_step'])
mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.clean_steps[0])
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def test_do_next_clean_step_oob_reboot_last_step(self, mock_execute):

View File

@ -769,7 +769,42 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
self._stop_service()
node.refresh()
# Make sure we go to CLEANWAIT
# Make sure we go to DEPLOYWAIT
self.assertEqual(states.DEPLOYWAIT, node.provision_state)
self.assertEqual(tgt_prov_state, node.target_provision_state)
self.assertEqual(self.deploy_steps[0], node.deploy_step)
self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
self.assertFalse(node.driver_internal_info['skip_current_deploy_step'])
mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.deploy_steps[0])
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
autospec=True)
def test_do_next_deploy_step_agent_busy(self, mock_execute):
# When a deploy step fails, go to DEPLOYWAIT
tgt_prov_state = states.ACTIVE
self._start_service()
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.DEPLOYING,
target_provision_state=tgt_prov_state,
last_error=None,
driver_internal_info={'deploy_steps': self.deploy_steps,
'deploy_step_index': None,
'deployment_reboot': True},
clean_step={})
mock_execute.side_effect = exception.AgentInProgress(
reason='meow')
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
deployments.do_next_deploy_step(task, 0)
self._stop_service()
node.refresh()
# Make sure we go to DEPLOYWAIT
self.assertEqual(states.DEPLOYWAIT, node.provision_state)
self.assertEqual(tgt_prov_state, node.target_provision_state)
self.assertEqual(self.deploy_steps[0], node.deploy_step)

View File

@ -2223,8 +2223,8 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
@mock.patch.object(agent_client.AgentClient, 'get_deploy_steps',
autospec=True)
def test_refresh_steps_busy(self, client_mock, log_mock):
client_mock.side_effect = exception.AgentAPIError(
node="node", status="500", error='agent is busy')
client_mock.side_effect = exception.AgentInProgress(
node="node", error='Agent is busy : maximum meowing')
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:

View File

@ -237,6 +237,51 @@ class TestAgentClient(base.TestCase):
timeout=60,
verify=True)
def test__command_error_code_agent_busy(self):
# Victoria and previous busy status check.
response_text = {"faultstring": "Agent is busy - meowing"}
self.client.session.post.return_value = MockResponse(
response_text, status_code=http_client.BAD_REQUEST)
method = 'standby.run_image'
image_info = {'image_id': 'test_image'}
params = {'image_info': image_info}
url = self.client._get_command_url(self.node)
body = self.client._get_command_body(method, params)
self.assertRaises(exception.AgentInProgress,
self.client._command,
self.node, method, params)
self.client.session.post.assert_called_once_with(
url,
data=body,
params={'wait': 'false'},
timeout=60,
verify=True)
def test__command_error_code_agent_busy_conflict(self):
# Post Wallaby logic as the IPA return code changed to
# better delineate the state.
response_text = {"faultstring": "lolcat says busy meowing"}
self.client.session.post.return_value = MockResponse(
response_text, status_code=http_client.CONFLICT)
method = 'standby.run_image'
image_info = {'image_id': 'test_image'}
params = {'image_info': image_info}
url = self.client._get_command_url(self.node)
body = self.client._get_command_body(method, params)
self.assertRaises(exception.AgentInProgress,
self.client._command,
self.node, method, params)
self.client.session.post.assert_called_once_with(
url,
data=body,
params={'wait': 'false'},
timeout=60,
verify=True)
@mock.patch('os.path.exists', autospec=True, return_value=True)
def test__command_verify(self, mock_exists):
response_data = {'status': 'ok'}

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fixes recognition of a busy agent to also handle recognition
during deployment steps by more uniformly detecting and
identifying when the ``ironic-python-agent`` service is busy.