Browse Source

Merge "Retries and timeout for IPA command" into stable/rocky

stable/rocky
Zuul 1 month ago
committed by Gerrit Code Review
parent
commit
d0493735e4
4 changed files with 89 additions and 5 deletions
  1. +8
    -0
      ironic/conf/agent.py
  2. +8
    -2
      ironic/drivers/modules/agent_client.py
  3. +68
    -3
      ironic/tests/unit/drivers/modules/test_agent_client.py
  4. +5
    -0
      releasenotes/notes/ipa-command-retries-and-timeout-29b0be3f2c21328c.yaml

+ 8
- 0
ironic/conf/agent.py View File

@@ -89,6 +89,14 @@ opts = [
'forever or until manually deleted. Used when the '
'deploy_logs_storage_backend is configured to '
'"swift".')),
cfg.IntOpt('command_timeout',
default=60,
help=_('Timeout (in seconds) for IPA commands')),
cfg.IntOpt('max_command_attempts',
default=3,
help=_('This is the maximum number of attempts that will be '
'done for IPA commands that fails due to network '
'problems')),
]




+ 8
- 2
ironic/drivers/modules/agent_client.py View File

@@ -16,6 +16,7 @@ from ironic_lib import metrics_utils
from oslo_log import log
from oslo_serialization import jsonutils
import requests
import retrying
from six.moves import http_client

from ironic.common import exception
@@ -55,6 +56,10 @@ class AgentClient(object):
})

@METRICS.timer('AgentClient._command')
@retrying.retry(
retry_on_exception=(
lambda e: isinstance(e, exception.AgentConnectionFailed)),
stop_max_attempt_number=CONF.agent.max_command_attempts)
def _command(self, node, method, params, wait=False):
"""Sends command to agent.

@@ -80,8 +85,9 @@ class AgentClient(object):
{'node': node.uuid, 'method': method})

try:
response = self.session.post(url, params=request_params, data=body)
except requests.ConnectionError as e:
response = self.session.post(url, params=request_params, data=body,
timeout=CONF.agent.command_timeout)
except (requests.ConnectionError, requests.Timeout) as e:
msg = (_('Failed to connect to the agent running on node %(node)s '
'for invoking command %(method)s. Error: %(error)s') %
{'node': node.uuid, 'method': method, 'error': e})


+ 68
- 3
ironic/tests/unit/drivers/modules/test_agent_client.py View File

@@ -96,7 +96,8 @@ class TestAgentClient(base.TestCase):
self.client.session.post.assert_called_once_with(
url,
data=body,
params={'wait': 'false'})
params={'wait': 'false'},
timeout=60)

def test__command_fail_json(self):
response_text = 'this be not json matey!'
@@ -114,7 +115,8 @@ class TestAgentClient(base.TestCase):
self.client.session.post.assert_called_once_with(
url,
data=body,
params={'wait': 'false'})
params={'wait': 'false'},
timeout=60)

def test__command_fail_post(self):
error = 'Boom'
@@ -151,6 +153,68 @@ class TestAgentClient(base.TestCase):
{'method': method, 'node': self.node.uuid,
'error': error}, str(e))

def test__command_fail_all_attempts(self):
    """Verify that _command gives up once every attempt has timed out.

    Every ``session.post`` call raises ``requests.Timeout``. The test
    expects ``AgentConnectionFailed`` to reach the caller carrying the
    connection-failure message, and expects exactly three POST attempts
    to have been made.
    """
    error = 'Connection Timeout'
    method = 'standby.run_image'
    image_info = {'image_id': 'test_image'}
    params = {'image_info': image_info}
    # Four failures are queued although only three attempts are expected,
    # so an unexpected fourth attempt would be visible in call_count.
    self.client.session.post.side_effect = [requests.Timeout(error),
                                            requests.Timeout(error),
                                            requests.Timeout(error),
                                            requests.Timeout(error)]

    e = self.assertRaises(exception.AgentConnectionFailed,
                          self.client._command,
                          self.node, method, params)
    self.assertEqual('Connection to agent failed: Failed to connect to '
                     'the agent running on node %(node)s for invoking '
                     'command %(method)s. Error: %(error)s' %
                     {'method': method, 'node': self.node.uuid,
                      'error': error}, str(e))
    self.assertEqual(3, self.client.session.post.call_count)

def test__command_succeed_after_two_timeouts(self):
    """Verify that _command succeeds on the third attempt.

    The first two ``session.post`` calls raise ``requests.Timeout`` and
    the third returns a valid JSON response. The decoded response must
    be returned and the last POST must carry the expected arguments.
    """
    method = 'standby.run_image'
    params = {'image_info': {'image_id': 'test_image'}}
    expected = {'status': 'ok'}
    post = self.client.session.post
    post.side_effect = [
        requests.Timeout('Connection Timeout'),
        requests.Timeout('Connection Timeout'),
        MockResponse(json.dumps(expected)),
    ]

    result = self.client._command(self.node, method, params)

    # Two failed attempts plus the successful one.
    self.assertEqual(3, post.call_count)
    self.assertEqual(result, expected)
    post.assert_called_with(
        self.client._get_command_url(self.node),
        data=self.client._get_command_body(method, params),
        params={'wait': 'false'},
        timeout=60)

def test__command_succeed_after_one_timeout(self):
    """Verify that _command stops retrying once an attempt succeeds.

    The first ``session.post`` call raises ``requests.Timeout`` and the
    second returns a valid JSON response. A third queued failure must
    never be consumed, so exactly two POST calls are expected.
    """
    method = 'standby.run_image'
    params = {'image_info': {'image_id': 'test_image'}}
    expected = {'status': 'ok'}
    post = self.client.session.post
    post.side_effect = [
        requests.Timeout('Connection Timeout'),
        MockResponse(json.dumps(expected)),
        # Left unconsumed when the retry loop stops after the success.
        requests.Timeout('Connection Timeout'),
    ]

    result = self.client._command(self.node, method, params)

    self.assertEqual(2, post.call_count)
    self.assertEqual(result, expected)
    post.assert_called_with(
        self.client._get_command_url(self.node),
        data=self.client._get_command_body(method, params),
        params={'wait': 'false'},
        timeout=60)

def test__command_error_code(self):
response_text = '{"faultstring": "you dun goofd"}'
self.client.session.post.return_value = MockResponse(
@@ -168,7 +232,8 @@ class TestAgentClient(base.TestCase):
self.client.session.post.assert_called_once_with(
url,
data=body,
params={'wait': 'false'})
params={'wait': 'false'},
timeout=60)

def test_get_commands_status(self):
with mock.patch.object(self.client.session, 'get',


+ 5
- 0
releasenotes/notes/ipa-command-retries-and-timeout-29b0be3f2c21328c.yaml View File

@@ -0,0 +1,5 @@
---
fixes:
- |
Adds the ``[agent]command_timeout`` and ``[agent]max_command_attempts``
configuration options, so that an IPA command which fails because of a
connection error or a timeout is retried.

Loading…
Cancel
Save