Merge "Retries and timeout for IPA command" into stable/rocky
This commit is contained in:
commit
d0493735e4
|
@ -89,6 +89,14 @@ opts = [
|
||||||
'forever or until manually deleted. Used when the '
|
'forever or until manually deleted. Used when the '
|
||||||
'deploy_logs_storage_backend is configured to '
|
'deploy_logs_storage_backend is configured to '
|
||||||
'"swift".')),
|
'"swift".')),
|
||||||
|
cfg.IntOpt('command_timeout',
|
||||||
|
default=60,
|
||||||
|
help=_('Timeout (in seconds) for IPA commands')),
|
||||||
|
cfg.IntOpt('max_command_attempts',
|
||||||
|
default=3,
|
||||||
|
help=_('This is the maximum number of attempts that will be '
|
||||||
|
'done for IPA commands that fails due to network '
|
||||||
|
'problems')),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ from ironic_lib import metrics_utils
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
from oslo_serialization import jsonutils
|
from oslo_serialization import jsonutils
|
||||||
import requests
|
import requests
|
||||||
|
import retrying
|
||||||
from six.moves import http_client
|
from six.moves import http_client
|
||||||
|
|
||||||
from ironic.common import exception
|
from ironic.common import exception
|
||||||
|
@ -55,6 +56,10 @@ class AgentClient(object):
|
||||||
})
|
})
|
||||||
|
|
||||||
@METRICS.timer('AgentClient._command')
|
@METRICS.timer('AgentClient._command')
|
||||||
|
@retrying.retry(
|
||||||
|
retry_on_exception=(
|
||||||
|
lambda e: isinstance(e, exception.AgentConnectionFailed)),
|
||||||
|
stop_max_attempt_number=CONF.agent.max_command_attempts)
|
||||||
def _command(self, node, method, params, wait=False):
|
def _command(self, node, method, params, wait=False):
|
||||||
"""Sends command to agent.
|
"""Sends command to agent.
|
||||||
|
|
||||||
|
@ -80,8 +85,9 @@ class AgentClient(object):
|
||||||
{'node': node.uuid, 'method': method})
|
{'node': node.uuid, 'method': method})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.session.post(url, params=request_params, data=body)
|
response = self.session.post(url, params=request_params, data=body,
|
||||||
except requests.ConnectionError as e:
|
timeout=CONF.agent.command_timeout)
|
||||||
|
except (requests.ConnectionError, requests.Timeout) as e:
|
||||||
msg = (_('Failed to connect to the agent running on node %(node)s '
|
msg = (_('Failed to connect to the agent running on node %(node)s '
|
||||||
'for invoking command %(method)s. Error: %(error)s') %
|
'for invoking command %(method)s. Error: %(error)s') %
|
||||||
{'node': node.uuid, 'method': method, 'error': e})
|
{'node': node.uuid, 'method': method, 'error': e})
|
||||||
|
|
|
@ -96,7 +96,8 @@ class TestAgentClient(base.TestCase):
|
||||||
self.client.session.post.assert_called_once_with(
|
self.client.session.post.assert_called_once_with(
|
||||||
url,
|
url,
|
||||||
data=body,
|
data=body,
|
||||||
params={'wait': 'false'})
|
params={'wait': 'false'},
|
||||||
|
timeout=60)
|
||||||
|
|
||||||
def test__command_fail_json(self):
|
def test__command_fail_json(self):
|
||||||
response_text = 'this be not json matey!'
|
response_text = 'this be not json matey!'
|
||||||
|
@ -114,7 +115,8 @@ class TestAgentClient(base.TestCase):
|
||||||
self.client.session.post.assert_called_once_with(
|
self.client.session.post.assert_called_once_with(
|
||||||
url,
|
url,
|
||||||
data=body,
|
data=body,
|
||||||
params={'wait': 'false'})
|
params={'wait': 'false'},
|
||||||
|
timeout=60)
|
||||||
|
|
||||||
def test__command_fail_post(self):
|
def test__command_fail_post(self):
|
||||||
error = 'Boom'
|
error = 'Boom'
|
||||||
|
@ -151,6 +153,68 @@ class TestAgentClient(base.TestCase):
|
||||||
{'method': method, 'node': self.node.uuid,
|
{'method': method, 'node': self.node.uuid,
|
||||||
'error': error}, str(e))
|
'error': error}, str(e))
|
||||||
|
|
||||||
|
def test__command_fail_all_attempts(self):
|
||||||
|
error = 'Connection Timeout'
|
||||||
|
method = 'standby.run_image'
|
||||||
|
image_info = {'image_id': 'test_image'}
|
||||||
|
params = {'image_info': image_info}
|
||||||
|
self.client.session.post.side_effect = [requests.Timeout(error),
|
||||||
|
requests.Timeout(error),
|
||||||
|
requests.Timeout(error),
|
||||||
|
requests.Timeout(error)]
|
||||||
|
self.client._get_command_url(self.node)
|
||||||
|
self.client._get_command_body(method, params)
|
||||||
|
|
||||||
|
e = self.assertRaises(exception.AgentConnectionFailed,
|
||||||
|
self.client._command,
|
||||||
|
self.node, method, params)
|
||||||
|
self.assertEqual('Connection to agent failed: Failed to connect to '
|
||||||
|
'the agent running on node %(node)s for invoking '
|
||||||
|
'command %(method)s. Error: %(error)s' %
|
||||||
|
{'method': method, 'node': self.node.uuid,
|
||||||
|
'error': error}, str(e))
|
||||||
|
self.assertEqual(3, self.client.session.post.call_count)
|
||||||
|
|
||||||
|
def test__command_succeed_after_two_timeouts(self):
|
||||||
|
error = 'Connection Timeout'
|
||||||
|
response_data = {'status': 'ok'}
|
||||||
|
response_text = json.dumps(response_data)
|
||||||
|
method = 'standby.run_image'
|
||||||
|
image_info = {'image_id': 'test_image'}
|
||||||
|
params = {'image_info': image_info}
|
||||||
|
self.client.session.post.side_effect = [requests.Timeout(error),
|
||||||
|
requests.Timeout(error),
|
||||||
|
MockResponse(response_text)]
|
||||||
|
|
||||||
|
response = self.client._command(self.node, method, params)
|
||||||
|
self.assertEqual(3, self.client.session.post.call_count)
|
||||||
|
self.assertEqual(response, response_data)
|
||||||
|
self.client.session.post.assert_called_with(
|
||||||
|
self.client._get_command_url(self.node),
|
||||||
|
data=self.client._get_command_body(method, params),
|
||||||
|
params={'wait': 'false'},
|
||||||
|
timeout=60)
|
||||||
|
|
||||||
|
def test__command_succeed_after_one_timeout(self):
|
||||||
|
error = 'Connection Timeout'
|
||||||
|
response_data = {'status': 'ok'}
|
||||||
|
response_text = json.dumps(response_data)
|
||||||
|
method = 'standby.run_image'
|
||||||
|
image_info = {'image_id': 'test_image'}
|
||||||
|
params = {'image_info': image_info}
|
||||||
|
self.client.session.post.side_effect = [requests.Timeout(error),
|
||||||
|
MockResponse(response_text),
|
||||||
|
requests.Timeout(error)]
|
||||||
|
|
||||||
|
response = self.client._command(self.node, method, params)
|
||||||
|
self.assertEqual(2, self.client.session.post.call_count)
|
||||||
|
self.assertEqual(response, response_data)
|
||||||
|
self.client.session.post.assert_called_with(
|
||||||
|
self.client._get_command_url(self.node),
|
||||||
|
data=self.client._get_command_body(method, params),
|
||||||
|
params={'wait': 'false'},
|
||||||
|
timeout=60)
|
||||||
|
|
||||||
def test__command_error_code(self):
|
def test__command_error_code(self):
|
||||||
response_text = '{"faultstring": "you dun goofd"}'
|
response_text = '{"faultstring": "you dun goofd"}'
|
||||||
self.client.session.post.return_value = MockResponse(
|
self.client.session.post.return_value = MockResponse(
|
||||||
|
@ -168,7 +232,8 @@ class TestAgentClient(base.TestCase):
|
||||||
self.client.session.post.assert_called_once_with(
|
self.client.session.post.assert_called_once_with(
|
||||||
url,
|
url,
|
||||||
data=body,
|
data=body,
|
||||||
params={'wait': 'false'})
|
params={'wait': 'false'},
|
||||||
|
timeout=60)
|
||||||
|
|
||||||
def test_get_commands_status(self):
|
def test_get_commands_status(self):
|
||||||
with mock.patch.object(self.client.session, 'get',
|
with mock.patch.object(self.client.session, 'get',
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Adds ``command_timeout`` and ``max_command_attempts`` configuration options
|
||||||
|
for IPA commands, so that a command is retried when a connection error or timeout occurs.
|
Loading…
Reference in New Issue