Add timeout and retries when connection to an image server
If the server is stuck for any reason, the download will hang for a potentially long time. Provide a timeout (defaults to 60 seconds) and 2 retries on failure. Change-Id: Ie53519266edd914fdbfa82fe52b4a55151e5ec5f
This commit is contained in:
parent
8a9df0b485
commit
8adb7e1a04
@ -239,6 +239,21 @@ cli_opts = [
|
||||
'enforce token validation. The configuration provided '
|
||||
'by the conductor MAY override this and force this '
|
||||
'setting to be changed to True in memory.'),
|
||||
cfg.IntOpt('image_download_connection_timeout', min=1,
|
||||
default=APARAMS.get(
|
||||
'ipa-image-download-connection-timeout', 60),
|
||||
help='The connection timeout (in seconds) when downloading '
|
||||
'an image. Does not affect the whole download.'),
|
||||
cfg.IntOpt('image_download_connection_retries', min=0,
|
||||
default=APARAMS.get('ipa-image-download-connection-retries', 2),
|
||||
help='How many times to retry the connection when downloading '
|
||||
'an image. Also retries on failure HTTP statuses.'),
|
||||
cfg.IntOpt('image_download_connection_retry_interval', min=0,
|
||||
default=APARAMS.get(
|
||||
'ipa-image-download-connection-retry-interval', 5),
|
||||
help='Interval (in seconds) between two attempts to establish '
|
||||
'connection when downloading an image.'),
|
||||
|
||||
]
|
||||
|
||||
CONF.register_cli_opts(cli_opts)
|
||||
|
@ -70,12 +70,28 @@ def _download_with_proxy(image_info, url, image_id):
|
||||
os.environ['no_proxy'] = no_proxy
|
||||
proxies = image_info.get('proxies', {})
|
||||
verify, cert = utils.get_ssl_client_options(CONF)
|
||||
resp = requests.get(url, stream=True, proxies=proxies,
|
||||
verify=verify, cert=cert)
|
||||
if resp.status_code != 200:
|
||||
msg = ('Received status code {} from {}, expected 200. Response '
|
||||
'body: {}').format(resp.status_code, url, resp.text)
|
||||
raise errors.ImageDownloadError(image_id, msg)
|
||||
resp = None
|
||||
for attempt in range(CONF.image_download_connection_retries + 1):
|
||||
try:
|
||||
resp = requests.get(url, stream=True, proxies=proxies,
|
||||
verify=verify, cert=cert,
|
||||
timeout=CONF.image_download_connection_timeout)
|
||||
if resp.status_code != 200:
|
||||
msg = ('Received status code {} from {}, expected 200. '
|
||||
'Response body: {}').format(resp.status_code, url,
|
||||
resp.text)
|
||||
raise errors.ImageDownloadError(image_id, msg)
|
||||
except (errors.ImageDownloadError, requests.RequestException) as e:
|
||||
if (attempt == CONF.image_download_connection_retries
|
||||
# NOTE(dtantsur): do not retry 4xx status codes
|
||||
or (resp and resp.status_code < 500)):
|
||||
raise
|
||||
else:
|
||||
LOG.warning('Unable to connect to %s, retrying. Error: %s',
|
||||
url, e)
|
||||
time.sleep(CONF.image_download_connection_retry_interval)
|
||||
else:
|
||||
break
|
||||
return resp
|
||||
|
||||
|
||||
|
@ -18,6 +18,7 @@ from unittest import mock
|
||||
|
||||
from ironic_lib import exception
|
||||
from oslo_concurrency import processutils
|
||||
import requests
|
||||
|
||||
from ironic_python_agent import errors
|
||||
from ironic_python_agent.extensions import standby
|
||||
@ -369,7 +370,8 @@ class TestStandbyExtension(base.IronicAgentTest):
|
||||
standby._download_image(image_info)
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={})
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
write = file_mock.write
|
||||
write.assert_any_call('some')
|
||||
write.assert_any_call('content')
|
||||
@ -400,7 +402,8 @@ class TestStandbyExtension(base.IronicAgentTest):
|
||||
self.assertEqual(no_proxy, os.environ['no_proxy'])
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies=proxies)
|
||||
stream=True, proxies=proxies,
|
||||
timeout=60)
|
||||
write = file_mock.write
|
||||
write.assert_any_call('some')
|
||||
write.assert_any_call('content')
|
||||
@ -1111,7 +1114,8 @@ class TestStandbyExtension(base.IronicAgentTest):
|
||||
'/dev/foo')
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={})
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
expected_calls = [mock.call('some'), mock.call('content')]
|
||||
file_mock.write.assert_has_calls(expected_calls)
|
||||
fix_gpt_mock.assert_called_once_with('/dev/foo', node_uuid=None)
|
||||
@ -1136,7 +1140,8 @@ class TestStandbyExtension(base.IronicAgentTest):
|
||||
image_info, '/dev/foo')
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={})
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
# Assert write was only called once and failed!
|
||||
file_mock.write.assert_called_once_with('some')
|
||||
|
||||
@ -1254,11 +1259,13 @@ class TestImageDownload(base.IronicAgentTest):
|
||||
self.assertEqual(content, list(image_download))
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={})
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
self.assertEqual(image_info['checksum'],
|
||||
image_download._hash_algo.hexdigest())
|
||||
|
||||
def test_download_image_fail(self, requests_mock, time_mock):
|
||||
@mock.patch('time.sleep', autospec=True)
|
||||
def test_download_image_fail(self, sleep_mock, requests_mock, time_mock):
|
||||
response = requests_mock.return_value
|
||||
response.status_code = 401
|
||||
response.text = 'Unauthorized'
|
||||
@ -1272,7 +1279,56 @@ class TestImageDownload(base.IronicAgentTest):
|
||||
standby.ImageDownload, image_info)
|
||||
requests_mock.assert_called_once_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={})
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
self.assertFalse(sleep_mock.called)
|
||||
|
||||
@mock.patch('time.sleep', autospec=True)
|
||||
def test_download_image_retries(self, sleep_mock, requests_mock,
|
||||
time_mock):
|
||||
response = requests_mock.return_value
|
||||
response.status_code = 500
|
||||
response.text = 'Oops'
|
||||
time_mock.return_value = 0.0
|
||||
image_info = _build_fake_image_info()
|
||||
msg = ('Error downloading image: Download of image fake_id failed: '
|
||||
'URL: http://example.org; time: .* seconds. Error: '
|
||||
'Received status code 500 from http://example.org, expected '
|
||||
'200. Response body: Oops')
|
||||
self.assertRaisesRegex(errors.ImageDownloadError, msg,
|
||||
standby.ImageDownload, image_info)
|
||||
requests_mock.assert_called_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
self.assertEqual(3, requests_mock.call_count)
|
||||
sleep_mock.assert_called_with(5)
|
||||
self.assertEqual(2, sleep_mock.call_count)
|
||||
|
||||
@mock.patch('time.sleep', autospec=True)
|
||||
def test_download_image_retries_success(self, sleep_mock, requests_mock,
|
||||
md5_mock):
|
||||
content = ['SpongeBob', 'SquarePants']
|
||||
fail_response = mock.Mock()
|
||||
fail_response.status_code = 500
|
||||
fail_response.text = " "
|
||||
response = mock.Mock()
|
||||
response.status_code = 200
|
||||
response.iter_content.return_value = content
|
||||
requests_mock.side_effect = [requests.Timeout, fail_response, response]
|
||||
|
||||
image_info = _build_fake_image_info()
|
||||
md5_mock.return_value.hexdigest.return_value = image_info['checksum']
|
||||
image_download = standby.ImageDownload(image_info)
|
||||
|
||||
self.assertEqual(content, list(image_download))
|
||||
requests_mock.assert_called_with(image_info['urls'][0],
|
||||
cert=None, verify=True,
|
||||
stream=True, proxies={},
|
||||
timeout=60)
|
||||
self.assertEqual(3, requests_mock.call_count)
|
||||
sleep_mock.assert_called_with(5)
|
||||
self.assertEqual(2, sleep_mock.call_count)
|
||||
|
||||
def test_download_image_and_checksum(self, requests_mock, md5_mock):
|
||||
content = ['SpongeBob', 'SquarePants']
|
||||
@ -1293,9 +1349,9 @@ class TestImageDownload(base.IronicAgentTest):
|
||||
self.assertEqual(content, list(image_download))
|
||||
requests_mock.assert_has_calls([
|
||||
mock.call('http://example.com/checksum', cert=None, verify=True,
|
||||
stream=True, proxies={}),
|
||||
stream=True, proxies={}, timeout=60),
|
||||
mock.call(image_info['urls'][0], cert=None, verify=True,
|
||||
stream=True, proxies={}),
|
||||
stream=True, proxies={}, timeout=60),
|
||||
])
|
||||
self.assertEqual(fake_cs, image_download._hash_algo.hexdigest())
|
||||
|
||||
@ -1323,9 +1379,9 @@ foobar irrelevant file.img
|
||||
self.assertEqual(content, list(image_download))
|
||||
requests_mock.assert_has_calls([
|
||||
mock.call('http://example.com/checksum', cert=None, verify=True,
|
||||
stream=True, proxies={}),
|
||||
stream=True, proxies={}, timeout=60),
|
||||
mock.call(image_info['urls'][0], cert=None, verify=True,
|
||||
stream=True, proxies={}),
|
||||
stream=True, proxies={}, timeout=60),
|
||||
])
|
||||
self.assertEqual(fake_cs, image_download._hash_algo.hexdigest())
|
||||
|
||||
@ -1378,7 +1434,9 @@ foobar irrelevant file.img
|
||||
response = mock.Mock()
|
||||
response.status_code = 200
|
||||
response.iter_content.return_value = content
|
||||
requests_mock.side_effect = [cs_response, response]
|
||||
# 3 retries on status code
|
||||
requests_mock.side_effect = [cs_response, cs_response, cs_response,
|
||||
response]
|
||||
|
||||
image_info = _build_fake_image_info(
|
||||
'http://example.com/path/image.img')
|
||||
|
@ -0,0 +1,11 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Provides timeout and retries when establishing a connection to download
|
||||
an image in the ``standby`` extension. Reduces probability of an image
|
||||
download getting stuck in the event of network problems.
|
||||
|
||||
The default timeout is 60 seconds and can be set via the
|
||||
``ipa-image-download-connection-timeout`` kernel parameter. The default
|
||||
number of retries is 2 and can be set via the
|
||||
``ipa-image-download-connection-retries`` parameter.
|
Loading…
Reference in New Issue
Block a user