Extend retries to 9, 10 seconds apart.

The download retry interval was previously five seconds which is
not long enough to recover after a hard network connectivity break
where we may be reliant upon network port forwarding hold-down
timers or even routing protocol route propogation to recover
communication.

Previously the time value was 5 seconds, with 3 attempts, meaning
15 seconds total ignoring the error detection timeouts.

Now it is 10 seconds, with 10 attempts, meaning 100 seconds before
the error detection timeouts.

Change-Id: I6d11edc9a3156f2bdc21c3d432ecc7625d652699
This commit is contained in:
Julia Kreger 2020-06-17 14:51:03 -07:00
parent 159ab9f0ce
commit c77a7df851
3 changed files with 16 additions and 4 deletions

View File

@ -245,12 +245,12 @@ cli_opts = [
help='The connection timeout (in seconds) when downloading '
'an image. Does not affect the whole download.'),
cfg.IntOpt('image_download_connection_retries', min=0,
default=APARAMS.get('ipa-image-download-connection-retries', 2),
default=APARAMS.get('ipa-image-download-connection-retries', 9),
help='How many times to retry the connection when downloading '
'an image. Also retries on failure HTTP statuses.'),
cfg.IntOpt('image_download_connection_retry_interval', min=0,
default=APARAMS.get(
'ipa-image-download-connection-retry-interval', 5),
'ipa-image-download-connection-retry-interval', 10),
help='Interval (in seconds) between two attempts to establish '
'connection when downloading an image.'),

View File

@ -412,6 +412,7 @@ class TestStandbyExtension(base.IronicAgentTest):
@mock.patch('requests.get', autospec=True)
def test_download_image_bad_status(self, requests_mock):
self.config(image_download_connection_retry_interval=0)
image_info = _build_fake_image_info()
response = requests_mock.return_value
response.status_code = 404
@ -1248,6 +1249,7 @@ class TestStandbyExtension(base.IronicAgentTest):
return self
self.config(image_download_connection_timeout=1)
self.config(image_download_connection_retries=2)
self.config(image_download_connection_retry_interval=0)
image_info = _build_fake_image_info()
file_mock = mock.Mock()
@ -1395,6 +1397,7 @@ class TestImageDownload(base.IronicAgentTest):
@mock.patch('time.sleep', autospec=True)
def test_download_image_retries(self, sleep_mock, requests_mock,
time_mock):
self.config(image_download_connection_retries=2)
response = requests_mock.return_value
response.status_code = 500
response.text = 'Oops'
@ -1411,7 +1414,7 @@ class TestImageDownload(base.IronicAgentTest):
stream=True, proxies={},
timeout=60)
self.assertEqual(3, requests_mock.call_count)
sleep_mock.assert_called_with(5)
sleep_mock.assert_called_with(10)
self.assertEqual(2, sleep_mock.call_count)
@mock.patch('time.sleep', autospec=True)
@ -1436,7 +1439,7 @@ class TestImageDownload(base.IronicAgentTest):
stream=True, proxies={},
timeout=60)
self.assertEqual(3, requests_mock.call_count)
sleep_mock.assert_called_with(5)
sleep_mock.assert_called_with(10)
self.assertEqual(2, sleep_mock.call_count)
def test_download_image_and_checksum(self, requests_mock, md5_mock):
@ -1536,6 +1539,7 @@ foobar irrelevant file.img
standby.ImageDownload, image_info)
def test_download_image_and_checksum_failed(self, requests_mock, md5_mock):
self.config(image_download_connection_retry_interval=0)
content = ['SpongeBob', 'SquarePants']
cs_response = mock.Mock()
cs_response.status_code = 400

View File

@ -0,0 +1,8 @@
---
fixes:
- |
Fixes the short timeout retries interval, which was previously ``5``
seconds, to a length that will allow the agent to retry after a
network interruption. The time between retries is now ``10`` seconds,
and the number of retries are set to ``9`` to help ensure intermittent
network outages do not cause recoverable failures.