From af0bf467a850d56e1f832688d5c1af35dd91f872 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Wed, 17 Jun 2020 14:51:03 -0700 Subject: [PATCH] Extend retries to 9, 10 seconds apart. The download retry interval was previously five seconds which is not long enough to recover after a hard network connectivity break where we may be reliant upon network port forwarding hold-down timers or even routing protocol route propogation to recover communication. Previously the time value was 5 seconds, with 3 attempts, meaning 15 seconds total ignoring the error detection timeouts. Now it is 10 seconds, with 10 attempts, meaning 100 seconds before the error detection timeouts. Change-Id: I6d11edc9a3156f2bdc21c3d432ecc7625d652699 (cherry picked from commit c77a7df851a511a2503da2b50004ca2f34b9be3a) --- ironic_python_agent/config.py | 4 ++-- ironic_python_agent/tests/unit/extensions/test_standby.py | 8 ++++++-- .../notes/extend-retry-timeout-30c930a33d97c193.yaml | 8 ++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/extend-retry-timeout-30c930a33d97c193.yaml diff --git a/ironic_python_agent/config.py b/ironic_python_agent/config.py index c73e6fc96..84566d859 100644 --- a/ironic_python_agent/config.py +++ b/ironic_python_agent/config.py @@ -245,12 +245,12 @@ cli_opts = [ help='The connection timeout (in seconds) when downloading ' 'an image. Does not affect the whole download.'), cfg.IntOpt('image_download_connection_retries', min=0, - default=APARAMS.get('ipa-image-download-connection-retries', 2), + default=APARAMS.get('ipa-image-download-connection-retries', 9), help='How many times to retry the connection when downloading ' 'an image. Also retries on failure HTTP statuses.'), cfg.IntOpt('image_download_connection_retry_interval', min=0, default=APARAMS.get( - 'ipa-image-download-connection-retry-interval', 5), + 'ipa-image-download-connection-retry-interval', 10), help='Interval (in seconds) between two attempts to establish ' 'connection when downloading an image.'), diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index fe212a9a2..f39d32d6d 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -412,6 +412,7 @@ class TestStandbyExtension(base.IronicAgentTest): @mock.patch('requests.get', autospec=True) def test_download_image_bad_status(self, requests_mock): + self.config(image_download_connection_retry_interval=0) image_info = _build_fake_image_info() response = requests_mock.return_value response.status_code = 404 @@ -1201,6 +1202,7 @@ class TestStandbyExtension(base.IronicAgentTest): return self self.config(image_download_connection_timeout=1) + self.config(image_download_connection_retries=2) self.config(image_download_connection_retry_interval=0) image_info = _build_fake_image_info() file_mock = mock.Mock() @@ -1357,6 +1359,7 @@ class TestImageDownload(base.IronicAgentTest): @mock.patch('time.sleep', autospec=True) def test_download_image_retries(self, sleep_mock, requests_mock, time_mock): + self.config(image_download_connection_retries=2) response = requests_mock.return_value response.status_code = 500 response.text = 'Oops' @@ -1373,7 +1376,7 @@ class TestImageDownload(base.IronicAgentTest): stream=True, proxies={}, timeout=60) self.assertEqual(3, requests_mock.call_count) - sleep_mock.assert_called_with(5) + sleep_mock.assert_called_with(10) self.assertEqual(2, sleep_mock.call_count) @mock.patch('time.sleep', autospec=True) @@ -1398,7 +1401,7 @@ class TestImageDownload(base.IronicAgentTest): stream=True, proxies={}, timeout=60) self.assertEqual(3, requests_mock.call_count) - sleep_mock.assert_called_with(5) + sleep_mock.assert_called_with(10) self.assertEqual(2, sleep_mock.call_count) def test_download_image_and_checksum(self, requests_mock, md5_mock): @@ -1498,6 +1501,7 @@ foobar irrelevant file.img standby.ImageDownload, image_info) def test_download_image_and_checksum_failed(self, requests_mock, md5_mock): + self.config(image_download_connection_retry_interval=0) content = ['SpongeBob', 'SquarePants'] cs_response = mock.Mock() cs_response.status_code = 400 diff --git a/releasenotes/notes/extend-retry-timeout-30c930a33d97c193.yaml b/releasenotes/notes/extend-retry-timeout-30c930a33d97c193.yaml new file mode 100644 index 000000000..0870eb6b9 --- /dev/null +++ b/releasenotes/notes/extend-retry-timeout-30c930a33d97c193.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes the short timeout retries interval, which was previously ``5`` + seconds, to a length that will allow the agent to retry after a + network interruption. The time between retries is now ``10`` seconds, + and the number of retries are set to ``9`` to help ensure intermittent + network outages do not cause recoverable failures.