Hard stop on image download duration threshold

Adds a wall timeout `image_download_max_timeout` to enforce an upper
bound on total download duration.

While the per-chunk timeout protects against stalled reads, downloads
that trickle in just under the timeout threshold (e.g., due to heavy
TCP retransmits) can hang for longer than intended.

Now, if the total allowed time is exceeded, the download is aborted with
a non-retryable `ImageDownloadTimeoutError` regardless of per-chunk
retry or connection success.

A value of 0 (the default) disables this feature.

Closes-Bug: #2115995
Change-Id: I3b56d21abae0488853bfed14072ba21116d47baf
Signed-off-by: Afonne-CID <afonnepaulc@gmail.com>
This commit is contained in:
Afonne-CID
2025-07-07 14:29:30 +01:00
parent b51cc75ff3
commit e1a31eb97a
5 changed files with 69 additions and 0 deletions

View File

@@ -306,6 +306,13 @@ cli_opts = [
'ipa-image-download-connection-retry-interval', 10),
help='Interval (in seconds) between two attempts to establish '
'connection when downloading an image.'),
cfg.IntOpt('image_download_max_duration', min=0,
default=int(APARAMS.get(
'ipa-image-download-max-duration', 0)),
help='Maximum total duration (in seconds) allowed for '
'downloading an image. If the download exceeds this '
'duration, it will be aborted regardless of retry or '
'connection success.'),
cfg.StrOpt('ironic_api_version',
default=APARAMS.get('ipa-ironic-api-version', None),
help='Ironic API version in format "x.x". If not set, the API '

View File

@@ -156,6 +156,19 @@ class ImageDownloadError(RESTError):
super(ImageDownloadError, self).__init__(details)
class ImageDownloadTimeoutError(RESTError):
"""Raised when an image download operation exceeds its allowed time limit.
"""
status_code = 408
message = 'Image download timeout'
def __init__(self, image_id, msg):
details = 'Download of image {} failed: {}'.format(image_id, msg)
self.secondary_message = msg
super(ImageDownloadTimeoutError, self).__init__(details)
class ImageDownloadOutofSpaceError(ImageDownloadError):
"""Raised when an image download fails due to insufficient storage."""
pass

View File

@@ -641,7 +641,21 @@ class ImageDownload(object):
which is a constant in this module.
"""
self._last_chunk_time = None
start_time = self._time
for chunk in self._request.iter_content(IMAGE_CHUNK_SIZE):
max_download_duration = CONF.image_download_max_duration
if max_download_duration:
elapsed = time.time() - start_time
if elapsed > max_download_duration:
LOG.error('Total download timeout (%s seconds) exceeded',
max_download_duration)
raise errors.ImageDownloadTimeoutError(
self._image_info['id'],
'Download exceeded max allowed time (%s seconds)' %
max_download_duration)
# Per requests forum posts/discussions, iter_content should
# periodically yield to the caller for the client to do things
# like stopwatch and potentially interrupt the download.

View File

@@ -2214,6 +2214,31 @@ class TestImageDownload(base.IronicAgentTest):
sleep_mock.assert_called_with(10)
self.assertEqual(2, sleep_mock.call_count)
@mock.patch('time.time', autospec=True)
def test_download_image_exceeds_max_duration(self, time_mock,
requests_mock, hash_mock):
CONF.set_override('image_download_max_duration', 5)
image_info = _build_fake_image_info()
hash_mock.return_value.hexdigest.return_value = image_info[
'os_hash_value']
content = ['a'] * 10
# simulating time passing with each chunk downloaded
time_mock.side_effect = list(range(11))
response = requests_mock.return_value
response.status_code = 200
response.iter_content.return_value = content
image_download = standby.ImageDownload(image_info)
with self.assertRaisesRegex(errors.ImageDownloadTimeoutError,
'Download exceeded max allowed time'):
# Iterating triggers the timeout logic inside __iter__()
list(image_download)
@mock.patch('time.sleep', autospec=True)
def test_download_image_no_space_error_fatal(self, sleep_mock,
requests_mock, hash_mock):

View File

@@ -0,0 +1,10 @@
---
features:
- |
Adds a new configuration option `[DEFAULT]image_download_max_duration`,
which enforces a maximum total duration (in seconds) allowed for
downloading an image. If this threshold is exceeded, the download is
aborted with a non-retryable `ImageDownloadTimeoutError` exception.
A value of 0 (the default) disables this feature.