From 82ee97d471caa809572383597107b0f796733810 Mon Sep 17 00:00:00 2001 From: Jay Faulkner Date: Tue, 21 Oct 2025 13:19:08 -0700 Subject: [PATCH] Improve image download performance With the recent threading changes, there have been multiple reports of conductor threads timing out heartbeats due to being overly busy. It appears this is somewhat due to unoptimized image download code. This change adds two major optimizations that will help with this performance issues: - Streaming checksums when getting images -- instead of doing a checksum at the last step, instead, we do them iteratively during download. - Additionally, we stream using larger chunks -- 1 MB -- instead of the default of 128 bytes in order to achieve greater performance through less context switching. Assisted-by: Claude-code 2.0 Closes-bug: https://bugs.launchpad.net/ironic/+bug/2129260 Signed-off-by: Jay Faulkner Change-Id: I0e9b5ca6aa24b268f73eddbc796b096859e76a4b (cherry picked from commit c3418b15a4023b31f7d2564c6ba954230ad3dc91) --- ironic/common/checksum_utils.py | 7 +- ironic/common/glance_service/image_service.py | 73 +++++++++-- ironic/common/image_service.py | 85 +++++++++++-- ironic/common/images.py | 16 ++- .../tests/unit/common/test_image_service.py | 115 +++++++----------- ironic/tests/unit/common/test_images.py | 14 +-- ironic/tests/unit/stubs.py | 18 ++- ...download-performance-0bf1af5556c1adbf.yaml | 9 ++ 8 files changed, 235 insertions(+), 102 deletions(-) create mode 100644 releasenotes/notes/image-download-performance-0bf1af5556c1adbf.yaml diff --git a/ironic/common/checksum_utils.py b/ironic/common/checksum_utils.py index e9ce8da52a..4655b78f34 100644 --- a/ironic/common/checksum_utils.py +++ b/ironic/common/checksum_utils.py @@ -272,7 +272,8 @@ def get_checksum_from_url(checksum, image_source): class TransferHelper(object): - def __init__(self, response, checksum_algo, expected_checksum): + def __init__(self, response, checksum_algo, expected_checksum, + chunk_size=1024 * 1024): """Helper class to drive data download with concurrent checksum. The TransferHelper can be used to help retrieve data from a @@ -284,6 +285,8 @@ class TransferHelper(object): :param checksum_algo: The expected checksum algorithm. :param expected_checksum: The expected checksum of the data being transferred. + :param chunk_size: Size of chunks to read during transfer. + Defaults to 1MB. """ # NOTE(TheJulia): Similar code exists in IPA in regards to @@ -297,7 +300,7 @@ class TransferHelper(object): # This may artificially throttle transfer speeds a little in # high performance environments as the data may get held up # in the kernel limiting the window from scaling. - self._chunk_size = 1024 * 1024 # 1MB + self._chunk_size = chunk_size self._last_check_time = time.time() self._request = response self._bytes_transferred = 0 diff --git a/ironic/common/glance_service/image_service.py b/ironic/common/glance_service/image_service.py index 9b50d4d4d2..a320872361 100644 --- a/ironic/common/glance_service/image_service.py +++ b/ironic/common/glance_service/image_service.py @@ -29,6 +29,7 @@ from oslo_log import log from oslo_utils import uuidutils import tenacity +from ironic.common import checksum_utils from ironic.common import exception from ironic.common.glance_service import service_utils from ironic.common.i18n import _ @@ -44,6 +45,15 @@ TempUrlCacheElement = collections.namedtuple('TempUrlCacheElement', LOG = log.getLogger(__name__) _GLANCE_SESSION = None +# Chunk size for streaming image downloads. This MUST be specified explicitly +# when iterating over a requests.Response object. The default chunk size when +# iterating directly over a Response (i.e., `for chunk in resp:`) is only +# 128 bytes, which for a 40GB image results in ~335 million iterations. +# This causes severe CPU starvation and memory pressure, leading to conductor +# heartbeat failures and system unresponsiveness. +# See: https://bugs.launchpad.net/ironic/+bug/2129260 +IMAGE_CHUNK_SIZE = 1024 * 1024 # 1 MiB + def _translate_image_exception(image_id, exc_value): if isinstance(exc_value, (openstack_exc.ForbiddenException)): @@ -114,6 +124,7 @@ class GlanceImageService(object): self.client = client self.context = context self.endpoint = None + self._transfer_verified_checksum = None @tenacity.retry( retry=tenacity.retry_if_exception_type( @@ -179,14 +190,22 @@ class GlanceImageService(object): return base_image_meta @check_image_service - def download(self, image_href, data=None): + def download(self, image_href, data=None, checksum=None, + checksum_algo=None): """Calls out to Glance for data and writes data. :param image_href: The opaque image identifier. :param data: (Optional) File object to write data to. + :param checksum: Expected checksum value for validation during + transfer. + :param checksum_algo: Algorithm for checksum (e.g., 'md5', + 'sha256'). """ image_id = service_utils.parse_image_id(image_href) + # Reset transfer checksum for new download + self._transfer_verified_checksum = None + if 'file' in CONF.glance.allowed_direct_url_schemes: location = self._get_location(image_id) url = urlparse.urlparse(location) @@ -199,10 +218,50 @@ class GlanceImageService(object): image_size = 0 image_data = None if data: - image_chunks = self.call('download_image', image_id, stream=True) - for chunk in image_chunks: - data.write(chunk) - image_size += len(chunk) + # Get streaming response from glance + resp = self.call('download_image', image_id, stream=True) + + # If checksum validation is requested, use TransferHelper + # to calculate checksum during download + if checksum and checksum_algo: + try: + download_helper = checksum_utils.TransferHelper( + resp, checksum_algo, checksum, + chunk_size=IMAGE_CHUNK_SIZE) + for chunk in download_helper: + data.write(chunk) + image_size += len(chunk) + + # Verify checksum matches + if download_helper.checksum_matches: + # Store verified checksum in algo:value format + self._transfer_verified_checksum = ( + f"{checksum_algo}:{checksum}") + LOG.debug("Verified checksum during download of image " + "%(image)s: %(algo)s:%(checksum)s", + {'image': image_href, 'algo': checksum_algo, + 'checksum': checksum}) + else: + raise exception.ImageChecksumError() + except (exception.ImageChecksumAlgorithmFailure, ValueError): + # Fall back to download without checksumming + LOG.warning("Checksum algorithm %(algo)s not available, " + "downloading without incremental checksum for " + "image %(image)s", + {'algo': checksum_algo, 'image': image_href}) + # NOTE(JayF): Must use iter_content with explicit chunk + # size. See IMAGE_CHUNK_SIZE definition for details. + for chunk in resp.iter_content( + chunk_size=IMAGE_CHUNK_SIZE): + data.write(chunk) + image_size += len(chunk) + else: + # No checksum requested, just stream and write. + # NOTE(JayF): Must use iter_content with explicit chunk + # size. See IMAGE_CHUNK_SIZE definition for details. + for chunk in resp.iter_content(chunk_size=IMAGE_CHUNK_SIZE): + data.write(chunk) + image_size += len(chunk) else: image_data = self.call('download_image', image_id).content image_size = len(image_data) @@ -441,6 +500,4 @@ class GlanceImageService(object): @property def transfer_verified_checksum(self): """The transferred artifact checksum.""" - # FIXME(TheJulia): We should look at and see if we wire - # this up in a future change. - return None + return self._transfer_verified_checksum diff --git a/ironic/common/image_service.py b/ironic/common/image_service.py index 4b90bc945e..4dd40408b5 100644 --- a/ironic/common/image_service.py +++ b/ironic/common/image_service.py @@ -28,6 +28,7 @@ from oslo_utils import strutils from oslo_utils import uuidutils import requests +from ironic.common import checksum_utils from ironic.common import exception from ironic.common.glance_service.image_service import GlanceImageService from ironic.common.i18n import _ @@ -57,11 +58,16 @@ class BaseImageService(object, metaclass=abc.ABCMeta): """ @abc.abstractmethod - def download(self, image_href, image_file): + def download(self, image_href, image_file, checksum=None, + checksum_algo=None): """Downloads image to specified location. :param image_href: Image reference. :param image_file: File object to write data to. + :param checksum: Expected checksum value for validation during + transfer. + :param checksum_algo: Algorithm for checksum (e.g., 'md5', + 'sha256'). :raises: exception.ImageRefValidationFailed. :raises: exception.ImageDownloadFailed. """ @@ -91,6 +97,14 @@ class BaseImageService(object, metaclass=abc.ABCMeta): class HttpImageService(BaseImageService): """Provides retrieval of disk images using HTTP.""" + def __init__(self): + self._transfer_verified_checksum = None + + @property + def transfer_verified_checksum(self): + """The transferred artifact checksum.""" + return self._transfer_verified_checksum + @staticmethod def gen_auth_from_conf_user_pass(image_href): """This function is used to pass the credentials to the chosen @@ -197,20 +211,27 @@ class HttpImageService(BaseImageService): reason=str(e)) return response - def download(self, image_href, image_file): + def download(self, image_href, image_file, checksum=None, + checksum_algo=None): """Downloads image to specified location. :param image_href: Image reference. :param image_file: File object to write data to. + :param checksum: Expected checksum value for validation during + transfer. + :param checksum_algo: Algorithm for checksum (e.g., 'md5', + 'sha256'). :raises: exception.ImageRefValidationFailed if GET request returned response code not equal to 200. :raises: exception.ImageDownloadFailed if: * IOError happened during file write; * GET request failed. + :raises: exception.ImageChecksumError if checksum validation fails. """ + # Reset transfer checksum for new download + self._transfer_verified_checksum = None try: - verify = strutils.bool_from_string(CONF.webserver_verify_ca, strict=True) except ValueError: @@ -227,8 +248,45 @@ class HttpImageService(BaseImageService): reason=_("Got HTTP code %s instead of 200 in response " "to GET request.") % response.status_code) - with response.raw as input_img: - shutil.copyfileobj(input_img, image_file, IMAGE_CHUNK_SIZE) + # If checksum validation is requested, use TransferHelper + # to calculate checksum during download + if checksum and checksum_algo: + try: + download_helper = checksum_utils.TransferHelper( + response, checksum_algo, checksum, + chunk_size=IMAGE_CHUNK_SIZE) + for chunk in download_helper: + image_file.write(chunk) + + # Verify checksum matches + if download_helper.checksum_matches: + # Store verified checksum in algo:value format + self._transfer_verified_checksum = ( + f"{checksum_algo}:{checksum}") + LOG.debug("Verified checksum during download of image " + "%(image)s: %(algo)s:%(checksum)s", + {'image': image_href, 'algo': checksum_algo, + 'checksum': checksum}) + else: + raise exception.ImageChecksumError() + except (exception.ImageChecksumAlgorithmFailure, ValueError): + # Fall back to download without checksumming + LOG.warning("Checksum algorithm %(algo)s not available, " + "downloading without incremental checksum for " + "image %(image)s", + {'algo': checksum_algo, 'image': image_href}) + # NOTE(JayF): Must use iter_content with explicit chunk + # size. See IMAGE_CHUNK_SIZE definition for details. + for chunk in response.iter_content( + chunk_size=IMAGE_CHUNK_SIZE): + image_file.write(chunk) + else: + # No checksum requested, just stream and write. + # NOTE(JayF): Must use iter_content with explicit chunk + # size. See IMAGE_CHUNK_SIZE definition for details. + for chunk in response.iter_content( + chunk_size=IMAGE_CHUNK_SIZE): + image_file.write(chunk) except (OSError, requests.ConnectionError, requests.RequestException, IOError) as e: @@ -424,11 +482,18 @@ class OciImageService(BaseImageService): return self.show(image_href) - def download(self, image_href, image_file): + def download(self, image_href, image_file, checksum=None, + checksum_algo=None): """Downloads image to specified location. :param image_href: Image reference. :param image_file: File object to write data to. + :param checksum: Expected checksum value for validation during + transfer. (Currently unused, OCI validates via + manifest digest.) + :param checksum_algo: Algorithm for checksum (e.g., 'md5', + 'sha256'). (Currently unused, OCI validates + via manifest digest.) :raises: exception.ImageRefValidationFailed. :raises: exception.ImageDownloadFailed. :raises: exception.OciImageNotSpecific. @@ -855,11 +920,17 @@ class FileImageService(BaseImageService): return image_path - def download(self, image_href, image_file): + def download(self, image_href, image_file, checksum=None, + checksum_algo=None): """Downloads image to specified location. :param image_href: Image reference. :param image_file: File object to write data to. + :param checksum: Expected checksum value for validation during + transfer. (Currently unused, for API compatibility.) + :param checksum_algo: Algorithm for checksum (e.g., 'md5', + 'sha256'). (Currently unused, for API + compatibility.) :raises: exception.ImageRefValidationFailed if source image file doesn't exist. :raises: exception.ImageDownloadFailed if exceptions were raised while diff --git a/ironic/common/images.py b/ironic/common/images.py index 50cf0458da..a7e1ae729e 100644 --- a/ironic/common/images.py +++ b/ironic/common/images.py @@ -365,7 +365,7 @@ def create_esp_image_for_uefi( def fetch_into(context, image_href, image_file, - image_auth_data=None): + image_auth_data=None, checksum=None, checksum_algo=None): """Fetches image file contents into a file. :param context: A context object. @@ -375,6 +375,8 @@ def fetch_into(context, image_href, image_file, :param image_auth_data: Optional dictionary for credentials to be conveyed from the original task to the image download process, if required. + :param checksum: Expected checksum value for validation during transfer. + :param checksum_algo: Algorithm for checksum (e.g., 'md5', 'sha256'). :returns: If a value is returned, that value was validated as the checksum. Otherwise None indicating the process had been completed. """ @@ -399,9 +401,13 @@ def fetch_into(context, image_href, image_file, if isinstance(image_file, str): with open(image_file, "wb") as image_file_obj: - image_service.download(image_href, image_file_obj) + image_service.download(image_href, image_file_obj, + checksum=checksum, + checksum_algo=checksum_algo) else: - image_service.download(image_href, image_file) + image_service.download(image_href, image_file, + checksum=checksum, + checksum_algo=checksum_algo) LOG.debug("Image %(image_href)s downloaded in %(time).2f seconds.", {'image_href': image_href, 'time': time.time() - start}) @@ -450,7 +456,9 @@ def fetch(context, image_href, path, force_raw=False, image_auth_data=None): with fileutils.remove_path_on_error(path): transfer_checksum = fetch_into(context, image_href, path, - image_auth_data) + image_auth_data, + checksum=checksum, + checksum_algo=checksum_algo) if (not transfer_checksum and not CONF.conductor.disable_file_checksum and checksum): diff --git a/ironic/tests/unit/common/test_image_service.py b/ironic/tests/unit/common/test_image_service.py index e7dc3f09f1..c81f5a8005 100644 --- a/ironic/tests/unit/common/test_image_service.py +++ b/ironic/tests/unit/common/test_image_service.py @@ -377,45 +377,39 @@ class HttpImageServiceTestCase(base.TestCase): timeout=60, auth=None, allow_redirects=True) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_http_scheme(self, req_get_mock, shutil_mock): + def test_download_success_http_scheme(self, req_get_mock): self.href = 'http://127.0.0.1:12345/fedora.qcow2' response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify=True, timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_verify_false( - self, req_get_mock, shutil_mock): + def test_download_success_verify_false(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', 'False') response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify=False, timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) def test_download_success_verify_false_basic_auth_sucess( - self, req_get_mock, shutil_mock): + self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', 'False') cfg.CONF.set_override('image_server_auth_strategy', 'http_basic', @@ -427,13 +421,12 @@ class HttpImageServiceTestCase(base.TestCase): auth_creds = requests.auth.HTTPBasicAuth(user, password) response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify=False, timeout=60, auth=auth_creds) @@ -447,76 +440,61 @@ class HttpImageServiceTestCase(base.TestCase): self.assertRaises(exception.ImageRefValidationFailed, self.service.download, self.href, file_mock) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_verify_true( - self, req_get_mock, shutil_mock): + def test_download_success_verify_true(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', 'True') response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify=True, timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_verify_path( - self, req_get_mock, shutil_mock): + def test_download_success_verify_path(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', '/some/path') response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify='/some/path', timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_fail_verify_false_connerror( - self, req_get_mock, shutil_mock): + def test_download_fail_verify_false_connerror(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', False) req_get_mock.side_effect = requests.ConnectionError() file_mock = mock.Mock(spec=io.BytesIO) self.assertRaises(exception.ImageDownloadFailed, self.service.download, self.href, file_mock) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_fail_verify_false_ioerror( - self, req_get_mock, shutil_mock): + def test_download_fail_verify_false_ioerror(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', False) response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) - shutil_mock.side_effect = IOError + file_mock.write.side_effect = IOError self.assertRaises(exception.ImageDownloadFailed, self.service.download, self.href, file_mock) req_get_mock.assert_called_once_with(self.href, stream=True, verify=False, timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_verify_true_connerror( - self, req_get_mock, shutil_mock): + def test_download_success_verify_true_connerror(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', '/some/path') - response_mock = mock.Mock() - response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) req_get_mock.side_effect = requests.ConnectionError file_mock = mock.Mock(spec=io.BytesIO) @@ -526,52 +504,45 @@ class HttpImageServiceTestCase(base.TestCase): verify='/some/path', timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_fail_verify_true_ioerror( - self, req_get_mock, shutil_mock): + def test_download_fail_verify_true_ioerror(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', '/some/path') response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) - shutil_mock.side_effect = IOError + file_mock.write.side_effect = IOError self.assertRaises(exception.ImageDownloadFailed, self.service.download, self.href, file_mock) req_get_mock.assert_called_once_with(self.href, stream=True, verify='/some/path', timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_fail_verify_true_oserror( - self, req_get_mock, shutil_mock): + def test_download_fail_verify_true_oserror(self, req_get_mock): cfg.CONF.set_override('webserver_verify_ca', '/some/path') response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) - shutil_mock.side_effect = OSError() + file_mock.write.side_effect = OSError() self.assertRaises(exception.ImageDownloadFailed, self.service.download, self.href, file_mock) req_get_mock.assert_called_once_with(self.href, stream=True, verify='/some/path', timeout=60, auth=None) - @mock.patch.object(shutil, 'copyfileobj', autospec=True) @mock.patch.object(requests, 'get', autospec=True) - def test_download_success_custom_timeout( - self, req_get_mock, shutil_mock): + def test_download_success_custom_timeout(self, req_get_mock): cfg.CONF.set_override('webserver_connection_timeout', 15) response_mock = req_get_mock.return_value response_mock.status_code = http_client.OK - response_mock.raw = mock.MagicMock(spec=io.BytesIO) + response_mock.iter_content.return_value = [b'chunk1', b'chunk2'] file_mock = mock.Mock(spec=io.BytesIO) self.service.download(self.href, file_mock) - shutil_mock.assert_called_once_with( - response_mock.raw.__enter__(), file_mock, - image_service.IMAGE_CHUNK_SIZE - ) + response_mock.iter_content.assert_called_once_with( + chunk_size=image_service.IMAGE_CHUNK_SIZE) + self.assertEqual(2, file_mock.write.call_count) req_get_mock.assert_called_once_with(self.href, stream=True, verify=True, timeout=15, auth=None) @@ -1274,14 +1245,14 @@ class ServiceGetterTestCase(base.TestCase): def test_get_http_image_service(self, http_service_mock): image_href = 'http://127.0.0.1/image.qcow2' image_service.get_image_service(image_href) - http_service_mock.assert_called_once_with() + http_service_mock.assert_called_once() @mock.patch.object(image_service.HttpImageService, '__init__', return_value=None, autospec=True) def test_get_https_image_service(self, http_service_mock): image_href = 'https://127.0.0.1/image.qcow2' image_service.get_image_service(image_href) - http_service_mock.assert_called_once_with() + http_service_mock.assert_called_once() @mock.patch.object(image_service.FileImageService, '__init__', return_value=None, autospec=True) diff --git a/ironic/tests/unit/common/test_images.py b/ironic/tests/unit/common/test_images.py index dd30a9e610..eb9d8015a4 100644 --- a/ironic/tests/unit/common/test_images.py +++ b/ironic/tests/unit/common/test_images.py @@ -57,7 +57,7 @@ class IronicImagesTestCase(base.TestCase): image_service_mock.assert_called_once_with('image_href', context='context') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum=None, checksum_algo=None) mock_zstd.assert_called_once_with('path') @mock.patch.object(images, '_handle_zstd_compression', autospec=True) @@ -76,7 +76,7 @@ class IronicImagesTestCase(base.TestCase): open_mock.assert_called_once_with('path', 'wb') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum=None, checksum_algo=None) image_to_raw_mock.assert_called_once_with( 'image_href', 'path', 'path.part') mock_zstd.assert_called_once_with('path') @@ -102,7 +102,7 @@ class IronicImagesTestCase(base.TestCase): mock_checksum.assert_called_once_with('path', algorithm='sha256') open_mock.assert_called_once_with('path', 'wb') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum='f00', checksum_algo='sha256') image_to_raw_mock.assert_called_once_with( 'image_href', 'path', 'path.part') mock_zstd.assert_called_once_with('path') @@ -131,7 +131,7 @@ class IronicImagesTestCase(base.TestCase): mock_checksum.assert_called_once_with('path', algorithm='sha256') open_mock.assert_called_once_with('path', 'wb') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum='f00', checksum_algo='sha256') # If the checksum fails, then we don't attempt to convert the image. image_to_raw_mock.assert_not_called() mock_zstd.assert_not_called() @@ -158,7 +158,7 @@ class IronicImagesTestCase(base.TestCase): mock_checksum.assert_called_once_with('path', algorithm='md5') open_mock.assert_called_once_with('path', 'wb') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum='f00', checksum_algo=None) image_to_raw_mock.assert_called_once_with( 'image_href', 'path', 'path.part') mock_zstd.assert_called_once_with('path') @@ -185,7 +185,7 @@ class IronicImagesTestCase(base.TestCase): mock_checksum.assert_called_once_with('path', algorithm='sha512') open_mock.assert_called_once_with('path', 'wb') image_service_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum='sha512:f00', checksum_algo=None) image_to_raw_mock.assert_called_once_with( 'image_href', 'path', 'path.part') mock_zstd.assert_called_once_with('path') @@ -215,7 +215,7 @@ class IronicImagesTestCase(base.TestCase): mock_checksum.assert_not_called() open_mock.assert_called_once_with('path', 'wb') svc_mock.return_value.download.assert_called_once_with( - 'image_href', 'file') + 'image_href', 'file', checksum='sha512:f00', checksum_algo=None) image_to_raw_mock.assert_called_once_with( 'image_href', 'path', 'path.part') svc_mock.return_value.set_image_auth.assert_called_once_with( diff --git a/ironic/tests/unit/stubs.py b/ironic/tests/unit/stubs.py index f7f97ebfd8..31b40af778 100644 --- a/ironic/tests/unit/stubs.py +++ b/ironic/tests/unit/stubs.py @@ -12,7 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -import io from openstack.connection import exceptions as openstack_exc from oslo_utils import uuidutils @@ -40,7 +39,7 @@ class StubGlanceClient(object): def download_image(self, image_id, stream=False): self.get_image(image_id) if stream: - return io.BytesIO(self.image_data) + return FakeStreamingResponse(self.image_data) else: return FakeImageDownload(self.image_data) @@ -56,6 +55,21 @@ class FakeImageDownload(object): self.content = content +class FakeStreamingResponse(object): + """Mimics a requests.Response object for streaming downloads.""" + + def __init__(self, content): + self._content = content + self._pos = 0 + + def iter_content(self, chunk_size=1): + """Yield chunks of content mimicking requests.Response.iter_content.""" + while self._pos < len(self._content): + chunk = self._content[self._pos:self._pos + chunk_size] + self._pos += chunk_size + yield chunk + + class FakeNeutronPort(dict): def __init__(self, **attrs): PORT_ATTRS = ['admin_state_up', diff --git a/releasenotes/notes/image-download-performance-0bf1af5556c1adbf.yaml b/releasenotes/notes/image-download-performance-0bf1af5556c1adbf.yaml new file mode 100644 index 0000000000..9a8a592b8e --- /dev/null +++ b/releasenotes/notes/image-download-performance-0bf1af5556c1adbf.yaml @@ -0,0 +1,9 @@ +fixes: + - | + Extremely large instance images, post eventlet removal, were causing CPU + spikes and the conductor process to hang while the image was being + fetched and validated. This optimizes the instance fetch and validation + in two ways -- first, we now calculate the image checksum while the + file is being fetched instead of as a separate step. Secondly, we were, + in some cases, using the default chunk size of 128 bytes during downloads. + Now, we use a more standard and reasonable value of 1 megabyte.