From 8d6333b46277f48fa41fb11026fd83b4bf2efe4c Mon Sep 17 00:00:00 2001 From: Max Lamprecht Date: Wed, 11 Feb 2026 10:37:37 +0100 Subject: [PATCH] feat: return size in get_image_data_iter Return the size with the data iter in get_image_data_iter(uri). We will either fetch it from the local filesize or from the http 'content-length' header. For example the s3 store driver needs to know the image size in order to decide between singlepart and multipart upload. Without the size the s3 driver will always attempt to use singlepart which can fail for larger objects. e.g. aws part size of 5GB Closes-Bug: #2141673 Change-Id: Ief14a61f6a1d9327acfd6ada61c2b5976d153555 Signed-off-by: Max Lamprecht --- .../flows/_internal_plugins/web_download.py | 25 ++++++------ glance/async_/flows/base_import.py | 4 +- .../common/scripts/api_image_import/main.py | 4 +- glance/common/scripts/image_import/main.py | 13 +++++-- glance/common/scripts/utils.py | 14 +++++-- .../tests/unit/async_/flows/test_convert.py | 10 +++-- glance/tests/unit/async_/flows/test_import.py | 23 ++++++----- .../unit/async_/flows/test_web_download.py | 38 +++++++++++++------ .../common/scripts/image_import/test_main.py | 22 +++++++++-- ...-get_image_data_iter-6ff6c742dd87b59b.yaml | 10 +++++ 10 files changed, 111 insertions(+), 52 deletions(-) create mode 100644 releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml diff --git a/glance/async_/flows/_internal_plugins/web_download.py b/glance/async_/flows/_internal_plugins/web_download.py index 3e831dbbd9..17a52b3dc9 100644 --- a/glance/async_/flows/_internal_plugins/web_download.py +++ b/glance/async_/flows/_internal_plugins/web_download.py @@ -48,25 +48,24 @@ class _WebDownload(base_download.BaseDownload): # we recommend as the best solution. For more details on this, please # refer to the comment in the `_ImportToStore.execute` method. try: - data = script_utils.get_image_data_iter(self.uri) + data, size = script_utils.get_image_data_iter(self.uri) except Exception as e: with excutils.save_and_reraise_exception(): LOG.error("Task %(task_id)s failed with exception %(error)s", {"error": e, "task_id": self.task_id}) - self._path, bytes_written = self.store.add(self.image_id, data, 0)[0:2] - try: - content_length = int(data.headers['content-length']) - if bytes_written != content_length: - msg = (_("Task %(task_id)s failed because downloaded data " - "size %(data_size)i is different from expected %(" - "expected)i") % - {"task_id": self.task_id, "data_size": bytes_written, - "expected": content_length}) - raise exception.ImportTaskError(msg) - except (KeyError, ValueError): - pass + self._path, bytes_written = self.store.add(self.image_id, data, + size)[0:2] + + if bytes_written != size and size != 0: + msg = (_("Task %(task_id)s failed because downloaded data " + "size %(data_size)i is different from expected %(" + "expected)i") % {"task_id": self.task_id, + "data_size": bytes_written, + "expected": size}) + raise exception.ImportTaskError(msg) + return self._path diff --git a/glance/async_/flows/base_import.py b/glance/async_/flows/base_import.py index cfc2236b63..33cb95c9a6 100644 --- a/glance/async_/flows/base_import.py +++ b/glance/async_/flows/base_import.py @@ -153,9 +153,9 @@ class _ImportToFS(task.Task): # While using any path should be "technically" fine, it's not what # we recommend as the best solution. For more details on this, please # refer to the comment in the `_ImportToStore.execute` method. - data = script_utils.get_image_data_iter(self.uri) + data, size = script_utils.get_image_data_iter(self.uri) - path = self.store.add(image_id, data, 0, context=None)[0] + path = self.store.add(image_id, data, size, context=None)[0] try: # NOTE(flaper87): Consider moving this code to a common diff --git a/glance/common/scripts/api_image_import/main.py b/glance/common/scripts/api_image_import/main.py index e4db531510..d3807e7081 100644 --- a/glance/common/scripts/api_image_import/main.py +++ b/glance/common/scripts/api_image_import/main.py @@ -118,8 +118,8 @@ def set_image_data(image, uri, task_id, backend=None): try: LOG.info("Task %(task_id)s: Got image data uri %(data_uri)s to be " "imported", {"data_uri": uri, "task_id": task_id}) - data_iter = script_utils.get_image_data_iter(uri) - image.set_data(data_iter, backend=backend) + data_iter, size = script_utils.get_image_data_iter(uri) + image.set_data(data_iter, backend=backend, size=size) except Exception as e: with excutils.save_and_reraise_exception(): LOG.warning("Task %(task_id)s failed with exception %(error)s", diff --git a/glance/common/scripts/image_import/main.py b/glance/common/scripts/image_import/main.py index 0cd078590d..23e1871374 100644 --- a/glance/common/scripts/image_import/main.py +++ b/glance/common/scripts/image_import/main.py @@ -140,16 +140,23 @@ def set_image_data(image, uri, task_id, backend=None, set_active=True, try: LOG.info(_LI("Task %(task_id)s: Got image data uri %(data_uri)s to be " "imported"), {"data_uri": uri, "task_id": task_id}) - data_iter = script_utils.get_image_data_iter(uri) + data_iter, size = script_utils.get_image_data_iter(uri) + if callback: # If a callback was provided, wrap our data iterator to call # the function every 60 seconds. data_iter = script_utils.CallbackIterator( data_iter, callback, min_interval=60) - image_size = image.size if image.size is not None else 0 - image.set_data(data_iter, size=image_size, backend=backend, + if image.size is not None and image.size != size: + msg = _( + "Task %(task_id)s: Image size mismatch. Expected %(expected)d " + "but got %(actual)d" + ) % {"task_id": task_id, "expected": image.size, "actual": size} + raise exception.ImportTaskError(msg) + image.set_data(data_iter, size=size, backend=backend, set_active=set_active) + except Exception as e: with excutils.save_and_reraise_exception(): LOG.warning(_LW("Task %(task_id)s failed with exception " diff --git a/glance/common/scripts/utils.py b/glance/common/scripts/utils.py index f7c6a9f6a4..e7ff36e342 100644 --- a/glance/common/scripts/utils.py +++ b/glance/common/scripts/utils.py @@ -20,7 +20,7 @@ __all__ = [ 'validate_location_uri', 'get_image_data_iter', ] - +import os import urllib from oslo_log import log as logging @@ -133,6 +133,7 @@ def get_image_data_iter(uri): Validation/sanitization of the uri is expected to happen before we get here. """ + size = 0 # NOTE(flaper87): This is safe because the input uri is already # verified before the task is created. if uri.startswith("file://"): @@ -146,9 +147,16 @@ def get_image_data_iter(uri): # # We're not using StringIO or other tools to avoid reading everything # into memory. Some images may be quite heavy. - return open(uri, "rb") + size = os.path.getsize(uri) + return open(uri, "rb"), size - return urllib.request.urlopen(uri) + urlopen = urllib.request.urlopen(uri) + try: + size = int(urlopen.headers['content-length']) + except (KeyError, ValueError, TypeError): + pass + + return urlopen, size class CallbackIterator(object): diff --git a/glance/tests/unit/async_/flows/test_convert.py b/glance/tests/unit/async_/flows/test_convert.py index aa65ab7ae5..976dc60be8 100644 --- a/glance/tests/unit/async_/flows/test_convert.py +++ b/glance/tests/unit/async_/flows/test_convert.py @@ -155,7 +155,7 @@ class TestImportTask(test_utils.BaseTestCase): assert os.path.exists(args[3].split("file://")[-1]) return (json.dumps({ - "virtual-size": 10737418240, + "virtual-size": 10, "filename": "/tmp/image.qcow2", "cluster-size": 65536, "format": "raw", @@ -173,7 +173,11 @@ class TestImportTask(test_utils.BaseTestCase): return ("", None) with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + content = b"TEST_IMAGE" + + def new_data_iter(*args, **kwargs): + return io.BytesIO(content), len(content) + dmock.side_effect = new_data_iter with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = fake_execute @@ -187,7 +191,7 @@ class TestImportTask(test_utils.BaseTestCase): # the tasks have been executed. self.assertEqual([], os.listdir(self.work_dir)) self.assertEqual('raw', image.disk_format) - self.assertEqual(10737418240, image.virtual_size) + self.assertEqual(10, image.virtual_size) # NOTE(hemanthm): Asserting that the source format is passed # to qemu-utis to avoid inferring the image format when diff --git a/glance/tests/unit/async_/flows/test_import.py b/glance/tests/unit/async_/flows/test_import.py index b49ef1867b..8a4a1fb6b9 100644 --- a/glance/tests/unit/async_/flows/test_import.py +++ b/glance/tests/unit/async_/flows/test_import.py @@ -17,7 +17,6 @@ import io import json import os from unittest import mock -import urllib import glance_store from oslo_concurrency import processutils as putils @@ -123,7 +122,11 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + content = b"TEST_IMAGE" + + def new_data_iter(*args, **kwargs): + return io.BytesIO(content), len(content) + dmock.side_effect = new_data_iter with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -166,7 +169,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(import_flow._ImportToFS, 'execute') as emk: executor.begin_processing(self.task.task_id) @@ -200,7 +203,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: out = json.dumps({'format-specific': @@ -270,7 +273,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -320,7 +323,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -371,9 +374,9 @@ class TestImportTask(test_utils.BaseTestCase): self.img_repo.get.return_value = self.image img_factory.new_image.side_effect = create_image - with mock.patch.object(urllib.request, 'urlopen') as umock: + with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: content = b"TEST_IMAGE" - umock.return_value = io.BytesIO(content) + dmock.return_value = (io.BytesIO(content), len(content)) with mock.patch.object(import_flow, "_get_import_flows") as imock: imock.return_value = (x for x in []) @@ -383,7 +386,7 @@ class TestImportTask(test_utils.BaseTestCase): "%s.tasks_import" % image_path) self.assertFalse(os.path.exists(tmp_image_path)) self.assertTrue(os.path.exists(image_path)) - self.assertEqual(1, umock.call_count) + self.assertEqual(1, dmock.call_count) with open(image_path, 'rb') as ifile: self.assertEqual(content, ifile.read()) @@ -430,7 +433,7 @@ class TestImportTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: content = b"test" - dmock.return_value = [content] + dmock.return_value = ([content], 4) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ diff --git a/glance/tests/unit/async_/flows/test_web_download.py b/glance/tests/unit/async_/flows/test_web_download.py index 997fcf90a7..6913d10e36 100644 --- a/glance/tests/unit/async_/flows/test_web_download.py +++ b/glance/tests/unit/async_/flows/test_web_download.py @@ -75,36 +75,44 @@ class TestWebDownloadTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {} + data_mock = mock.MagicMock() + data_mock.headers = {} + mock_iter.return_value = (data_mock, 0) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_with_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) mock_add.return_value = ["path", 4] self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 4) @mock.patch.object(filesystem.Store, 'add') def test_web_download_with_invalid_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': "not_valid"} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': "not_valid"} + mock_iter.return_value = (data_mock, 0) mock_add.return_value = ["path", 4] self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_fails_when_data_size_different(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) mock_add.return_value = ["path", 3] self.assertRaises( glance.common.exception.ImportTaskError, @@ -122,26 +130,32 @@ class TestWebDownloadTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 4) @mock.patch.object(filesystem.Store, 'add') def test_web_download_invalid_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {'content-length': 'not_valid'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': 'not_valid'} + mock_iter.return_value = (data_mock, 0) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_wrong_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 2] - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) self.assertRaises(glance.common.exception.ImportTaskError, self.web_download_task.execute) diff --git a/glance/tests/unit/common/scripts/image_import/test_main.py b/glance/tests/unit/common/scripts/image_import/test_main.py index 9ac71ae0cb..5a648e1881 100644 --- a/glance/tests/unit/common/scripts/image_import/test_main.py +++ b/glance/tests/unit/common/scripts/image_import/test_main.py @@ -88,7 +88,8 @@ class TestImageImport(test_utils.BaseTestCase): def test_set_image_data_http(self, mock_image_iter): uri = 'http://www.example.com' image = mock.Mock() - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + image.size = None + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0) self.assertIsNone(image_import_script.set_image_data(image, uri, None)) @@ -127,10 +128,12 @@ class TestImageImport(test_utils.BaseTestCase): def test_set_image_data_with_callback(self, mock_gidi, mock_sw): data = [b'0' * 60, b'0' * 50, b'0' * 10, b'0' * 150] result_data = [] - mock_gidi.return_value = iter(data) + # Return the iterator and the total size in bytes of the data + mock_gidi.return_value = (iter(data), len(data)) mock_sw.return_value.expired.side_effect = [False, True, False, False] image = mock.MagicMock() + image.size = None callback = mock.MagicMock() def fake_set_data(data_iter, **kwargs): @@ -154,7 +157,7 @@ class TestImageImport(test_utils.BaseTestCase): """Verify set_data called with size=image.size when image.size set.""" uri = 'http://www.example.com' image = mock.Mock(size=1024) - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 1024) image_import_script.set_image_data(image, uri, None) @@ -170,7 +173,7 @@ class TestImageImport(test_utils.BaseTestCase): """Verify set_data is called with size=0 when image.size is None.""" uri = 'http://www.example.com' image = mock.Mock(size=None) - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0) image_import_script.set_image_data(image, uri, None) @@ -179,3 +182,14 @@ class TestImageImport(test_utils.BaseTestCase): self.assertEqual(0, call_kwargs['size']) self.assertIn('backend', call_kwargs) self.assertIn('set_active', call_kwargs) + + @mock.patch.object(utils, 'get_image_data_iter') + def test_set_image_data_fails_when_image_size_mismatch( + self, mock_image_iter): + """Verify set_data fails during size mismatch""" + uri = 'http://www.example.com' + image = mock.Mock(size=1024) + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 512) + + self.assertRaises(exception.ImportTaskError, + image_import_script.set_image_data, image, uri, None) diff --git a/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml b/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml new file mode 100644 index 0000000000..f3861964d4 --- /dev/null +++ b/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Bug 2141673_: image import can fail for bigger images with s3 backend + + Glance now tries to detect the image size in advance so that some backends + can be more efficient when uploading image data. This is true for the S3 + backend which can use a multipart upload instead of a singlepart upload. + + .. _2141673: https://bugs.launchpad.net/glance/+bug/2141673