feat: return size in get_image_data_iter

Return the size with the data iter in get_image_data_iter(uri).
We will either fetch it from the local filesize or from the http
'content-length' header.

For example the s3 store driver needs to know the image size in order
to decide between singlepart and multipart upload.
Without the size the s3 driver will always attempt to use singlepart
which can fail for larger objects. e.g. aws part size of 5GB

Closes-Bug: #2141673
Change-Id: Ief14a61f6a1d9327acfd6ada61c2b5976d153555
Signed-off-by: Max Lamprecht <max.lamprecht@stackit.cloud>
This commit is contained in:
Max Lamprecht
2026-02-11 10:37:37 +01:00
committed by Cyril Roelandt
parent 9987754ba4
commit 8d6333b462
10 changed files with 111 additions and 52 deletions

View File

@@ -48,25 +48,24 @@ class _WebDownload(base_download.BaseDownload):
# we recommend as the best solution. For more details on this, please
# refer to the comment in the `_ImportToStore.execute` method.
try:
data = script_utils.get_image_data_iter(self.uri)
data, size = script_utils.get_image_data_iter(self.uri)
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.error("Task %(task_id)s failed with exception %(error)s",
{"error": e,
"task_id": self.task_id})
self._path, bytes_written = self.store.add(self.image_id, data, 0)[0:2]
try:
content_length = int(data.headers['content-length'])
if bytes_written != content_length:
msg = (_("Task %(task_id)s failed because downloaded data "
"size %(data_size)i is different from expected %("
"expected)i") %
{"task_id": self.task_id, "data_size": bytes_written,
"expected": content_length})
raise exception.ImportTaskError(msg)
except (KeyError, ValueError):
pass
self._path, bytes_written = self.store.add(self.image_id, data,
size)[0:2]
if bytes_written != size and size != 0:
msg = (_("Task %(task_id)s failed because downloaded data "
"size %(data_size)i is different from expected %("
"expected)i") % {"task_id": self.task_id,
"data_size": bytes_written,
"expected": size})
raise exception.ImportTaskError(msg)
return self._path

View File

@@ -153,9 +153,9 @@ class _ImportToFS(task.Task):
# While using any path should be "technically" fine, it's not what
# we recommend as the best solution. For more details on this, please
# refer to the comment in the `_ImportToStore.execute` method.
data = script_utils.get_image_data_iter(self.uri)
data, size = script_utils.get_image_data_iter(self.uri)
path = self.store.add(image_id, data, 0, context=None)[0]
path = self.store.add(image_id, data, size, context=None)[0]
try:
# NOTE(flaper87): Consider moving this code to a common

View File

@@ -118,8 +118,8 @@ def set_image_data(image, uri, task_id, backend=None):
try:
LOG.info("Task %(task_id)s: Got image data uri %(data_uri)s to be "
"imported", {"data_uri": uri, "task_id": task_id})
data_iter = script_utils.get_image_data_iter(uri)
image.set_data(data_iter, backend=backend)
data_iter, size = script_utils.get_image_data_iter(uri)
image.set_data(data_iter, backend=backend, size=size)
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.warning("Task %(task_id)s failed with exception %(error)s",

View File

@@ -140,16 +140,23 @@ def set_image_data(image, uri, task_id, backend=None, set_active=True,
try:
LOG.info(_LI("Task %(task_id)s: Got image data uri %(data_uri)s to be "
"imported"), {"data_uri": uri, "task_id": task_id})
data_iter = script_utils.get_image_data_iter(uri)
data_iter, size = script_utils.get_image_data_iter(uri)
if callback:
# If a callback was provided, wrap our data iterator to call
# the function every 60 seconds.
data_iter = script_utils.CallbackIterator(
data_iter, callback, min_interval=60)
image_size = image.size if image.size is not None else 0
image.set_data(data_iter, size=image_size, backend=backend,
if image.size is not None and image.size != size:
msg = _(
"Task %(task_id)s: Image size mismatch. Expected %(expected)d "
"but got %(actual)d"
) % {"task_id": task_id, "expected": image.size, "actual": size}
raise exception.ImportTaskError(msg)
image.set_data(data_iter, size=size, backend=backend,
set_active=set_active)
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.warning(_LW("Task %(task_id)s failed with exception "

View File

@@ -20,7 +20,7 @@ __all__ = [
'validate_location_uri',
'get_image_data_iter',
]
import os
import urllib
from oslo_log import log as logging
@@ -133,6 +133,7 @@ def get_image_data_iter(uri):
Validation/sanitization of the uri is expected to happen before we get
here.
"""
size = 0
# NOTE(flaper87): This is safe because the input uri is already
# verified before the task is created.
if uri.startswith("file://"):
@@ -146,9 +147,16 @@ def get_image_data_iter(uri):
#
# We're not using StringIO or other tools to avoid reading everything
# into memory. Some images may be quite heavy.
return open(uri, "rb")
size = os.path.getsize(uri)
return open(uri, "rb"), size
return urllib.request.urlopen(uri)
urlopen = urllib.request.urlopen(uri)
try:
size = int(urlopen.headers['content-length'])
except (KeyError, ValueError, TypeError):
pass
return urlopen, size
class CallbackIterator(object):

View File

@@ -155,7 +155,7 @@ class TestImportTask(test_utils.BaseTestCase):
assert os.path.exists(args[3].split("file://")[-1])
return (json.dumps({
"virtual-size": 10737418240,
"virtual-size": 10,
"filename": "/tmp/image.qcow2",
"cluster-size": 65536,
"format": "raw",
@@ -173,7 +173,11 @@ class TestImportTask(test_utils.BaseTestCase):
return ("", None)
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
content = b"TEST_IMAGE"
def new_data_iter(*args, **kwargs):
return io.BytesIO(content), len(content)
dmock.side_effect = new_data_iter
with mock.patch.object(processutils, 'execute') as exc_mock:
exc_mock.side_effect = fake_execute
@@ -187,7 +191,7 @@ class TestImportTask(test_utils.BaseTestCase):
# the tasks have been executed.
self.assertEqual([], os.listdir(self.work_dir))
self.assertEqual('raw', image.disk_format)
self.assertEqual(10737418240, image.virtual_size)
self.assertEqual(10, image.virtual_size)
# NOTE(hemanthm): Asserting that the source format is passed
# to qemu-utis to avoid inferring the image format when

View File

@@ -17,7 +17,6 @@ import io
import json
import os
from unittest import mock
import urllib
import glance_store
from oslo_concurrency import processutils as putils
@@ -123,7 +122,11 @@ class TestImportTask(test_utils.BaseTestCase):
img_factory.new_image.side_effect = create_image
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
content = b"TEST_IMAGE"
def new_data_iter(*args, **kwargs):
return io.BytesIO(content), len(content)
dmock.side_effect = new_data_iter
with mock.patch.object(putils, 'trycmd') as tmock:
tmock.return_value = (json.dumps({
@@ -166,7 +169,7 @@ class TestImportTask(test_utils.BaseTestCase):
img_factory.new_image.side_effect = create_image
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10)
with mock.patch.object(import_flow._ImportToFS, 'execute') as emk:
executor.begin_processing(self.task.task_id)
@@ -200,7 +203,7 @@ class TestImportTask(test_utils.BaseTestCase):
img_factory.new_image.side_effect = create_image
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10)
with mock.patch.object(putils, 'trycmd') as tmock:
out = json.dumps({'format-specific':
@@ -270,7 +273,7 @@ class TestImportTask(test_utils.BaseTestCase):
img_factory.new_image.side_effect = create_image
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10)
with mock.patch.object(putils, 'trycmd') as tmock:
tmock.return_value = (json.dumps({
@@ -320,7 +323,7 @@ class TestImportTask(test_utils.BaseTestCase):
img_factory.new_image.side_effect = create_image
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
dmock.return_value = io.BytesIO(b"TEST_IMAGE")
dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10)
with mock.patch.object(putils, 'trycmd') as tmock:
tmock.return_value = (json.dumps({
@@ -371,9 +374,9 @@ class TestImportTask(test_utils.BaseTestCase):
self.img_repo.get.return_value = self.image
img_factory.new_image.side_effect = create_image
with mock.patch.object(urllib.request, 'urlopen') as umock:
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
content = b"TEST_IMAGE"
umock.return_value = io.BytesIO(content)
dmock.return_value = (io.BytesIO(content), len(content))
with mock.patch.object(import_flow, "_get_import_flows") as imock:
imock.return_value = (x for x in [])
@@ -383,7 +386,7 @@ class TestImportTask(test_utils.BaseTestCase):
"%s.tasks_import" % image_path)
self.assertFalse(os.path.exists(tmp_image_path))
self.assertTrue(os.path.exists(image_path))
self.assertEqual(1, umock.call_count)
self.assertEqual(1, dmock.call_count)
with open(image_path, 'rb') as ifile:
self.assertEqual(content, ifile.read())
@@ -430,7 +433,7 @@ class TestImportTask(test_utils.BaseTestCase):
with mock.patch.object(script_utils, 'get_image_data_iter') as dmock:
content = b"test"
dmock.return_value = [content]
dmock.return_value = ([content], 4)
with mock.patch.object(putils, 'trycmd') as tmock:
tmock.return_value = (json.dumps({

View File

@@ -75,36 +75,44 @@ class TestWebDownloadTask(test_utils.BaseTestCase):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_add.return_value = ["path", 4]
mock_iter.return_value.headers = {}
data_mock = mock.MagicMock()
data_mock.headers = {}
mock_iter.return_value = (data_mock, 0)
self.assertEqual(self.web_download_task.execute(), "path")
mock_add.assert_called_once_with(self.image_id,
mock_iter.return_value, 0)
data_mock, 0)
@mock.patch.object(filesystem.Store, 'add')
def test_web_download_with_content_length(self, mock_add):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_iter.return_value.headers = {'content-length': '4'}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': '4'}
mock_iter.return_value = (data_mock, 4)
mock_add.return_value = ["path", 4]
self.assertEqual(self.web_download_task.execute(), "path")
mock_add.assert_called_once_with(self.image_id,
mock_iter.return_value, 0)
data_mock, 4)
@mock.patch.object(filesystem.Store, 'add')
def test_web_download_with_invalid_content_length(self, mock_add):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_iter.return_value.headers = {'content-length': "not_valid"}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': "not_valid"}
mock_iter.return_value = (data_mock, 0)
mock_add.return_value = ["path", 4]
self.assertEqual(self.web_download_task.execute(), "path")
mock_add.assert_called_once_with(self.image_id,
mock_iter.return_value, 0)
data_mock, 0)
@mock.patch.object(filesystem.Store, 'add')
def test_web_download_fails_when_data_size_different(self, mock_add):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_iter.return_value.headers = {'content-length': '4'}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': '4'}
mock_iter.return_value = (data_mock, 4)
mock_add.return_value = ["path", 3]
self.assertRaises(
glance.common.exception.ImportTaskError,
@@ -122,26 +130,32 @@ class TestWebDownloadTask(test_utils.BaseTestCase):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_add.return_value = ["path", 4]
mock_iter.return_value.headers = {'content-length': '4'}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': '4'}
mock_iter.return_value = (data_mock, 4)
self.assertEqual(self.web_download_task.execute(), "path")
mock_add.assert_called_once_with(self.image_id,
mock_iter.return_value, 0)
data_mock, 4)
@mock.patch.object(filesystem.Store, 'add')
def test_web_download_invalid_content_length(self, mock_add):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_add.return_value = ["path", 4]
mock_iter.return_value.headers = {'content-length': 'not_valid'}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': 'not_valid'}
mock_iter.return_value = (data_mock, 0)
self.assertEqual(self.web_download_task.execute(), "path")
mock_add.assert_called_once_with(self.image_id,
mock_iter.return_value, 0)
data_mock, 0)
@mock.patch.object(filesystem.Store, 'add')
def test_web_download_wrong_content_length(self, mock_add):
with mock.patch.object(script_utils,
'get_image_data_iter') as mock_iter:
mock_add.return_value = ["path", 2]
mock_iter.return_value.headers = {'content-length': '4'}
data_mock = mock.MagicMock()
data_mock.headers = {'content-length': '4'}
mock_iter.return_value = (data_mock, 4)
self.assertRaises(glance.common.exception.ImportTaskError,
self.web_download_task.execute)

View File

@@ -88,7 +88,8 @@ class TestImageImport(test_utils.BaseTestCase):
def test_set_image_data_http(self, mock_image_iter):
uri = 'http://www.example.com'
image = mock.Mock()
mock_image_iter.return_value = test_utils.FakeHTTPResponse()
image.size = None
mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0)
self.assertIsNone(image_import_script.set_image_data(image,
uri,
None))
@@ -127,10 +128,12 @@ class TestImageImport(test_utils.BaseTestCase):
def test_set_image_data_with_callback(self, mock_gidi, mock_sw):
data = [b'0' * 60, b'0' * 50, b'0' * 10, b'0' * 150]
result_data = []
mock_gidi.return_value = iter(data)
# Return the iterator and the total size in bytes of the data
mock_gidi.return_value = (iter(data), len(data))
mock_sw.return_value.expired.side_effect = [False, True, False,
False]
image = mock.MagicMock()
image.size = None
callback = mock.MagicMock()
def fake_set_data(data_iter, **kwargs):
@@ -154,7 +157,7 @@ class TestImageImport(test_utils.BaseTestCase):
"""Verify set_data called with size=image.size when image.size set."""
uri = 'http://www.example.com'
image = mock.Mock(size=1024)
mock_image_iter.return_value = test_utils.FakeHTTPResponse()
mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 1024)
image_import_script.set_image_data(image, uri, None)
@@ -170,7 +173,7 @@ class TestImageImport(test_utils.BaseTestCase):
"""Verify set_data is called with size=0 when image.size is None."""
uri = 'http://www.example.com'
image = mock.Mock(size=None)
mock_image_iter.return_value = test_utils.FakeHTTPResponse()
mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0)
image_import_script.set_image_data(image, uri, None)
@@ -179,3 +182,14 @@ class TestImageImport(test_utils.BaseTestCase):
self.assertEqual(0, call_kwargs['size'])
self.assertIn('backend', call_kwargs)
self.assertIn('set_active', call_kwargs)
@mock.patch.object(utils, 'get_image_data_iter')
def test_set_image_data_fails_when_image_size_mismatch(
self, mock_image_iter):
"""Verify set_data fails during size mismatch"""
uri = 'http://www.example.com'
image = mock.Mock(size=1024)
mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 512)
self.assertRaises(exception.ImportTaskError,
image_import_script.set_image_data, image, uri, None)

View File

@@ -0,0 +1,10 @@
---
fixes:
- |
Bug 2141673_: image import can fail for bigger images with s3 backend
Glance now tries to detect the image size in advance so that some backends
can be more efficient when uploading image data. This is true for the S3
backend which can use a multipart upload instead of a singlepart upload.
.. _2141673: https://bugs.launchpad.net/glance/+bug/2141673