diff --git a/glance/async_/flows/_internal_plugins/web_download.py b/glance/async_/flows/_internal_plugins/web_download.py index 3e831dbbd9..17a52b3dc9 100644 --- a/glance/async_/flows/_internal_plugins/web_download.py +++ b/glance/async_/flows/_internal_plugins/web_download.py @@ -48,25 +48,24 @@ class _WebDownload(base_download.BaseDownload): # we recommend as the best solution. For more details on this, please # refer to the comment in the `_ImportToStore.execute` method. try: - data = script_utils.get_image_data_iter(self.uri) + data, size = script_utils.get_image_data_iter(self.uri) except Exception as e: with excutils.save_and_reraise_exception(): LOG.error("Task %(task_id)s failed with exception %(error)s", {"error": e, "task_id": self.task_id}) - self._path, bytes_written = self.store.add(self.image_id, data, 0)[0:2] - try: - content_length = int(data.headers['content-length']) - if bytes_written != content_length: - msg = (_("Task %(task_id)s failed because downloaded data " - "size %(data_size)i is different from expected %(" - "expected)i") % - {"task_id": self.task_id, "data_size": bytes_written, - "expected": content_length}) - raise exception.ImportTaskError(msg) - except (KeyError, ValueError): - pass + self._path, bytes_written = self.store.add(self.image_id, data, + size)[0:2] + + if bytes_written != size and size != 0: + msg = (_("Task %(task_id)s failed because downloaded data " + "size %(data_size)i is different from expected %(" + "expected)i") % {"task_id": self.task_id, + "data_size": bytes_written, + "expected": size}) + raise exception.ImportTaskError(msg) + return self._path diff --git a/glance/async_/flows/base_import.py b/glance/async_/flows/base_import.py index cfc2236b63..33cb95c9a6 100644 --- a/glance/async_/flows/base_import.py +++ b/glance/async_/flows/base_import.py @@ -153,9 +153,9 @@ class _ImportToFS(task.Task): # While using any path should be "technically" fine, it's not what # we recommend as the best solution. For more details on this, please # refer to the comment in the `_ImportToStore.execute` method. - data = script_utils.get_image_data_iter(self.uri) + data, size = script_utils.get_image_data_iter(self.uri) - path = self.store.add(image_id, data, 0, context=None)[0] + path = self.store.add(image_id, data, size, context=None)[0] try: # NOTE(flaper87): Consider moving this code to a common diff --git a/glance/common/scripts/api_image_import/main.py b/glance/common/scripts/api_image_import/main.py index e4db531510..d3807e7081 100644 --- a/glance/common/scripts/api_image_import/main.py +++ b/glance/common/scripts/api_image_import/main.py @@ -118,8 +118,8 @@ def set_image_data(image, uri, task_id, backend=None): try: LOG.info("Task %(task_id)s: Got image data uri %(data_uri)s to be " "imported", {"data_uri": uri, "task_id": task_id}) - data_iter = script_utils.get_image_data_iter(uri) - image.set_data(data_iter, backend=backend) + data_iter, size = script_utils.get_image_data_iter(uri) + image.set_data(data_iter, backend=backend, size=size) except Exception as e: with excutils.save_and_reraise_exception(): LOG.warning("Task %(task_id)s failed with exception %(error)s", diff --git a/glance/common/scripts/image_import/main.py b/glance/common/scripts/image_import/main.py index 0cd078590d..23e1871374 100644 --- a/glance/common/scripts/image_import/main.py +++ b/glance/common/scripts/image_import/main.py @@ -140,16 +140,23 @@ def set_image_data(image, uri, task_id, backend=None, set_active=True, try: LOG.info(_LI("Task %(task_id)s: Got image data uri %(data_uri)s to be " "imported"), {"data_uri": uri, "task_id": task_id}) - data_iter = script_utils.get_image_data_iter(uri) + data_iter, size = script_utils.get_image_data_iter(uri) + if callback: # If a callback was provided, wrap our data iterator to call # the function every 60 seconds. data_iter = script_utils.CallbackIterator( data_iter, callback, min_interval=60) - image_size = image.size if image.size is not None else 0 - image.set_data(data_iter, size=image_size, backend=backend, + if image.size is not None and image.size != size: + msg = _( + "Task %(task_id)s: Image size mismatch. Expected %(expected)d " + "but got %(actual)d" + ) % {"task_id": task_id, "expected": image.size, "actual": size} + raise exception.ImportTaskError(msg) + image.set_data(data_iter, size=size, backend=backend, set_active=set_active) + except Exception as e: with excutils.save_and_reraise_exception(): LOG.warning(_LW("Task %(task_id)s failed with exception " diff --git a/glance/common/scripts/utils.py b/glance/common/scripts/utils.py index f7c6a9f6a4..e7ff36e342 100644 --- a/glance/common/scripts/utils.py +++ b/glance/common/scripts/utils.py @@ -20,7 +20,7 @@ __all__ = [ 'validate_location_uri', 'get_image_data_iter', ] - +import os import urllib from oslo_log import log as logging @@ -133,6 +133,7 @@ def get_image_data_iter(uri): Validation/sanitization of the uri is expected to happen before we get here. """ + size = 0 # NOTE(flaper87): This is safe because the input uri is already # verified before the task is created. if uri.startswith("file://"): @@ -146,9 +147,16 @@ def get_image_data_iter(uri): # # We're not using StringIO or other tools to avoid reading everything # into memory. Some images may be quite heavy. - return open(uri, "rb") + size = os.path.getsize(uri) + return open(uri, "rb"), size - return urllib.request.urlopen(uri) + urlopen = urllib.request.urlopen(uri) + try: + size = int(urlopen.headers['content-length']) + except (KeyError, ValueError, TypeError): + pass + + return urlopen, size class CallbackIterator(object): diff --git a/glance/tests/unit/async_/flows/test_convert.py b/glance/tests/unit/async_/flows/test_convert.py index aa65ab7ae5..976dc60be8 100644 --- a/glance/tests/unit/async_/flows/test_convert.py +++ b/glance/tests/unit/async_/flows/test_convert.py @@ -155,7 +155,7 @@ class TestImportTask(test_utils.BaseTestCase): assert os.path.exists(args[3].split("file://")[-1]) return (json.dumps({ - "virtual-size": 10737418240, + "virtual-size": 10, "filename": "/tmp/image.qcow2", "cluster-size": 65536, "format": "raw", @@ -173,7 +173,11 @@ class TestImportTask(test_utils.BaseTestCase): return ("", None) with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + content = b"TEST_IMAGE" + + def new_data_iter(*args, **kwargs): + return io.BytesIO(content), len(content) + dmock.side_effect = new_data_iter with mock.patch.object(processutils, 'execute') as exc_mock: exc_mock.side_effect = fake_execute @@ -187,7 +191,7 @@ class TestImportTask(test_utils.BaseTestCase): # the tasks have been executed. self.assertEqual([], os.listdir(self.work_dir)) self.assertEqual('raw', image.disk_format) - self.assertEqual(10737418240, image.virtual_size) + self.assertEqual(10, image.virtual_size) # NOTE(hemanthm): Asserting that the source format is passed # to qemu-utis to avoid inferring the image format when diff --git a/glance/tests/unit/async_/flows/test_import.py b/glance/tests/unit/async_/flows/test_import.py index b49ef1867b..8a4a1fb6b9 100644 --- a/glance/tests/unit/async_/flows/test_import.py +++ b/glance/tests/unit/async_/flows/test_import.py @@ -17,7 +17,6 @@ import io import json import os from unittest import mock -import urllib import glance_store from oslo_concurrency import processutils as putils @@ -123,7 +122,11 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + content = b"TEST_IMAGE" + + def new_data_iter(*args, **kwargs): + return io.BytesIO(content), len(content) + dmock.side_effect = new_data_iter with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -166,7 +169,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(import_flow._ImportToFS, 'execute') as emk: executor.begin_processing(self.task.task_id) @@ -200,7 +203,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: out = json.dumps({'format-specific': @@ -270,7 +273,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -320,7 +323,7 @@ class TestImportTask(test_utils.BaseTestCase): img_factory.new_image.side_effect = create_image with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: - dmock.return_value = io.BytesIO(b"TEST_IMAGE") + dmock.return_value = (io.BytesIO(b"TEST_IMAGE"), 10) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ @@ -371,9 +374,9 @@ class TestImportTask(test_utils.BaseTestCase): self.img_repo.get.return_value = self.image img_factory.new_image.side_effect = create_image - with mock.patch.object(urllib.request, 'urlopen') as umock: + with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: content = b"TEST_IMAGE" - umock.return_value = io.BytesIO(content) + dmock.return_value = (io.BytesIO(content), len(content)) with mock.patch.object(import_flow, "_get_import_flows") as imock: imock.return_value = (x for x in []) @@ -383,7 +386,7 @@ class TestImportTask(test_utils.BaseTestCase): "%s.tasks_import" % image_path) self.assertFalse(os.path.exists(tmp_image_path)) self.assertTrue(os.path.exists(image_path)) - self.assertEqual(1, umock.call_count) + self.assertEqual(1, dmock.call_count) with open(image_path, 'rb') as ifile: self.assertEqual(content, ifile.read()) @@ -430,7 +433,7 @@ class TestImportTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as dmock: content = b"test" - dmock.return_value = [content] + dmock.return_value = ([content], 4) with mock.patch.object(putils, 'trycmd') as tmock: tmock.return_value = (json.dumps({ diff --git a/glance/tests/unit/async_/flows/test_web_download.py b/glance/tests/unit/async_/flows/test_web_download.py index 997fcf90a7..6913d10e36 100644 --- a/glance/tests/unit/async_/flows/test_web_download.py +++ b/glance/tests/unit/async_/flows/test_web_download.py @@ -75,36 +75,44 @@ class TestWebDownloadTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {} + data_mock = mock.MagicMock() + data_mock.headers = {} + mock_iter.return_value = (data_mock, 0) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_with_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) mock_add.return_value = ["path", 4] self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 4) @mock.patch.object(filesystem.Store, 'add') def test_web_download_with_invalid_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': "not_valid"} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': "not_valid"} + mock_iter.return_value = (data_mock, 0) mock_add.return_value = ["path", 4] self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_fails_when_data_size_different(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) mock_add.return_value = ["path", 3] self.assertRaises( glance.common.exception.ImportTaskError, @@ -122,26 +130,32 @@ class TestWebDownloadTask(test_utils.BaseTestCase): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 4) @mock.patch.object(filesystem.Store, 'add') def test_web_download_invalid_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 4] - mock_iter.return_value.headers = {'content-length': 'not_valid'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': 'not_valid'} + mock_iter.return_value = (data_mock, 0) self.assertEqual(self.web_download_task.execute(), "path") mock_add.assert_called_once_with(self.image_id, - mock_iter.return_value, 0) + data_mock, 0) @mock.patch.object(filesystem.Store, 'add') def test_web_download_wrong_content_length(self, mock_add): with mock.patch.object(script_utils, 'get_image_data_iter') as mock_iter: mock_add.return_value = ["path", 2] - mock_iter.return_value.headers = {'content-length': '4'} + data_mock = mock.MagicMock() + data_mock.headers = {'content-length': '4'} + mock_iter.return_value = (data_mock, 4) self.assertRaises(glance.common.exception.ImportTaskError, self.web_download_task.execute) diff --git a/glance/tests/unit/common/scripts/image_import/test_main.py b/glance/tests/unit/common/scripts/image_import/test_main.py index 9ac71ae0cb..5a648e1881 100644 --- a/glance/tests/unit/common/scripts/image_import/test_main.py +++ b/glance/tests/unit/common/scripts/image_import/test_main.py @@ -88,7 +88,8 @@ class TestImageImport(test_utils.BaseTestCase): def test_set_image_data_http(self, mock_image_iter): uri = 'http://www.example.com' image = mock.Mock() - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + image.size = None + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0) self.assertIsNone(image_import_script.set_image_data(image, uri, None)) @@ -127,10 +128,12 @@ class TestImageImport(test_utils.BaseTestCase): def test_set_image_data_with_callback(self, mock_gidi, mock_sw): data = [b'0' * 60, b'0' * 50, b'0' * 10, b'0' * 150] result_data = [] - mock_gidi.return_value = iter(data) + # Return the iterator and the total size in bytes of the data + mock_gidi.return_value = (iter(data), len(data)) mock_sw.return_value.expired.side_effect = [False, True, False, False] image = mock.MagicMock() + image.size = None callback = mock.MagicMock() def fake_set_data(data_iter, **kwargs): @@ -154,7 +157,7 @@ class TestImageImport(test_utils.BaseTestCase): """Verify set_data called with size=image.size when image.size set.""" uri = 'http://www.example.com' image = mock.Mock(size=1024) - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 1024) image_import_script.set_image_data(image, uri, None) @@ -170,7 +173,7 @@ class TestImageImport(test_utils.BaseTestCase): """Verify set_data is called with size=0 when image.size is None.""" uri = 'http://www.example.com' image = mock.Mock(size=None) - mock_image_iter.return_value = test_utils.FakeHTTPResponse() + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 0) image_import_script.set_image_data(image, uri, None) @@ -179,3 +182,14 @@ class TestImageImport(test_utils.BaseTestCase): self.assertEqual(0, call_kwargs['size']) self.assertIn('backend', call_kwargs) self.assertIn('set_active', call_kwargs) + + @mock.patch.object(utils, 'get_image_data_iter') + def test_set_image_data_fails_when_image_size_mismatch( + self, mock_image_iter): + """Verify set_data fails during size mismatch""" + uri = 'http://www.example.com' + image = mock.Mock(size=1024) + mock_image_iter.return_value = (test_utils.FakeHTTPResponse(), 512) + + self.assertRaises(exception.ImportTaskError, + image_import_script.set_image_data, image, uri, None) diff --git a/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml b/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml new file mode 100644 index 0000000000..f3861964d4 --- /dev/null +++ b/releasenotes/notes/return-size-get_image_data_iter-6ff6c742dd87b59b.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Bug 2141673_: image import can fail for bigger images with s3 backend + + Glance now tries to detect the image size in advance so that some backends + can be more efficient when uploading image data. This is true for the S3 + backend which can use a multipart upload instead of a singlepart upload. + + .. _2141673: https://bugs.launchpad.net/glance/+bug/2141673