From 48449171a73e9b470b3caa136f20d56cd1757bdc Mon Sep 17 00:00:00 2001 From: Ewan Mellor Date: Wed, 28 Dec 2011 21:13:55 -0800 Subject: [PATCH] Bug #909533: Swift uploads through Glance using ridiculously small chunks Remove the use of a temporary disk buffer when streaming to Swift. This was added on the assumption that it was not possible to stream chunks to Swift direct from webob.Request.body_file. That's not true -- a simple file-like object wrapping body_file does the job perfectly. This removes the need for swift_store_object_buffer_dir in the config file. It is also significantly cheaper, since there is one copy fewer. Fix the parsing of the swift_store_large_object_size and swift_store_large_object_chunk_size options. These are specified in MB in the config file, but needs to be in bytes internally, because they are compared against the image_size parameter given to add(). Update the unit tests to match. This includes an additional check that put_object is called the correct number of times. The unit tests missed the latter problem problem because there was no check that a small object is only uploaded in one chunk, and despite the comment to the contrary there was no check that the right number of chunks were written in the large-object case either. Added these in all places where store.add is called. Change-Id: Ieb4cf68516b53bd16d2671e49e805b26118b3671 --- doc/source/configuring.rst | 16 +---- etc/glance-api.conf | 5 -- glance/store/swift.py | 94 ++++++++++++++++----------- glance/tests/unit/test_swift_store.py | 89 ++++++++++++++++++++----- 4 files changed, 132 insertions(+), 72 deletions(-) diff --git a/doc/source/configuring.rst b/doc/source/configuring.rst index cf1f40784e..0c51e373fa 100644 --- a/doc/source/configuring.rst +++ b/doc/source/configuring.rst @@ -328,21 +328,7 @@ Can only be specified in configuration files. `This option is specific to the Swift storage backend.` When doing a large object manifest, what size, in MB, should -Glance write chunks to Swift? This amount of data is written -to a temporary disk buffer during the process of chunking -the image file, and the default is 200MB - -* ``swift_store_object_buffer_dir=PATH`` - -Optional. Default: ``the platform's default temporary directory`` - -Can only be specified in configuration files. - -`This option is specific to the Swift storage backend.` - -When sending large images to Swift, what directory should be -used to buffer the chunks? By default the platform's -temporary directory will be used. +Glance write chunks to Swift? The default is 200MB. Configuring the S3 Storage Backend ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/etc/glance-api.conf b/etc/glance-api.conf index ae396db9f9..391ca1f8b7 100644 --- a/etc/glance-api.conf +++ b/etc/glance-api.conf @@ -137,11 +137,6 @@ swift_store_large_object_chunk_size = 200 # Ex. https://example.com/v1.0/ -> https://snet-example.com/v1.0/ swift_enable_snet = False -# When sending large images to Swift, the chunks will be written to a -# temporary buffer on disk. By default the platform's temporary directory -# will be used. If required, an alternative directory can be specified here. -# swift_store_object_buffer_dir = /path/to/dir - # ============ S3 Store Options ============================= # Address where the S3 authentication service lives diff --git a/glance/store/swift.py b/glance/store/swift.py index 876e1ea1a4..2b51276758 100644 --- a/glance/store/swift.py +++ b/glance/store/swift.py @@ -40,6 +40,7 @@ except ImportError: DEFAULT_CONTAINER = 'glance' DEFAULT_LARGE_OBJECT_SIZE = 5 * 1024 # 5GB DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 200 # 200M +ONE_MB = 1000 * 1024 logger = logging.getLogger('glance.store.swift') @@ -196,7 +197,6 @@ class Store(glance.store.base.Store): default=DEFAULT_LARGE_OBJECT_SIZE), cfg.IntOpt('swift_store_large_object_chunk_size', default=DEFAULT_LARGE_OBJECT_CHUNK_SIZE), - cfg.StrOpt('swift_store_object_buffer_dir'), cfg.BoolOpt('swift_store_create_container_on_put', default=False), ] @@ -216,11 +216,13 @@ class Store(glance.store.base.Store): self.key = self._option_get('swift_store_key') self.container = self.conf.swift_store_container try: - self.large_object_size = self.conf.swift_store_large_object_size + # The config file has swift_store_large_object_*size in MB, but + # internally we store it in bytes, since the image_size parameter + # passed to add() is also in bytes. + self.large_object_size = \ + self.conf.swift_store_large_object_size * ONE_MB self.large_object_chunk_size = \ - self.conf.swift_store_large_object_chunk_size - self.swift_store_object_buffer_dir = \ - self.conf.swift_store_object_buffer_dir + self.conf.swift_store_large_object_chunk_size * ONE_MB except cfg.ConfigFileValueError, e: reason = _("Error in configuration conf: %s") % e logger.error(reason) @@ -347,12 +349,10 @@ class Store(glance.store.base.Store): # Image size is known, and is less than large_object_size. # Send to Swift with regular PUT. obj_etag = swift_conn.put_object(self.container, obj_name, - image_file) + image_file, + content_length=image_size) else: - # Write the image into Swift in chunks. We cannot - # stream chunks of the webob.Request.body_file, unfortunately, - # so we must write chunks of the body_file into a temporary - # disk buffer, and then pass this disk buffer to Swift. + # Write the image into Swift in chunks. chunk_id = 1 if image_size > 0: total_chunks = str(int( @@ -367,37 +367,40 @@ class Store(glance.store.base.Store): total_chunks = '?' checksum = hashlib.md5() - tmp = self.swift_store_object_buffer_dir combined_chunks_size = 0 while True: - with tempfile.NamedTemporaryFile(dir=tmp) as disk_buffer: - chunk = image_file.read(self.large_object_chunk_size) - if not chunk: + chunk_size = self.large_object_chunk_size + if image_size == 0: + content_length = None + else: + left = image_size - combined_chunks_size + if left == 0: break - chunk_size = len(chunk) - logger.debug(_("Writing %(chunk_size)d bytes for " - "chunk %(chunk_id)d/" - "%(total_chunks)s to disk buffer " - "for image %(image_id)s") - % locals()) - checksum.update(chunk) - disk_buffer.write(chunk) - disk_buffer.flush() - logger.debug(_("Writing chunk %(chunk_id)d/" - "%(total_chunks)s to Swift " - "for image %(image_id)s") - % locals()) - chunk_etag = swift_conn.put_object( - self.container, - "%s-%05d" % (obj_name, chunk_id), - open(disk_buffer.name, 'rb')) - logger.debug(_("Wrote chunk %(chunk_id)d/" - "%(total_chunks)s to Swift " - "returning MD5 of content: " - "%(chunk_etag)s") - % locals()) + if chunk_size > left: + chunk_size = left + content_length = chunk_size + + chunk_name = "%s-%05d" % (obj_name, chunk_id) + reader = ChunkReader(image_file, checksum, chunk_size) + chunk_etag = swift_conn.put_object( + self.container, chunk_name, reader, + content_length=content_length) + bytes_read = reader.bytes_read + logger.debug(_("Wrote chunk %(chunk_id)d/" + "%(total_chunks)s of length %(bytes_read)d " + "to Swift returning MD5 of content: " + "%(chunk_etag)s") + % locals()) + + if bytes_read == 0: + # Delete the last chunk, because it's of zero size. + # This will happen if image_size == 0. + logger.debug(_("Deleting final zero-length chunk")) + swift_conn.delete_object(self.container, chunk_name) + break + chunk_id += 1 - combined_chunks_size += chunk_size + combined_chunks_size += bytes_read # In the case we have been given an unknown image size, # set the image_size to the total size of the combined chunks. @@ -484,6 +487,23 @@ class Store(glance.store.base.Store): raise +class ChunkReader(object): + def __init__(self, fd, checksum, total): + self.fd = fd + self.checksum = checksum + self.total = total + self.bytes_read = 0 + + def read(self, i): + left = self.total - self.bytes_read + if i > left: + i = left + result = self.fd.read(i) + self.bytes_read += len(result) + self.checksum.update(result) + return result + + def create_container_if_missing(container, swift_conn, conf): """ Creates a missing container in Swift if the diff --git a/glance/tests/unit/test_swift_store.py b/glance/tests/unit/test_swift_store.py index 6144a2900b..905241f5df 100644 --- a/glance/tests/unit/test_swift_store.py +++ b/glance/tests/unit/test_swift_store.py @@ -20,6 +20,7 @@ import StringIO import hashlib import httplib +import tempfile import unittest import stubout @@ -39,6 +40,7 @@ Store = glance.store.swift.Store FIVE_KB = (5 * 1024) FIVE_GB = (5 * 1024 * 1024 * 1024) MAX_SWIFT_OBJECT_SIZE = FIVE_GB +SWIFT_PUT_OBJECT_CALLS = 0 SWIFT_CONF = {'verbose': True, 'debug': True, 'swift_store_user': 'user', @@ -71,6 +73,8 @@ def stub_out_swift_common_client(stubs): def fake_put_object(url, token, container, name, contents, **kwargs): # PUT returns the ETag header for the newly-added object # Large object manifest... + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS += 1 fixture_key = "%s/%s" % (container, name) if not fixture_key in fixture_headers.keys(): if kwargs.get('headers'): @@ -241,6 +245,9 @@ class TestStore(unittest.TestCase): '/glance/%s' % expected_image_id image_swift = StringIO.StringIO(expected_swift_contents) + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + location, size, checksum = self.store.add(expected_image_id, image_swift, expected_swift_size) @@ -248,6 +255,8 @@ class TestStore(unittest.TestCase): self.assertEquals(expected_location, location) self.assertEquals(expected_swift_size, size) self.assertEquals(expected_checksum, checksum) + # Expecting a single object to be created on Swift i.e. no chunking. + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1) loc = get_location_from_uri(expected_location) (new_image_swift, new_image_size) = self.store.get(loc) @@ -294,6 +303,9 @@ class TestStore(unittest.TestCase): image_swift = StringIO.StringIO(expected_swift_contents) + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + self.store = Store(test_utils.TestConfigOpts(new_conf)) location, size, checksum = self.store.add(image_id, image_swift, expected_swift_size) @@ -301,6 +313,7 @@ class TestStore(unittest.TestCase): self.assertEquals(expected_location, location) self.assertEquals(expected_swift_size, size) self.assertEquals(expected_checksum, checksum) + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1) loc = get_location_from_uri(expected_location) (new_image_swift, new_image_size) = self.store.get(loc) @@ -321,6 +334,9 @@ class TestStore(unittest.TestCase): image_swift = StringIO.StringIO("nevergonnamakeit") self.store = Store(test_utils.TestConfigOpts(conf)) + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + # We check the exception text to ensure the container # missing text is found in it, otherwise, we would have # simply used self.assertRaises here @@ -332,6 +348,7 @@ class TestStore(unittest.TestCase): self.assertTrue("container noexist does not exist " "in Swift" in str(e)) self.assertTrue(exception_caught) + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 0) def test_add_no_container_and_create(self): """ @@ -349,6 +366,9 @@ class TestStore(unittest.TestCase): '/noexist/%s' % expected_image_id image_swift = StringIO.StringIO(expected_swift_contents) + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + self.store = Store(test_utils.TestConfigOpts(conf)) location, size, checksum = self.store.add(expected_image_id, image_swift, @@ -357,6 +377,7 @@ class TestStore(unittest.TestCase): self.assertEquals(expected_location, location) self.assertEquals(expected_swift_size, size) self.assertEquals(expected_checksum, checksum) + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1) loc = get_location_from_uri(expected_location) (new_image_swift, new_image_size) = self.store.get(loc) @@ -369,7 +390,7 @@ class TestStore(unittest.TestCase): def test_add_large_object(self): """ Tests that adding a very large image. We simulate the large - object by setting the DEFAULT_LARGE_OBJECT_SIZE to a small number + object by setting store.large_object_size to a small number and then verify that there have been a number of calls to put_object()... """ @@ -383,22 +404,28 @@ class TestStore(unittest.TestCase): '/glance/%s' % expected_image_id image_swift = StringIO.StringIO(expected_swift_contents) - orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE - orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + + self.store = Store(test_utils.TestConfigOpts(conf)) + orig_max_size = self.store.large_object_size + orig_temp_size = self.store.large_object_chunk_size try: - glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024 - glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024 - self.store = Store(test_utils.TestConfigOpts(conf)) + self.store.large_object_size = 1024 + self.store.large_object_chunk_size = 1024 location, size, checksum = self.store.add(expected_image_id, image_swift, expected_swift_size) finally: - swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size - swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size + self.store.large_object_chunk_size = orig_temp_size + self.store.large_object_size = orig_max_size self.assertEquals(expected_location, location) self.assertEquals(expected_swift_size, size) self.assertEquals(expected_checksum, checksum) + # Expecting 6 objects to be created on Swift -- 5 chunks and 1 + # manifest. + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 6) loc = get_location_from_uri(expected_location) (new_image_swift, new_image_size) = self.store.get(loc) @@ -431,27 +458,35 @@ class TestStore(unittest.TestCase): '/glance/%s' % expected_image_id image_swift = StringIO.StringIO(expected_swift_contents) + global SWIFT_PUT_OBJECT_CALLS + SWIFT_PUT_OBJECT_CALLS = 0 + # Temporarily set Swift MAX_SWIFT_OBJECT_SIZE to 1KB and add our image, # explicitly setting the image_length to 0 - orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE - orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE + self.store = Store(test_utils.TestConfigOpts(conf)) + orig_max_size = self.store.large_object_size + orig_temp_size = self.store.large_object_chunk_size global MAX_SWIFT_OBJECT_SIZE orig_max_swift_object_size = MAX_SWIFT_OBJECT_SIZE try: MAX_SWIFT_OBJECT_SIZE = 1024 - glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024 - glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024 - self.store = Store(test_utils.TestConfigOpts(conf)) + self.store.large_object_size = 1024 + self.store.large_object_chunk_size = 1024 location, size, checksum = self.store.add(expected_image_id, image_swift, 0) finally: - swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size - swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size + self.store.large_object_chunk_size = orig_temp_size + self.store.large_object_size = orig_max_size MAX_SWIFT_OBJECT_SIZE = orig_max_swift_object_size self.assertEquals(expected_location, location) self.assertEquals(expected_swift_size, size) self.assertEquals(expected_checksum, checksum) + # Expecting 7 calls to put_object -- 5 chunks, a zero chunk which is + # then deleted, and the manifest. Note the difference with above + # where the image_size is specified in advance (there's no zero chunk + # in that case). + self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 7) loc = get_location_from_uri(expected_location) (new_image_swift, new_image_size) = self.store.get(loc) @@ -517,3 +552,27 @@ class TestStore(unittest.TestCase): """ loc = get_location_from_uri("swift://user:key@authurl/glance/noexist") self.assertRaises(exception.NotFound, self.store.delete, loc) + + +class TestChunkReader(unittest.TestCase): + + def test_read_all_data(self): + """ + Replicate what goes on in the Swift driver with the + repeated creation of the ChunkReader object + """ + CHUNKSIZE = 100 + checksum = hashlib.md5() + data_file = tempfile.NamedTemporaryFile() + data_file.write('*' * 1024) + data_file.flush() + infile = open(data_file.name, 'rb') + bytes_read = 0 + while True: + cr = glance.store.swift.ChunkReader(infile, checksum, CHUNKSIZE) + chunk = cr.read(CHUNKSIZE) + bytes_read += len(chunk) + if len(chunk) == 0: + break + self.assertEqual(1024, bytes_read) + data_file.close()