Merge "Bug #909533: Swift uploads through Glance using ridiculously small chunks"

This commit is contained in:
Jenkins 2012-01-09 22:26:08 +00:00 committed by Gerrit Code Review
commit e4bd841b6d
4 changed files with 132 additions and 72 deletions

View File

@ -328,21 +328,7 @@ Can only be specified in configuration files.
`This option is specific to the Swift storage backend.` `This option is specific to the Swift storage backend.`
When doing a large object manifest, what size, in MB, should When doing a large object manifest, what size, in MB, should
Glance write chunks to Swift? This amount of data is written Glance write chunks to Swift? The default is 200MB.
to a temporary disk buffer during the process of chunking
the image file, and the default is 200MB
* ``swift_store_object_buffer_dir=PATH``
Optional. Default: ``the platform's default temporary directory``
Can only be specified in configuration files.
`This option is specific to the Swift storage backend.`
When sending large images to Swift, what directory should be
used to buffer the chunks? By default the platform's
temporary directory will be used.
Configuring the S3 Storage Backend Configuring the S3 Storage Backend
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -137,11 +137,6 @@ swift_store_large_object_chunk_size = 200
# Ex. https://example.com/v1.0/ -> https://snet-example.com/v1.0/ # Ex. https://example.com/v1.0/ -> https://snet-example.com/v1.0/
swift_enable_snet = False swift_enable_snet = False
# When sending large images to Swift, the chunks will be written to a
# temporary buffer on disk. By default the platform's temporary directory
# will be used. If required, an alternative directory can be specified here.
# swift_store_object_buffer_dir = /path/to/dir
# ============ S3 Store Options ============================= # ============ S3 Store Options =============================
# Address where the S3 authentication service lives # Address where the S3 authentication service lives

View File

@ -40,6 +40,7 @@ except ImportError:
DEFAULT_CONTAINER = 'glance' DEFAULT_CONTAINER = 'glance'
DEFAULT_LARGE_OBJECT_SIZE = 5 * 1024 # 5GB DEFAULT_LARGE_OBJECT_SIZE = 5 * 1024 # 5GB
DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 200 # 200M DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 200 # 200M
ONE_MB = 1000 * 1024
logger = logging.getLogger('glance.store.swift') logger = logging.getLogger('glance.store.swift')
@ -196,7 +197,6 @@ class Store(glance.store.base.Store):
default=DEFAULT_LARGE_OBJECT_SIZE), default=DEFAULT_LARGE_OBJECT_SIZE),
cfg.IntOpt('swift_store_large_object_chunk_size', cfg.IntOpt('swift_store_large_object_chunk_size',
default=DEFAULT_LARGE_OBJECT_CHUNK_SIZE), default=DEFAULT_LARGE_OBJECT_CHUNK_SIZE),
cfg.StrOpt('swift_store_object_buffer_dir'),
cfg.BoolOpt('swift_store_create_container_on_put', default=False), cfg.BoolOpt('swift_store_create_container_on_put', default=False),
] ]
@ -216,11 +216,13 @@ class Store(glance.store.base.Store):
self.key = self._option_get('swift_store_key') self.key = self._option_get('swift_store_key')
self.container = self.conf.swift_store_container self.container = self.conf.swift_store_container
try: try:
self.large_object_size = self.conf.swift_store_large_object_size # The config file has swift_store_large_object_*size in MB, but
# internally we store it in bytes, since the image_size parameter
# passed to add() is also in bytes.
self.large_object_size = \
self.conf.swift_store_large_object_size * ONE_MB
self.large_object_chunk_size = \ self.large_object_chunk_size = \
self.conf.swift_store_large_object_chunk_size self.conf.swift_store_large_object_chunk_size * ONE_MB
self.swift_store_object_buffer_dir = \
self.conf.swift_store_object_buffer_dir
except cfg.ConfigFileValueError, e: except cfg.ConfigFileValueError, e:
reason = _("Error in configuration conf: %s") % e reason = _("Error in configuration conf: %s") % e
logger.error(reason) logger.error(reason)
@ -347,12 +349,10 @@ class Store(glance.store.base.Store):
# Image size is known, and is less than large_object_size. # Image size is known, and is less than large_object_size.
# Send to Swift with regular PUT. # Send to Swift with regular PUT.
obj_etag = swift_conn.put_object(self.container, obj_name, obj_etag = swift_conn.put_object(self.container, obj_name,
image_file) image_file,
content_length=image_size)
else: else:
# Write the image into Swift in chunks. We cannot # Write the image into Swift in chunks.
# stream chunks of the webob.Request.body_file, unfortunately,
# so we must write chunks of the body_file into a temporary
# disk buffer, and then pass this disk buffer to Swift.
chunk_id = 1 chunk_id = 1
if image_size > 0: if image_size > 0:
total_chunks = str(int( total_chunks = str(int(
@ -367,37 +367,40 @@ class Store(glance.store.base.Store):
total_chunks = '?' total_chunks = '?'
checksum = hashlib.md5() checksum = hashlib.md5()
tmp = self.swift_store_object_buffer_dir
combined_chunks_size = 0 combined_chunks_size = 0
while True: while True:
with tempfile.NamedTemporaryFile(dir=tmp) as disk_buffer: chunk_size = self.large_object_chunk_size
chunk = image_file.read(self.large_object_chunk_size) if image_size == 0:
if not chunk: content_length = None
else:
left = image_size - combined_chunks_size
if left == 0:
break break
chunk_size = len(chunk) if chunk_size > left:
logger.debug(_("Writing %(chunk_size)d bytes for " chunk_size = left
"chunk %(chunk_id)d/" content_length = chunk_size
"%(total_chunks)s to disk buffer "
"for image %(image_id)s") chunk_name = "%s-%05d" % (obj_name, chunk_id)
% locals()) reader = ChunkReader(image_file, checksum, chunk_size)
checksum.update(chunk) chunk_etag = swift_conn.put_object(
disk_buffer.write(chunk) self.container, chunk_name, reader,
disk_buffer.flush() content_length=content_length)
logger.debug(_("Writing chunk %(chunk_id)d/" bytes_read = reader.bytes_read
"%(total_chunks)s to Swift " logger.debug(_("Wrote chunk %(chunk_id)d/"
"for image %(image_id)s") "%(total_chunks)s of length %(bytes_read)d "
% locals()) "to Swift returning MD5 of content: "
chunk_etag = swift_conn.put_object( "%(chunk_etag)s")
self.container, % locals())
"%s-%05d" % (obj_name, chunk_id),
open(disk_buffer.name, 'rb')) if bytes_read == 0:
logger.debug(_("Wrote chunk %(chunk_id)d/" # Delete the last chunk, because it's of zero size.
"%(total_chunks)s to Swift " # This will happen if image_size == 0.
"returning MD5 of content: " logger.debug(_("Deleting final zero-length chunk"))
"%(chunk_etag)s") swift_conn.delete_object(self.container, chunk_name)
% locals()) break
chunk_id += 1 chunk_id += 1
combined_chunks_size += chunk_size combined_chunks_size += bytes_read
# In the case we have been given an unknown image size, # In the case we have been given an unknown image size,
# set the image_size to the total size of the combined chunks. # set the image_size to the total size of the combined chunks.
@ -484,6 +487,23 @@ class Store(glance.store.base.Store):
raise raise
class ChunkReader(object):
def __init__(self, fd, checksum, total):
self.fd = fd
self.checksum = checksum
self.total = total
self.bytes_read = 0
def read(self, i):
left = self.total - self.bytes_read
if i > left:
i = left
result = self.fd.read(i)
self.bytes_read += len(result)
self.checksum.update(result)
return result
def create_container_if_missing(container, swift_conn, conf): def create_container_if_missing(container, swift_conn, conf):
""" """
Creates a missing container in Swift if the Creates a missing container in Swift if the

View File

@ -20,6 +20,7 @@
import StringIO import StringIO
import hashlib import hashlib
import httplib import httplib
import tempfile
import unittest import unittest
import stubout import stubout
@ -39,6 +40,7 @@ Store = glance.store.swift.Store
FIVE_KB = (5 * 1024) FIVE_KB = (5 * 1024)
FIVE_GB = (5 * 1024 * 1024 * 1024) FIVE_GB = (5 * 1024 * 1024 * 1024)
MAX_SWIFT_OBJECT_SIZE = FIVE_GB MAX_SWIFT_OBJECT_SIZE = FIVE_GB
SWIFT_PUT_OBJECT_CALLS = 0
SWIFT_CONF = {'verbose': True, SWIFT_CONF = {'verbose': True,
'debug': True, 'debug': True,
'swift_store_user': 'user', 'swift_store_user': 'user',
@ -71,6 +73,8 @@ def stub_out_swift_common_client(stubs):
def fake_put_object(url, token, container, name, contents, **kwargs): def fake_put_object(url, token, container, name, contents, **kwargs):
# PUT returns the ETag header for the newly-added object # PUT returns the ETag header for the newly-added object
# Large object manifest... # Large object manifest...
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS += 1
fixture_key = "%s/%s" % (container, name) fixture_key = "%s/%s" % (container, name)
if not fixture_key in fixture_headers.keys(): if not fixture_key in fixture_headers.keys():
if kwargs.get('headers'): if kwargs.get('headers'):
@ -241,6 +245,9 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id '/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents) image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
location, size, checksum = self.store.add(expected_image_id, location, size, checksum = self.store.add(expected_image_id,
image_swift, image_swift,
expected_swift_size) expected_swift_size)
@ -248,6 +255,8 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location) self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size) self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum) self.assertEquals(expected_checksum, checksum)
# Expecting a single object to be created on Swift i.e. no chunking.
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location) loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc) (new_image_swift, new_image_size) = self.store.get(loc)
@ -294,6 +303,9 @@ class TestStore(unittest.TestCase):
image_swift = StringIO.StringIO(expected_swift_contents) image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(new_conf)) self.store = Store(test_utils.TestConfigOpts(new_conf))
location, size, checksum = self.store.add(image_id, image_swift, location, size, checksum = self.store.add(image_id, image_swift,
expected_swift_size) expected_swift_size)
@ -301,6 +313,7 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location) self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size) self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum) self.assertEquals(expected_checksum, checksum)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location) loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc) (new_image_swift, new_image_size) = self.store.get(loc)
@ -321,6 +334,9 @@ class TestStore(unittest.TestCase):
image_swift = StringIO.StringIO("nevergonnamakeit") image_swift = StringIO.StringIO("nevergonnamakeit")
self.store = Store(test_utils.TestConfigOpts(conf)) self.store = Store(test_utils.TestConfigOpts(conf))
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
# We check the exception text to ensure the container # We check the exception text to ensure the container
# missing text is found in it, otherwise, we would have # missing text is found in it, otherwise, we would have
# simply used self.assertRaises here # simply used self.assertRaises here
@ -332,6 +348,7 @@ class TestStore(unittest.TestCase):
self.assertTrue("container noexist does not exist " self.assertTrue("container noexist does not exist "
"in Swift" in str(e)) "in Swift" in str(e))
self.assertTrue(exception_caught) self.assertTrue(exception_caught)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 0)
def test_add_no_container_and_create(self): def test_add_no_container_and_create(self):
""" """
@ -349,6 +366,9 @@ class TestStore(unittest.TestCase):
'/noexist/%s' % expected_image_id '/noexist/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents) image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(conf)) self.store = Store(test_utils.TestConfigOpts(conf))
location, size, checksum = self.store.add(expected_image_id, location, size, checksum = self.store.add(expected_image_id,
image_swift, image_swift,
@ -357,6 +377,7 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location) self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size) self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum) self.assertEquals(expected_checksum, checksum)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location) loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc) (new_image_swift, new_image_size) = self.store.get(loc)
@ -369,7 +390,7 @@ class TestStore(unittest.TestCase):
def test_add_large_object(self): def test_add_large_object(self):
""" """
Tests that adding a very large image. We simulate the large Tests that adding a very large image. We simulate the large
object by setting the DEFAULT_LARGE_OBJECT_SIZE to a small number object by setting store.large_object_size to a small number
and then verify that there have been a number of calls to and then verify that there have been a number of calls to
put_object()... put_object()...
""" """
@ -383,22 +404,28 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id '/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents) image_swift = StringIO.StringIO(expected_swift_contents)
orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE global SWIFT_PUT_OBJECT_CALLS
orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(conf))
orig_max_size = self.store.large_object_size
orig_temp_size = self.store.large_object_chunk_size
try: try:
glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024 self.store.large_object_size = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024 self.store.large_object_chunk_size = 1024
self.store = Store(test_utils.TestConfigOpts(conf))
location, size, checksum = self.store.add(expected_image_id, location, size, checksum = self.store.add(expected_image_id,
image_swift, image_swift,
expected_swift_size) expected_swift_size)
finally: finally:
swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size self.store.large_object_chunk_size = orig_temp_size
swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size self.store.large_object_size = orig_max_size
self.assertEquals(expected_location, location) self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size) self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum) self.assertEquals(expected_checksum, checksum)
# Expecting 6 objects to be created on Swift -- 5 chunks and 1
# manifest.
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 6)
loc = get_location_from_uri(expected_location) loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc) (new_image_swift, new_image_size) = self.store.get(loc)
@ -431,27 +458,35 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id '/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents) image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
# Temporarily set Swift MAX_SWIFT_OBJECT_SIZE to 1KB and add our image, # Temporarily set Swift MAX_SWIFT_OBJECT_SIZE to 1KB and add our image,
# explicitly setting the image_length to 0 # explicitly setting the image_length to 0
orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE self.store = Store(test_utils.TestConfigOpts(conf))
orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE orig_max_size = self.store.large_object_size
orig_temp_size = self.store.large_object_chunk_size
global MAX_SWIFT_OBJECT_SIZE global MAX_SWIFT_OBJECT_SIZE
orig_max_swift_object_size = MAX_SWIFT_OBJECT_SIZE orig_max_swift_object_size = MAX_SWIFT_OBJECT_SIZE
try: try:
MAX_SWIFT_OBJECT_SIZE = 1024 MAX_SWIFT_OBJECT_SIZE = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024 self.store.large_object_size = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024 self.store.large_object_chunk_size = 1024
self.store = Store(test_utils.TestConfigOpts(conf))
location, size, checksum = self.store.add(expected_image_id, location, size, checksum = self.store.add(expected_image_id,
image_swift, 0) image_swift, 0)
finally: finally:
swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size self.store.large_object_chunk_size = orig_temp_size
swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size self.store.large_object_size = orig_max_size
MAX_SWIFT_OBJECT_SIZE = orig_max_swift_object_size MAX_SWIFT_OBJECT_SIZE = orig_max_swift_object_size
self.assertEquals(expected_location, location) self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size) self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum) self.assertEquals(expected_checksum, checksum)
# Expecting 7 calls to put_object -- 5 chunks, a zero chunk which is
# then deleted, and the manifest. Note the difference with above
# where the image_size is specified in advance (there's no zero chunk
# in that case).
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 7)
loc = get_location_from_uri(expected_location) loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc) (new_image_swift, new_image_size) = self.store.get(loc)
@ -517,3 +552,27 @@ class TestStore(unittest.TestCase):
""" """
loc = get_location_from_uri("swift://user:key@authurl/glance/noexist") loc = get_location_from_uri("swift://user:key@authurl/glance/noexist")
self.assertRaises(exception.NotFound, self.store.delete, loc) self.assertRaises(exception.NotFound, self.store.delete, loc)
class TestChunkReader(unittest.TestCase):
def test_read_all_data(self):
"""
Replicate what goes on in the Swift driver with the
repeated creation of the ChunkReader object
"""
CHUNKSIZE = 100
checksum = hashlib.md5()
data_file = tempfile.NamedTemporaryFile()
data_file.write('*' * 1024)
data_file.flush()
infile = open(data_file.name, 'rb')
bytes_read = 0
while True:
cr = glance.store.swift.ChunkReader(infile, checksum, CHUNKSIZE)
chunk = cr.read(CHUNKSIZE)
bytes_read += len(chunk)
if len(chunk) == 0:
break
self.assertEqual(1024, bytes_read)
data_file.close()