Merge "Bug #909533: Swift uploads through Glance using ridiculously small chunks"

This commit is contained in:
Jenkins 2012-01-09 22:26:08 +00:00 committed by Gerrit Code Review
commit e4bd841b6d
4 changed files with 132 additions and 72 deletions

View File

@ -328,21 +328,7 @@ Can only be specified in configuration files.
`This option is specific to the Swift storage backend.`
When doing a large object manifest, what size, in MB, should
Glance write chunks to Swift? This amount of data is written
to a temporary disk buffer during the process of chunking
the image file, and the default is 200MB
* ``swift_store_object_buffer_dir=PATH``
Optional. Default: ``the platform's default temporary directory``
Can only be specified in configuration files.
`This option is specific to the Swift storage backend.`
When sending large images to Swift, what directory should be
used to buffer the chunks? By default the platform's
temporary directory will be used.
Glance write chunks to Swift? The default is 200MB.
Configuring the S3 Storage Backend
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -137,11 +137,6 @@ swift_store_large_object_chunk_size = 200
# Ex. https://example.com/v1.0/ -> https://snet-example.com/v1.0/
swift_enable_snet = False
# When sending large images to Swift, the chunks will be written to a
# temporary buffer on disk. By default the platform's temporary directory
# will be used. If required, an alternative directory can be specified here.
# swift_store_object_buffer_dir = /path/to/dir
# ============ S3 Store Options =============================
# Address where the S3 authentication service lives

View File

@ -40,6 +40,7 @@ except ImportError:
DEFAULT_CONTAINER = 'glance'
DEFAULT_LARGE_OBJECT_SIZE = 5 * 1024 # 5GB
DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 200 # 200M
ONE_MB = 1000 * 1024
logger = logging.getLogger('glance.store.swift')
@ -196,7 +197,6 @@ class Store(glance.store.base.Store):
default=DEFAULT_LARGE_OBJECT_SIZE),
cfg.IntOpt('swift_store_large_object_chunk_size',
default=DEFAULT_LARGE_OBJECT_CHUNK_SIZE),
cfg.StrOpt('swift_store_object_buffer_dir'),
cfg.BoolOpt('swift_store_create_container_on_put', default=False),
]
@ -216,11 +216,13 @@ class Store(glance.store.base.Store):
self.key = self._option_get('swift_store_key')
self.container = self.conf.swift_store_container
try:
self.large_object_size = self.conf.swift_store_large_object_size
# The config file has swift_store_large_object_*size in MB, but
# internally we store it in bytes, since the image_size parameter
# passed to add() is also in bytes.
self.large_object_size = \
self.conf.swift_store_large_object_size * ONE_MB
self.large_object_chunk_size = \
self.conf.swift_store_large_object_chunk_size
self.swift_store_object_buffer_dir = \
self.conf.swift_store_object_buffer_dir
self.conf.swift_store_large_object_chunk_size * ONE_MB
except cfg.ConfigFileValueError, e:
reason = _("Error in configuration conf: %s") % e
logger.error(reason)
@ -347,12 +349,10 @@ class Store(glance.store.base.Store):
# Image size is known, and is less than large_object_size.
# Send to Swift with regular PUT.
obj_etag = swift_conn.put_object(self.container, obj_name,
image_file)
image_file,
content_length=image_size)
else:
# Write the image into Swift in chunks. We cannot
# stream chunks of the webob.Request.body_file, unfortunately,
# so we must write chunks of the body_file into a temporary
# disk buffer, and then pass this disk buffer to Swift.
# Write the image into Swift in chunks.
chunk_id = 1
if image_size > 0:
total_chunks = str(int(
@ -367,37 +367,40 @@ class Store(glance.store.base.Store):
total_chunks = '?'
checksum = hashlib.md5()
tmp = self.swift_store_object_buffer_dir
combined_chunks_size = 0
while True:
with tempfile.NamedTemporaryFile(dir=tmp) as disk_buffer:
chunk = image_file.read(self.large_object_chunk_size)
if not chunk:
chunk_size = self.large_object_chunk_size
if image_size == 0:
content_length = None
else:
left = image_size - combined_chunks_size
if left == 0:
break
chunk_size = len(chunk)
logger.debug(_("Writing %(chunk_size)d bytes for "
"chunk %(chunk_id)d/"
"%(total_chunks)s to disk buffer "
"for image %(image_id)s")
% locals())
checksum.update(chunk)
disk_buffer.write(chunk)
disk_buffer.flush()
logger.debug(_("Writing chunk %(chunk_id)d/"
"%(total_chunks)s to Swift "
"for image %(image_id)s")
% locals())
if chunk_size > left:
chunk_size = left
content_length = chunk_size
chunk_name = "%s-%05d" % (obj_name, chunk_id)
reader = ChunkReader(image_file, checksum, chunk_size)
chunk_etag = swift_conn.put_object(
self.container,
"%s-%05d" % (obj_name, chunk_id),
open(disk_buffer.name, 'rb'))
self.container, chunk_name, reader,
content_length=content_length)
bytes_read = reader.bytes_read
logger.debug(_("Wrote chunk %(chunk_id)d/"
"%(total_chunks)s to Swift "
"returning MD5 of content: "
"%(total_chunks)s of length %(bytes_read)d "
"to Swift returning MD5 of content: "
"%(chunk_etag)s")
% locals())
if bytes_read == 0:
# Delete the last chunk, because it's of zero size.
# This will happen if image_size == 0.
logger.debug(_("Deleting final zero-length chunk"))
swift_conn.delete_object(self.container, chunk_name)
break
chunk_id += 1
combined_chunks_size += chunk_size
combined_chunks_size += bytes_read
# In the case we have been given an unknown image size,
# set the image_size to the total size of the combined chunks.
@ -484,6 +487,23 @@ class Store(glance.store.base.Store):
raise
class ChunkReader(object):
def __init__(self, fd, checksum, total):
self.fd = fd
self.checksum = checksum
self.total = total
self.bytes_read = 0
def read(self, i):
left = self.total - self.bytes_read
if i > left:
i = left
result = self.fd.read(i)
self.bytes_read += len(result)
self.checksum.update(result)
return result
def create_container_if_missing(container, swift_conn, conf):
"""
Creates a missing container in Swift if the

View File

@ -20,6 +20,7 @@
import StringIO
import hashlib
import httplib
import tempfile
import unittest
import stubout
@ -39,6 +40,7 @@ Store = glance.store.swift.Store
FIVE_KB = (5 * 1024)
FIVE_GB = (5 * 1024 * 1024 * 1024)
MAX_SWIFT_OBJECT_SIZE = FIVE_GB
SWIFT_PUT_OBJECT_CALLS = 0
SWIFT_CONF = {'verbose': True,
'debug': True,
'swift_store_user': 'user',
@ -71,6 +73,8 @@ def stub_out_swift_common_client(stubs):
def fake_put_object(url, token, container, name, contents, **kwargs):
# PUT returns the ETag header for the newly-added object
# Large object manifest...
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS += 1
fixture_key = "%s/%s" % (container, name)
if not fixture_key in fixture_headers.keys():
if kwargs.get('headers'):
@ -241,6 +245,9 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
location, size, checksum = self.store.add(expected_image_id,
image_swift,
expected_swift_size)
@ -248,6 +255,8 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum)
# Expecting a single object to be created on Swift i.e. no chunking.
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc)
@ -294,6 +303,9 @@ class TestStore(unittest.TestCase):
image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(new_conf))
location, size, checksum = self.store.add(image_id, image_swift,
expected_swift_size)
@ -301,6 +313,7 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc)
@ -321,6 +334,9 @@ class TestStore(unittest.TestCase):
image_swift = StringIO.StringIO("nevergonnamakeit")
self.store = Store(test_utils.TestConfigOpts(conf))
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
# We check the exception text to ensure the container
# missing text is found in it, otherwise, we would have
# simply used self.assertRaises here
@ -332,6 +348,7 @@ class TestStore(unittest.TestCase):
self.assertTrue("container noexist does not exist "
"in Swift" in str(e))
self.assertTrue(exception_caught)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 0)
def test_add_no_container_and_create(self):
"""
@ -349,6 +366,9 @@ class TestStore(unittest.TestCase):
'/noexist/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(conf))
location, size, checksum = self.store.add(expected_image_id,
image_swift,
@ -357,6 +377,7 @@ class TestStore(unittest.TestCase):
self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum)
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 1)
loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc)
@ -369,7 +390,7 @@ class TestStore(unittest.TestCase):
def test_add_large_object(self):
"""
Tests that adding a very large image. We simulate the large
object by setting the DEFAULT_LARGE_OBJECT_SIZE to a small number
object by setting store.large_object_size to a small number
and then verify that there have been a number of calls to
put_object()...
"""
@ -383,22 +404,28 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents)
orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE
orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE
try:
glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
self.store = Store(test_utils.TestConfigOpts(conf))
orig_max_size = self.store.large_object_size
orig_temp_size = self.store.large_object_chunk_size
try:
self.store.large_object_size = 1024
self.store.large_object_chunk_size = 1024
location, size, checksum = self.store.add(expected_image_id,
image_swift,
expected_swift_size)
finally:
swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size
swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size
self.store.large_object_chunk_size = orig_temp_size
self.store.large_object_size = orig_max_size
self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum)
# Expecting 6 objects to be created on Swift -- 5 chunks and 1
# manifest.
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 6)
loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc)
@ -431,27 +458,35 @@ class TestStore(unittest.TestCase):
'/glance/%s' % expected_image_id
image_swift = StringIO.StringIO(expected_swift_contents)
global SWIFT_PUT_OBJECT_CALLS
SWIFT_PUT_OBJECT_CALLS = 0
# Temporarily set Swift MAX_SWIFT_OBJECT_SIZE to 1KB and add our image,
# explicitly setting the image_length to 0
orig_max_size = glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE
orig_temp_size = glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE
self.store = Store(test_utils.TestConfigOpts(conf))
orig_max_size = self.store.large_object_size
orig_temp_size = self.store.large_object_chunk_size
global MAX_SWIFT_OBJECT_SIZE
orig_max_swift_object_size = MAX_SWIFT_OBJECT_SIZE
try:
MAX_SWIFT_OBJECT_SIZE = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_SIZE = 1024
glance.store.swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = 1024
self.store = Store(test_utils.TestConfigOpts(conf))
self.store.large_object_size = 1024
self.store.large_object_chunk_size = 1024
location, size, checksum = self.store.add(expected_image_id,
image_swift, 0)
finally:
swift.DEFAULT_LARGE_OBJECT_CHUNK_SIZE = orig_temp_size
swift.DEFAULT_LARGE_OBJECT_SIZE = orig_max_size
self.store.large_object_chunk_size = orig_temp_size
self.store.large_object_size = orig_max_size
MAX_SWIFT_OBJECT_SIZE = orig_max_swift_object_size
self.assertEquals(expected_location, location)
self.assertEquals(expected_swift_size, size)
self.assertEquals(expected_checksum, checksum)
# Expecting 7 calls to put_object -- 5 chunks, a zero chunk which is
# then deleted, and the manifest. Note the difference with above
# where the image_size is specified in advance (there's no zero chunk
# in that case).
self.assertEquals(SWIFT_PUT_OBJECT_CALLS, 7)
loc = get_location_from_uri(expected_location)
(new_image_swift, new_image_size) = self.store.get(loc)
@ -517,3 +552,27 @@ class TestStore(unittest.TestCase):
"""
loc = get_location_from_uri("swift://user:key@authurl/glance/noexist")
self.assertRaises(exception.NotFound, self.store.delete, loc)
class TestChunkReader(unittest.TestCase):
def test_read_all_data(self):
"""
Replicate what goes on in the Swift driver with the
repeated creation of the ChunkReader object
"""
CHUNKSIZE = 100
checksum = hashlib.md5()
data_file = tempfile.NamedTemporaryFile()
data_file.write('*' * 1024)
data_file.flush()
infile = open(data_file.name, 'rb')
bytes_read = 0
while True:
cr = glance.store.swift.ChunkReader(infile, checksum, CHUNKSIZE)
chunk = cr.read(CHUNKSIZE)
bytes_read += len(chunk)
if len(chunk) == 0:
break
self.assertEqual(1024, bytes_read)
data_file.close()