Merge "Fix encoding issue in ssync_sender.send_put()"

This commit is contained in:
Jenkins
2017-04-19 19:37:38 +00:00
committed by Gerrit Code Review
5 changed files with 129 additions and 18 deletions

View File

@@ -114,6 +114,20 @@ def _get_filename(fd):
    return fd
def _encode_metadata(metadata):
    """
    UTF8 encode any unicode keys or values in given metadata dict.

    :param metadata: a dict
    :return: a new dict in which every unicode (``six.text_type``) key or
             value has been UTF8-encoded to bytes; items that are already
             bytes, or are not strings at all (e.g. ints), pass through
             unchanged
    """
    def encode_str(item):
        # only unicode strings need encoding; bytes / non-strings untouched
        if isinstance(item, six.text_type):
            return item.encode('utf8')
        return item

    return {encode_str(k): encode_str(v) for k, v in metadata.items()}
def read_metadata(fd):
    """
    Helper function to read the pickled metadata from an object file.
@@ -140,7 +154,10 @@ def read_metadata(fd):
        raise DiskFileNotExist()
    # TODO: we might want to re-raise errors that don't denote a missing
    # xattr here. Seems to be ENODATA on linux and ENOATTR on BSD/OSX.
    # strings are utf-8 encoded when written, but have not always been
    # (see https://bugs.launchpad.net/swift/+bug/1678018) so encode them again
    # when read
    return _encode_metadata(pickle.loads(metadata))


def write_metadata(fd, metadata, xattr_size=65536):

@@ -150,7 +167,7 @@ def write_metadata(fd, metadata, xattr_size=65536):

    :param fd: file descriptor or filename to write the metadata
    :param metadata: metadata to write
    """
    metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL)
    key = 0
    while metastr:
        try:

View File

@@ -81,8 +81,13 @@ class TestReconstructorRebuild(ECProbeTest):
        # PUT object and POST some metadata
        contents = Body()
        headers = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-foo-').decode('utf8'),
        }
        self.headers_post = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-bar-').decode('utf8')}
        self.etag = client.put_object(self.url, self.token,
                                      self.container_name,

View File

@@ -286,6 +286,58 @@ class TestDiskFileModuleMethods(unittest.TestCase):
        exp_dir = '/srv/node/sdb5/objects-1/123'
        self.assertEqual(part_dir, exp_dir)
def test_write_read_metadata(self):
    """
    Check that metadata round-trips through write_metadata/read_metadata
    as UTF8-encoded bytes: unicode keys/values are encoded on write, and
    legacy (unicode) persisted metadata is re-encoded on read.
    """
    path = os.path.join(self.testdir, str(uuid.uuid4()))
    # mix of native str, unicode, bytes and non-string values on input...
    metadata = {'name': '/a/c/o',
                'Content-Length': 99,
                u'X-Object-Sysmeta-Ec-Frag-Index': 4,
                u'X-Object-Meta-Strange': u'should be bytes',
                b'X-Object-Meta-x\xff': b'not utf8 \xff',
                u'X-Object-Meta-y\xe8': u'not ascii \xe8'}
    # ...but everything read back should be bytes (unicode UTF8-encoded)
    expected = {b'name': b'/a/c/o',
                b'Content-Length': 99,
                b'X-Object-Sysmeta-Ec-Frag-Index': 4,
                b'X-Object-Meta-Strange': b'should be bytes',
                b'X-Object-Meta-x\xff': b'not utf8 \xff',
                b'X-Object-Meta-y\xc3\xa8': b'not ascii \xc3\xa8'}

    def check_metadata():
        # verify read_metadata returns the expected dict with all-bytes
        # keys and bytes values for the string-valued items
        with open(path, 'rb') as fd:
            actual = diskfile.read_metadata(fd)
        self.assertEqual(expected, actual)
        for k in actual.keys():
            self.assertIsInstance(k, six.binary_type)
        for k in (b'name',
                  b'X-Object-Meta-Strange',
                  b'X-Object-Meta-x\xff',
                  b'X-Object-Meta-y\xc3\xa8'):
            self.assertIsInstance(actual[k], six.binary_type)

    with open(path, 'wb') as fd:
        diskfile.write_metadata(fd, metadata)
    check_metadata()

    # mock the read path to check the write path encoded persisted metadata
    with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
        check_metadata()

    # simulate a legacy diskfile that might have persisted unicode metadata
    with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
        with open(path, 'wb') as fd:
            diskfile.write_metadata(fd, metadata)
        # sanity check, while still mocked, that we did persist unicode
        with open(path, 'rb') as fd:
            actual = diskfile.read_metadata(fd)
        for k, v in actual.items():
            if k == u'X-Object-Meta-Strange':
                self.assertIsInstance(k, six.text_type)
                self.assertIsInstance(v, six.text_type)
                break
        else:
            self.fail('Did not find X-Object-Meta-Strange')

    # check that read_metadata converts binary_type
    check_metadata()
@patch_policies
class TestObjectAuditLocationGenerator(unittest.TestCase):

View File

@@ -7255,6 +7255,19 @@ class TestObjectServer(unittest.TestCase):
        obj_datafile = found_files['.data'][0]
        self.assertEqual("%s#2#d.data" % put_timestamp.internal,
                         os.path.basename(obj_datafile))
with open(obj_datafile) as fd:
actual_meta = diskfile.read_metadata(fd)
expected_meta = {'Content-Length': '82',
'name': '/a/c/o',
'X-Object-Sysmeta-Ec-Frag-Index': '2',
'X-Timestamp': put_timestamp.normal,
'Content-Type': 'text/plain'}
for k, v in actual_meta.items():
self.assertIsInstance(k, six.binary_type)
self.assertIsInstance(v, six.binary_type)
self.assertIsNotNone(actual_meta.pop('ETag', None))
self.assertEqual(expected_meta, actual_meta)
        # but no other files
        self.assertFalse(found_files['.data'][1:])
        found_files.pop('.data')

View File

@@ -1452,62 +1452,86 @@ class TestSender(BaseTest):
            exc = err
        self.assertEqual(str(exc), '0.01 seconds: send_put chunk')

    def _check_send_put(self, obj_name, meta_value):
        ts_iter = make_timestamp_iter()
        t1 = next(ts_iter)
        body = 'test'
        extra_metadata = {'Some-Other-Header': 'value',
                          u'Unicode-Meta-Name': meta_value}
        df = self._make_open_diskfile(obj=obj_name, body=body,
                                      timestamp=t1,
                                      extra_metadata=extra_metadata)
        expected = dict(df.get_metadata())
        expected['body'] = body
        expected['chunk_size'] = len(body)
        expected['meta'] = meta_value
        path = six.moves.urllib.parse.quote(expected['name'])
        expected['path'] = path
        expected['length'] = format(145 + len(path) + len(meta_value), 'x')
        # .meta file metadata is not included in expected for data only PUT
        t2 = next(ts_iter)
        metadata = {'X-Timestamp': t2.internal, 'X-Object-Meta-Fruit': 'kiwi'}
        df.write_metadata(metadata)
        df.open()
        self.sender.connection = FakeConnection()
        self.sender.send_put(path, df)
        self.assertEqual(
            ''.join(self.sender.connection.sent),
            '%(length)s\r\n'
            'PUT %(path)s\r\n'
            'Content-Length: %(Content-Length)s\r\n'
            'ETag: %(ETag)s\r\n'
            'Some-Other-Header: value\r\n'
            'Unicode-Meta-Name: %(meta)s\r\n'
            'X-Timestamp: %(X-Timestamp)s\r\n'
            '\r\n'
            '\r\n'
            '%(chunk_size)s\r\n'
            '%(body)s\r\n' % expected)

    def test_send_put(self):
        self._check_send_put('o', 'meta')

    def test_send_put_unicode(self):
        self._check_send_put(
            'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')

    def _check_send_post(self, obj_name, meta_value):
        ts_iter = make_timestamp_iter()
        # create .data file
        extra_metadata = {'X-Object-Meta-Foo': 'old_value',
                          'X-Object-Sysmeta-Test': 'test_sysmeta',
                          'Content-Type': 'test_content_type'}
        ts_0 = next(ts_iter)
        df = self._make_open_diskfile(obj=obj_name,
                                      extra_metadata=extra_metadata,
                                      timestamp=ts_0)
        # create .meta file
        ts_1 = next(ts_iter)
        newer_metadata = {u'X-Object-Meta-Foo': meta_value,
                          'X-Timestamp': ts_1.internal}
        df.write_metadata(newer_metadata)
        path = six.moves.urllib.parse.quote(df.read_metadata()['name'])
        length = format(61 + len(path) + len(meta_value), 'x')
        self.sender.connection = FakeConnection()
        with df.open():
            self.sender.send_post(path, df)
        self.assertEqual(
            ''.join(self.sender.connection.sent),
            '%s\r\n'
            'POST %s\r\n'
            'X-Object-Meta-Foo: %s\r\n'
            'X-Timestamp: %s\r\n'
            '\r\n'
            '\r\n' % (length, path, meta_value, ts_1.internal))

    def test_send_post(self):
        self._check_send_post('o', 'meta')

    def test_send_post_unicode(self):
        self._check_send_post(
            'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')

    def test_disconnect_timeout(self):
        self.sender.connection = FakeConnection()