Allow arbitrary UTF-8 strings as delimiters in listings

AWS seems to support this, so let's allow s3api to do it, too.

Previously, S3 clients trying to use multi-character delimiters would
get 500s back, because s3api didn't know how to handle the 412s that the
container server would send.

As long as we're adding support for container listings, may as well do
it for accounts, too.

Change-Id: I62032ddd50a3493b8b99a40fb48d840ac763d0e7
Co-Authored-By: Thiago da Silva <thiagodasilva@gmail.com>
Closes-Bug: #1797305
This commit is contained in:
Tim Burke 2018-10-11 15:23:39 -07:00
parent 5cb53838a6
commit 1ded0d6c87
9 changed files with 314 additions and 59 deletions

View File

@ -457,12 +457,16 @@ class AccountBroker(DatabaseBroker):
end = name.find(delimiter, len(prefix))
if end > 0:
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
# we want result to be inclusive of delim+1
delim_force_gte = True
dir_name = name[:end + 1]
dir_name = name[:end + len(delimiter)]
if dir_name != orig_marker:
results.append([dir_name, 0, 0, '0', 1])
curs.close()

View File

@ -207,9 +207,6 @@ class AccountController(BaseStorageServer):
drive, part, account = split_and_validate_path(req, 3)
prefix = get_param(req, 'prefix')
delimiter = get_param(req, 'delimiter')
if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254):
# delimiters can be made more flexible later
return HTTPPreconditionFailed(body='Bad delimiter')
limit = constraints.ACCOUNT_LISTING_LIMIT
given_limit = get_param(req, 'limit')
reverse = config_true_value(get_param(req, 'reverse'))

View File

@ -1186,19 +1186,27 @@ class ContainerBroker(DatabaseBroker):
continue
if end >= 0 and len(name) > end + len(delimiter):
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
curs.close()
break
elif end >= 0:
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
# we want result to be inclusive of delim+1
delim_force_gte = True
dir_name = name[:end + 1]
dir_name = name[:end + len(delimiter)]
if dir_name != orig_marker:
results.append([dir_name, '0', 0, None, ''])
curs.close()

View File

@ -637,9 +637,6 @@ class ContainerController(BaseStorageServer):
path = get_param(req, 'path')
prefix = get_param(req, 'prefix')
delimiter = get_param(req, 'delimiter')
if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254):
# delimiters can be made more flexible later
return HTTPPreconditionFailed(body='Bad delimiter')
marker = get_param(req, 'marker', '')
end_marker = get_param(req, 'end_marker')
limit = constraints.CONTAINER_LISTING_LIMIT

View File

@ -235,6 +235,24 @@ class TestS3ApiBucket(S3ApiBaseBoto3):
resp_prefixes,
[{'Prefix': p} for p in expect_prefixes])
def test_get_bucket_with_multi_char_delimiter(self):
bucket = 'bucket'
put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object',
'dir/subdir/object')
self._prepare_test_get_bucket(bucket, put_objects)
delimiter = '/obj'
expect_objects = ('object', 'object2')
expect_prefixes = ('dir/subdir/obj', 'subdir/obj', 'subdir2/obj')
resp = self.conn.list_objects(Bucket=bucket, Delimiter=delimiter)
self.assertEqual(200, resp['ResponseMetadata']['HTTPStatusCode'])
self.assertEqual(resp['Delimiter'], delimiter)
self._validate_object_listing(resp['Contents'], expect_objects)
resp_prefixes = resp['CommonPrefixes']
self.assertEqual(
resp_prefixes,
[{'Prefix': p} for p in expect_prefixes])
def test_get_bucket_with_encoding_type(self):
bucket = 'bucket'
put_objects = ('object', 'object2')

View File

@ -301,6 +301,25 @@ class TestAccount(Base):
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
def testListMultiCharDelimiter(self):
delimiter = '-&'
containers = ['test', delimiter.join(['test', 'bar']),
delimiter.join(['test', 'foo'])]
for c in containers:
cont = self.env.account.container(c)
self.assertTrue(cont.create())
results = self.env.account.containers(parms={'delimiter': delimiter})
expected = ['test', 'test-&']
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
results = self.env.account.containers(parms={'delimiter': delimiter,
'reverse': 'yes'})
expected.reverse()
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
def testListDelimiterAndPrefix(self):
delimiter = 'a'
containers = ['bar', 'bazar']
@ -668,6 +687,36 @@ class TestContainer(Base):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test-', 'test'])
def testListMultiCharDelimiter(self):
cont = self.env.account.container(Utils.create_name())
self.assertTrue(cont.create())
delimiter = '-&'
files = ['test', delimiter.join(['test', 'bar']),
delimiter.join(['test', 'foo'])]
for f in files:
file_item = cont.file(f)
self.assertTrue(file_item.write_random())
for format_type in [None, 'json', 'xml']:
results = cont.files(parms={'format': format_type})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test', 'test-&bar', 'test-&foo'])
results = cont.files(parms={'delimiter': delimiter,
'format': format_type})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test', 'test-&'])
results = cont.files(parms={'delimiter': delimiter,
'format': format_type,
'reverse': 'yes'})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test-&', 'test'])
def testListDelimiterAndPrefix(self):
cont = self.env.account.container(Utils.create_name())
self.assertTrue(cont.create())

View File

@ -432,12 +432,12 @@ class TestContainerShardingNonUTF8(BaseTestContainerSharding):
expected = expected[:params['limit']]
self.assertEqual(expected, listing)
def check_listing_precondition_fails(**params):
def check_listing_fails(exp_status, **params):
qs = '&'.join(['%s=%s' % param for param in params.items()])
with self.assertRaises(ClientException) as cm:
client.get_container(
self.url, self.token, self.container_name, query_string=qs)
self.assertEqual(412, cm.exception.http_status)
self.assertEqual(exp_status, cm.exception.http_status)
return cm.exception
def do_listing_checks(objects):
@ -469,12 +469,16 @@ class TestContainerShardingNonUTF8(BaseTestContainerSharding):
self.url, self.token, self.container_name,
query_string='delimiter=-')
self.assertEqual([{'subdir': 'obj-'}], listing)
headers, listing = client.get_container(
self.url, self.token, self.container_name,
query_string='delimiter=j-')
self.assertEqual([{'subdir': 'obj-'}], listing)
limit = self.cluster_info['swift']['container_listing_limit']
exc = check_listing_precondition_fails(limit=limit + 1)
exc = check_listing_fails(412, limit=limit + 1)
self.assertIn(b'Maximum limit', exc.http_response_content)
exc = check_listing_precondition_fails(delimiter='ab')
self.assertIn(b'Bad delimiter', exc.http_response_content)
exc = check_listing_fails(400, delimiter='%ff')
self.assertIn(b'not valid UTF-8', exc.http_response_content)
# sanity checks
do_listing_checks(obj_names)

View File

@ -1485,13 +1485,6 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 406)
def test_GET_delimiter_too_long(self):
req = Request.blank('/sda1/p/a?delimiter=xx',
environ={'REQUEST_METHOD': 'GET',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412)
def test_GET_prefix_delimiter_plain(self):
req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
@ -1655,6 +1648,110 @@ class TestAccountController(unittest.TestCase):
listing.append(node2.firstChild.nodeValue)
self.assertEqual(listing, ['sub.1.0', 'sub.1.1', 'sub.1.2'])
def test_GET_multichar_delimiter(self):
self.maxDiff = None
req = Request.blank('/sda1/p/a', method='PUT', headers={
'x-timestamp': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201, resp.body)
for i in ('US~~TX~~A', 'US~~TX~~B', 'US~~OK~~A', 'US~~OK~~B',
'US~~OK~Tulsa~~A', 'US~~OK~Tulsa~~B',
'US~~UT~~A', 'US~~UT~~~B'):
req = Request.blank('/sda1/p/a/%s' % i, method='PUT', headers={
'X-Put-Timestamp': '1',
'X-Delete-Timestamp': '0',
'X-Object-Count': '0',
'X-Bytes-Used': '0',
'X-Timestamp': normalize_timestamp(0)})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
req = Request.blank(
'/sda1/p/a?prefix=US~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~OK~Tulsa~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~OK~Tulsa~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"subdir": "US~~UT~~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"subdir": "US~~UT~~~"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"name": "US~~UT~~~B"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"}])
def test_through_call(self):
inbuf = BytesIO()
errbuf = StringIO()
@ -1779,18 +1876,13 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400,
"%d on param %s" % (resp.status_int, param))
# Good UTF8 sequence for delimiter, too long (1 byte delimiters only)
req = Request.blank('/sda1/p/a?delimiter=\xce\xa9',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412,
"%d on param delimiter" % (resp.status_int))
Request.blank('/sda1/p/a',
headers={'X-Timestamp': normalize_timestamp(1)},
environ={'REQUEST_METHOD': 'PUT'}).get_response(
self.controller)
# Good UTF8 sequence, ignored for limit, doesn't affect other queries
for param in ('limit', 'marker', 'prefix', 'end_marker', 'format'):
for param in ('limit', 'marker', 'prefix', 'end_marker', 'format',
'delimiter'):
req = Request.blank('/sda1/p/a?%s=\xce\xa9' % param,
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)

View File

@ -3028,13 +3028,14 @@ class TestContainerController(unittest.TestCase):
'/sda1/p/a/c', method='PUT', headers=headers, body=body)
self.assertEqual(202, req.get_response(self.controller).status_int)
def do_test(params):
def do_test(params, expected_status):
params['format'] = 'json'
headers = {'X-Backend-Record-Type': 'shard'}
req = Request.blank('/sda1/p/a/c', method='GET',
headers=headers, params=params)
with mock_timestamp_now(ts_now):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, expected_status)
self.assertEqual(resp.content_type, 'text/html')
self.assertNotIn('X-Backend-Record-Type', resp.headers)
self.assertNotIn('X-Backend-Sharding-State', resp.headers)
@ -3042,26 +3043,19 @@ class TestContainerController(unittest.TestCase):
self.assertNotIn('X-Container-Bytes-Used', resp.headers)
self.assertNotIn('X-Timestamp', resp.headers)
self.assertNotIn('X-PUT-Timestamp', resp.headers)
return resp
resp = do_test({'states': 'bad'})
self.assertEqual(resp.status_int, 400)
resp = do_test({'delimiter': 'bad'})
self.assertEqual(resp.status_int, 412)
resp = do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)})
self.assertEqual(resp.status_int, 412)
do_test({'states': 'bad'}, 400)
do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)}, 412)
with mock.patch('swift.container.server.check_drive',
side_effect=ValueError('sda1 is not mounted')):
resp = do_test({})
self.assertEqual(resp.status_int, 507)
do_test({}, 507)
# delete the container
req = Request.blank('/sda1/p/a/c', method='DELETE',
headers={'X-Timestamp': next(ts_iter).normal})
self.assertEqual(204, req.get_response(self.controller).status_int)
resp = do_test({'states': 'bad'})
self.assertEqual(resp.status_int, 404)
do_test({'states': 'bad'}, 404)
def test_GET_auto_record_type(self):
# make a container
@ -3982,13 +3976,6 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.body.split(b'\n'), [b'a1', b'a2', b'a3', b''])
def test_GET_delimiter_too_long(self):
req = Request.blank('/sda1/p/a/c?delimiter=xx',
environ={'REQUEST_METHOD': 'GET',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412)
def test_GET_delimiter(self):
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -4014,6 +4001,111 @@ class TestContainerController(unittest.TestCase):
{"subdir": "US-TX-"},
{"subdir": "US-UT-"}])
def test_GET_multichar_delimiter(self):
self.maxDiff = None
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
for i in ('US~~TX~~A', 'US~~TX~~B', 'US~~OK~~A', 'US~~OK~~B',
'US~~OK~Tulsa~~A', 'US~~OK~Tulsa~~B',
'US~~UT~~A', 'US~~UT~~~B'):
req = Request.blank(
'/sda1/p/a/c/%s' % i,
environ={
'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1',
'HTTP_X_CONTENT_TYPE': 'text/plain', 'HTTP_X_ETAG': 'x',
'HTTP_X_SIZE': 0})
self._update_object_put_headers(req)
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
req = Request.blank(
'/sda1/p/a/c?prefix=US~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~OK~Tulsa~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~OK~Tulsa~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"subdir": "US~~UT~~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"subdir": "US~~UT~~~"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"name": "US~~UT~~~B"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"}])
def test_GET_delimiter_non_ascii(self):
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -4279,18 +4371,12 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400,
"%d on param %s" % (resp.status_int, param))
# Good UTF8 sequence for delimiter, too long (1 byte delimiters only)
req = Request.blank('/sda1/p/a/c?delimiter=\xce\xa9',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412,
"%d on param delimiter" % (resp.status_int))
req = Request.blank('/sda1/p/a/c', method='PUT',
headers={'X-Timestamp': Timestamp(1).internal})
req.get_response(self.controller)
# Good UTF8 sequence, ignored for limit, doesn't affect other queries
for param in ('limit', 'marker', 'path', 'prefix', 'end_marker',
'format'):
'format', 'delimiter'):
req = Request.blank('/sda1/p/a/c?%s=\xce\xa9' % param,
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)