Merge "Allow arbitrary UTF-8 strings as delimiters in listings"

This commit is contained in:
Zuul 2019-09-16 19:51:03 +00:00 committed by Gerrit Code Review
commit 6339bd8f6c
9 changed files with 314 additions and 59 deletions

View File

@ -457,12 +457,16 @@ class AccountBroker(DatabaseBroker):
end = name.find(delimiter, len(prefix))
if end > 0:
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
# we want result to be inclusive of delim+1
delim_force_gte = True
dir_name = name[:end + 1]
dir_name = name[:end + len(delimiter)]
if dir_name != orig_marker:
results.append([dir_name, 0, 0, '0', 1])
curs.close()

View File

@ -207,9 +207,6 @@ class AccountController(BaseStorageServer):
drive, part, account = split_and_validate_path(req, 3)
prefix = get_param(req, 'prefix')
delimiter = get_param(req, 'delimiter')
if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254):
# delimiters can be made more flexible later
return HTTPPreconditionFailed(body='Bad delimiter')
limit = constraints.ACCOUNT_LISTING_LIMIT
given_limit = get_param(req, 'limit')
reverse = config_true_value(get_param(req, 'reverse'))

View File

@ -1186,19 +1186,27 @@ class ContainerBroker(DatabaseBroker):
continue
if end >= 0 and len(name) > end + len(delimiter):
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
curs.close()
break
elif end >= 0:
if reverse:
end_marker = name[:end + 1]
end_marker = name[:end + len(delimiter)]
else:
marker = name[:end] + chr(ord(delimiter) + 1)
marker = ''.join([
name[:end],
delimiter[:-1],
chr(ord(delimiter[-1:]) + 1),
])
# we want result to be inclusive of delim+1
delim_force_gte = True
dir_name = name[:end + 1]
dir_name = name[:end + len(delimiter)]
if dir_name != orig_marker:
results.append([dir_name, '0', 0, None, ''])
curs.close()

View File

@ -637,9 +637,6 @@ class ContainerController(BaseStorageServer):
path = get_param(req, 'path')
prefix = get_param(req, 'prefix')
delimiter = get_param(req, 'delimiter')
if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254):
# delimiters can be made more flexible later
return HTTPPreconditionFailed(body='Bad delimiter')
marker = get_param(req, 'marker', '')
end_marker = get_param(req, 'end_marker')
limit = constraints.CONTAINER_LISTING_LIMIT

View File

@ -235,6 +235,24 @@ class TestS3ApiBucket(S3ApiBaseBoto3):
resp_prefixes,
[{'Prefix': p} for p in expect_prefixes])
def test_get_bucket_with_multi_char_delimiter(self):
bucket = 'bucket'
put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object',
'dir/subdir/object')
self._prepare_test_get_bucket(bucket, put_objects)
delimiter = '/obj'
expect_objects = ('object', 'object2')
expect_prefixes = ('dir/subdir/obj', 'subdir/obj', 'subdir2/obj')
resp = self.conn.list_objects(Bucket=bucket, Delimiter=delimiter)
self.assertEqual(200, resp['ResponseMetadata']['HTTPStatusCode'])
self.assertEqual(resp['Delimiter'], delimiter)
self._validate_object_listing(resp['Contents'], expect_objects)
resp_prefixes = resp['CommonPrefixes']
self.assertEqual(
resp_prefixes,
[{'Prefix': p} for p in expect_prefixes])
def test_get_bucket_with_encoding_type(self):
bucket = 'bucket'
put_objects = ('object', 'object2')

View File

@ -301,6 +301,25 @@ class TestAccount(Base):
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
def testListMultiCharDelimiter(self):
delimiter = '-&'
containers = ['test', delimiter.join(['test', 'bar']),
delimiter.join(['test', 'foo'])]
for c in containers:
cont = self.env.account.container(c)
self.assertTrue(cont.create())
results = self.env.account.containers(parms={'delimiter': delimiter})
expected = ['test', 'test-&']
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
results = self.env.account.containers(parms={'delimiter': delimiter,
'reverse': 'yes'})
expected.reverse()
results = [r for r in results if r in expected]
self.assertEqual(expected, results)
def testListDelimiterAndPrefix(self):
delimiter = 'a'
containers = ['bar', 'bazar']
@ -668,6 +687,36 @@ class TestContainer(Base):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test-', 'test'])
def testListMultiCharDelimiter(self):
cont = self.env.account.container(Utils.create_name())
self.assertTrue(cont.create())
delimiter = '-&'
files = ['test', delimiter.join(['test', 'bar']),
delimiter.join(['test', 'foo'])]
for f in files:
file_item = cont.file(f)
self.assertTrue(file_item.write_random())
for format_type in [None, 'json', 'xml']:
results = cont.files(parms={'format': format_type})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test', 'test-&bar', 'test-&foo'])
results = cont.files(parms={'delimiter': delimiter,
'format': format_type})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test', 'test-&'])
results = cont.files(parms={'delimiter': delimiter,
'format': format_type,
'reverse': 'yes'})
if isinstance(results[0], dict):
results = [x.get('name', x.get('subdir')) for x in results]
self.assertEqual(results, ['test-&', 'test'])
def testListDelimiterAndPrefix(self):
cont = self.env.account.container(Utils.create_name())
self.assertTrue(cont.create())

View File

@ -432,12 +432,12 @@ class TestContainerShardingNonUTF8(BaseTestContainerSharding):
expected = expected[:params['limit']]
self.assertEqual(expected, listing)
def check_listing_precondition_fails(**params):
def check_listing_fails(exp_status, **params):
qs = '&'.join(['%s=%s' % param for param in params.items()])
with self.assertRaises(ClientException) as cm:
client.get_container(
self.url, self.token, self.container_name, query_string=qs)
self.assertEqual(412, cm.exception.http_status)
self.assertEqual(exp_status, cm.exception.http_status)
return cm.exception
def do_listing_checks(objects):
@ -469,12 +469,16 @@ class TestContainerShardingNonUTF8(BaseTestContainerSharding):
self.url, self.token, self.container_name,
query_string='delimiter=-')
self.assertEqual([{'subdir': 'obj-'}], listing)
headers, listing = client.get_container(
self.url, self.token, self.container_name,
query_string='delimiter=j-')
self.assertEqual([{'subdir': 'obj-'}], listing)
limit = self.cluster_info['swift']['container_listing_limit']
exc = check_listing_precondition_fails(limit=limit + 1)
exc = check_listing_fails(412, limit=limit + 1)
self.assertIn(b'Maximum limit', exc.http_response_content)
exc = check_listing_precondition_fails(delimiter='ab')
self.assertIn(b'Bad delimiter', exc.http_response_content)
exc = check_listing_fails(400, delimiter='%ff')
self.assertIn(b'not valid UTF-8', exc.http_response_content)
# sanity checks
do_listing_checks(obj_names)

View File

@ -1485,13 +1485,6 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 406)
def test_GET_delimiter_too_long(self):
req = Request.blank('/sda1/p/a?delimiter=xx',
environ={'REQUEST_METHOD': 'GET',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412)
def test_GET_prefix_delimiter_plain(self):
req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
@ -1655,6 +1648,110 @@ class TestAccountController(unittest.TestCase):
listing.append(node2.firstChild.nodeValue)
self.assertEqual(listing, ['sub.1.0', 'sub.1.1', 'sub.1.2'])
def test_GET_multichar_delimiter(self):
self.maxDiff = None
req = Request.blank('/sda1/p/a', method='PUT', headers={
'x-timestamp': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201, resp.body)
for i in ('US~~TX~~A', 'US~~TX~~B', 'US~~OK~~A', 'US~~OK~~B',
'US~~OK~Tulsa~~A', 'US~~OK~Tulsa~~B',
'US~~UT~~A', 'US~~UT~~~B'):
req = Request.blank('/sda1/p/a/%s' % i, method='PUT', headers={
'X-Put-Timestamp': '1',
'X-Delete-Timestamp': '0',
'X-Object-Count': '0',
'X-Bytes-Used': '0',
'X-Timestamp': normalize_timestamp(0)})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
req = Request.blank(
'/sda1/p/a?prefix=US~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~OK~Tulsa~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~OK~Tulsa~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"subdir": "US~~UT~~~"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"subdir": "US~~UT~~~"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"name": "US~~UT~~~B"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a?prefix=US~~UT~~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"}])
def test_through_call(self):
inbuf = BytesIO()
errbuf = StringIO()
@ -1779,18 +1876,13 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400,
"%d on param %s" % (resp.status_int, param))
# Good UTF8 sequence for delimiter, too long (1 byte delimiters only)
req = Request.blank('/sda1/p/a?delimiter=\xce\xa9',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412,
"%d on param delimiter" % (resp.status_int))
Request.blank('/sda1/p/a',
headers={'X-Timestamp': normalize_timestamp(1)},
environ={'REQUEST_METHOD': 'PUT'}).get_response(
self.controller)
# Good UTF8 sequence, ignored for limit, doesn't affect other queries
for param in ('limit', 'marker', 'prefix', 'end_marker', 'format'):
for param in ('limit', 'marker', 'prefix', 'end_marker', 'format',
'delimiter'):
req = Request.blank('/sda1/p/a?%s=\xce\xa9' % param,
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)

View File

@ -3028,13 +3028,14 @@ class TestContainerController(unittest.TestCase):
'/sda1/p/a/c', method='PUT', headers=headers, body=body)
self.assertEqual(202, req.get_response(self.controller).status_int)
def do_test(params):
def do_test(params, expected_status):
params['format'] = 'json'
headers = {'X-Backend-Record-Type': 'shard'}
req = Request.blank('/sda1/p/a/c', method='GET',
headers=headers, params=params)
with mock_timestamp_now(ts_now):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, expected_status)
self.assertEqual(resp.content_type, 'text/html')
self.assertNotIn('X-Backend-Record-Type', resp.headers)
self.assertNotIn('X-Backend-Sharding-State', resp.headers)
@ -3042,26 +3043,19 @@ class TestContainerController(unittest.TestCase):
self.assertNotIn('X-Container-Bytes-Used', resp.headers)
self.assertNotIn('X-Timestamp', resp.headers)
self.assertNotIn('X-PUT-Timestamp', resp.headers)
return resp
resp = do_test({'states': 'bad'})
self.assertEqual(resp.status_int, 400)
resp = do_test({'delimiter': 'bad'})
self.assertEqual(resp.status_int, 412)
resp = do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)})
self.assertEqual(resp.status_int, 412)
do_test({'states': 'bad'}, 400)
do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)}, 412)
with mock.patch('swift.container.server.check_drive',
side_effect=ValueError('sda1 is not mounted')):
resp = do_test({})
self.assertEqual(resp.status_int, 507)
do_test({}, 507)
# delete the container
req = Request.blank('/sda1/p/a/c', method='DELETE',
headers={'X-Timestamp': next(ts_iter).normal})
self.assertEqual(204, req.get_response(self.controller).status_int)
resp = do_test({'states': 'bad'})
self.assertEqual(resp.status_int, 404)
do_test({'states': 'bad'}, 404)
def test_GET_auto_record_type(self):
# make a container
@ -3982,13 +3976,6 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.body.split(b'\n'), [b'a1', b'a2', b'a3', b''])
def test_GET_delimiter_too_long(self):
req = Request.blank('/sda1/p/a/c?delimiter=xx',
environ={'REQUEST_METHOD': 'GET',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412)
def test_GET_delimiter(self):
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -4014,6 +4001,111 @@ class TestContainerController(unittest.TestCase):
{"subdir": "US-TX-"},
{"subdir": "US-UT-"}])
def test_GET_multichar_delimiter(self):
self.maxDiff = None
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
resp = req.get_response(self.controller)
for i in ('US~~TX~~A', 'US~~TX~~B', 'US~~OK~~A', 'US~~OK~~B',
'US~~OK~Tulsa~~A', 'US~~OK~Tulsa~~B',
'US~~UT~~A', 'US~~UT~~~B'):
req = Request.blank(
'/sda1/p/a/c/%s' % i,
environ={
'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1',
'HTTP_X_CONTENT_TYPE': 'text/plain', 'HTTP_X_ETAG': 'x',
'HTTP_X_SIZE': 0})
self._update_object_put_headers(req)
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
req = Request.blank(
'/sda1/p/a/c?prefix=US~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~OK~Tulsa~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"},
{"subdir": "US~~TX~~"},
{"subdir": "US~~OK~~"},
{"subdir": "US~~OK~Tulsa~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
json.loads(resp.body),
[{"subdir": "US~~UT~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"subdir": "US~~UT~~~"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"subdir": "US~~UT~~~"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~A"},
{"name": "US~~UT~~~B"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~&delimiter=~~&format=json&reverse=on',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"},
{"name": "US~~UT~~A"}])
req = Request.blank(
'/sda1/p/a/c?prefix=US~~UT~~~&delimiter=~~&format=json',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(
[{k: v for k, v in item.items() if k in ('subdir', 'name')}
for item in json.loads(resp.body)],
[{"name": "US~~UT~~~B"}])
def test_GET_delimiter_non_ascii(self):
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -4279,18 +4371,12 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400,
"%d on param %s" % (resp.status_int, param))
# Good UTF8 sequence for delimiter, too long (1 byte delimiters only)
req = Request.blank('/sda1/p/a/c?delimiter=\xce\xa9',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412,
"%d on param delimiter" % (resp.status_int))
req = Request.blank('/sda1/p/a/c', method='PUT',
headers={'X-Timestamp': Timestamp(1).internal})
req.get_response(self.controller)
# Good UTF8 sequence, ignored for limit, doesn't affect other queries
for param in ('limit', 'marker', 'path', 'prefix', 'end_marker',
'format'):
'format', 'delimiter'):
req = Request.blank('/sda1/p/a/c?%s=\xce\xa9' % param,
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.controller)