Implement "GET Bucket (List Objects) Version 2"

Before this commit, V2 listing parameters ('start-after',
'continuation-token' and 'fetch-owner') were just ignored, making some
S3 clients return errors, or handle paging badly.

V2 listing is selected by passing 'list-type=2' in the query string.
In V2, 'marker' is replaced by either 'start-after' or
'continuation-token'.

This commit maps 'start-after' and 'continuation-token' onto the
'marker' parameter, which is passed to swift ('continuation-token'
takes precedence when both are given). 'NextContinuationToken' is the
base64 encoding of the name of the last returned entry, so it is
opaque to the client.

Change-Id: I23bf83cb8bbaf4c4935bf6b56791051c032c688c
This commit is contained in:
Florent Vennetier (OpenIO) 2017-03-14 14:33:47 +01:00 committed by Tim Burke
parent 397ed3ab6a
commit 69313a69bf
5 changed files with 354 additions and 22 deletions

View File

@ -4,8 +4,17 @@ start =
element ListBucketResult {
element Name { xsd:string },
element Prefix { xsd:string },
element Marker { xsd:string },
element NextMarker { xsd:string }?,
(
(
element Marker { xsd:string },
element NextMarker { xsd:string }?
) | (
element NextContinuationToken { xsd:string }?,
element ContinuationToken { xsd:string }?,
element StartAfter { xsd:string }?,
element KeyCount { xsd:int }
)
),
element MaxKeys { xsd:int },
element EncodingType { xsd:string }?,
element Delimiter { xsd:string }?,

View File

@ -14,9 +14,11 @@
# limitations under the License.
import sys
from base64 import standard_b64encode as b64encode
from base64 import standard_b64decode as b64decode
from swift.common.http import HTTP_OK
from swift.common.utils import json, public
from swift.common.utils import json, public, config_true_value
from swift3.controllers.base import Controller
from swift3.etree import Element, SubElement, tostring, fromstring, \
@ -115,6 +117,19 @@ class BucketController(Controller):
if 'delimiter' in req.params:
query.update({'delimiter': req.params['delimiter']})
# GET Bucket (List Objects) Version 2 parameters
is_v2 = int(req.params.get('list-type', '1')) == 2
fetch_owner = False
if is_v2:
if 'start-after' in req.params:
query.update({'marker': req.params['start-after']})
# continuation-token overrides start-after
if 'continuation-token' in req.params:
decoded = b64decode(req.params['continuation-token'])
query.update({'marker': decoded})
if 'fetch-owner' in req.params:
fetch_owner = config_true_value(req.params['fetch-owner'])
resp = req.get_response(self.app, query=query)
objects = json.loads(resp.body)
@ -122,20 +137,36 @@ class BucketController(Controller):
elem = Element('ListBucketResult')
SubElement(elem, 'Name').text = req.container_name
SubElement(elem, 'Prefix').text = req.params.get('prefix')
SubElement(elem, 'Marker').text = req.params.get('marker')
# in order to judge that truncated is valid, check whether
# max_keys + 1 th element exists in swift.
is_truncated = max_keys > 0 and len(objects) > max_keys
objects = objects[:max_keys]
if is_truncated and 'delimiter' in req.params:
if 'name' in objects[-1]:
SubElement(elem, 'NextMarker').text = \
objects[-1]['name']
if 'subdir' in objects[-1]:
SubElement(elem, 'NextMarker').text = \
objects[-1]['subdir']
if not is_v2:
SubElement(elem, 'Marker').text = req.params.get('marker')
if is_truncated and 'delimiter' in req.params:
if 'name' in objects[-1]:
SubElement(elem, 'NextMarker').text = \
objects[-1]['name']
if 'subdir' in objects[-1]:
SubElement(elem, 'NextMarker').text = \
objects[-1]['subdir']
else:
if is_truncated:
if 'name' in objects[-1]:
SubElement(elem, 'NextContinuationToken').text = \
b64encode(objects[-1]['name'])
if 'subdir' in objects[-1]:
SubElement(elem, 'NextContinuationToken').text = \
b64encode(objects[-1]['subdir'])
if 'continuation-token' in req.params:
SubElement(elem, 'ContinuationToken').text = \
req.params['continuation-token']
if 'start-after' in req.params:
SubElement(elem, 'StartAfter').text = \
req.params['start-after']
SubElement(elem, 'KeyCount').text = str(len(objects))
SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
@ -156,9 +187,10 @@ class BucketController(Controller):
o['last_modified'][:-3] + 'Z'
SubElement(contents, 'ETag').text = '"%s"' % o['hash']
SubElement(contents, 'Size').text = str(o['bytes'])
owner = SubElement(contents, 'Owner')
SubElement(owner, 'ID').text = req.user_id
SubElement(owner, 'DisplayName').text = req.user_id
if fetch_owner or not is_v2:
owner = SubElement(contents, 'Owner')
SubElement(owner, 'ID').text = req.user_id
SubElement(owner, 'DisplayName').text = req.user_id
SubElement(contents, 'StorageClass').text = 'STANDARD'
for o in objects:

View File

@ -9,14 +9,38 @@
<element name="Prefix">
<data type="string"/>
</element>
<element name="Marker">
<data type="string"/>
</element>
<optional>
<element name="NextMarker">
<data type="string"/>
</element>
</optional>
<choice>
<group>
<element name="Marker">
<data type="string"/>
</element>
<optional>
<element name="NextMarker">
<data type="string"/>
</element>
</optional>
</group>
<group>
<optional>
<element name="NextContinuationToken">
<data type="string"/>
</element>
</optional>
<optional>
<element name="ContinuationToken">
<data type="string"/>
</element>
</optional>
<optional>
<element name="StartAfter">
<data type="string"/>
</element>
</optional>
<element name="KeyCount">
<data type="int"/>
</element>
</group>
</choice>
<element name="MaxKeys">
<data type="int"/>
</element>

View File

@ -283,6 +283,121 @@ class TestSwift3Bucket(Swift3FunctionalTestCase):
self.assertTrue(o.find('Owner/DisplayName').text,
self.conn.user_id)
def test_get_bucket_v2_with_start_after(self):
    """List a bucket with V2 'start-after' and check the returned keys.

    The 'start-after' value must be echoed back in 'StartAfter', only
    the expected keys must be listed, and no Owner information is
    expected since 'fetch-owner' was not requested.
    """
    bucket = 'bucket'
    stored_keys = ('object', 'object2', 'subdir/object', 'subdir2/object',
                   'dir/subdir/object')
    self._prepare_test_get_bucket(bucket, stored_keys)

    start_after = 'object'
    expected_keys = ('object2', 'subdir/object', 'subdir2/object')
    status, headers, body = self.conn.make_request(
        'GET', bucket, query='list-type=2&start-after=%s' % start_after)
    self.assertEqual(status, 200)

    result = fromstring(body, 'ListBucketResult')
    self.assertEqual(result.find('StartAfter').text, start_after)

    entries = result.findall('./Contents')
    self.assertEqual(len(list(entries)), len(expected_keys))
    # Lengths were just checked, so pairing entries with keys is safe.
    for entry, key in zip(entries, expected_keys):
        self.assertEqual(entry.find('Key').text, key)
        self.assertIsNotNone(entry.find('LastModified').text)
        self.assertRegexpMatches(
            entry.find('LastModified').text,
            r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
        self.assertIsNotNone(entry.find('ETag').text)
        self.assertIsNotNone(entry.find('Size').text)
        self.assertEqual(entry.find('StorageClass').text, 'STANDARD')
        # V2 listings omit the owner unless 'fetch-owner' is set.
        self.assertIsNone(entry.find('Owner/ID'))
        self.assertIsNone(entry.find('Owner/DisplayName'))
def test_get_bucket_v2_with_fetch_owner(self):
    """List a bucket with V2 'fetch-owner=true' and check Owner fields.

    All stored keys are expected back in listing order, 'KeyCount' must
    match, and every entry must carry Owner/ID and Owner/DisplayName
    equal to the connection's user id.
    """
    bucket = 'bucket'
    put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object',
                   'dir/subdir/object')
    self._prepare_test_get_bucket(bucket, put_objects)

    query = 'list-type=2&fetch-owner=true'
    expect_objects = ('dir/subdir/object', 'object', 'object2',
                      'subdir/object', 'subdir2/object')
    status, headers, body = \
        self.conn.make_request('GET', bucket, query=query)
    self.assertEqual(status, 200)

    elem = fromstring(body, 'ListBucketResult')
    self.assertEqual(elem.find('KeyCount').text, '5')

    resp_objects = elem.findall('./Contents')
    self.assertEqual(len(list(resp_objects)), len(expect_objects))
    for i, o in enumerate(resp_objects):
        self.assertEqual(o.find('Key').text, expect_objects[i])
        self.assertTrue(o.find('LastModified').text is not None)
        self.assertRegexpMatches(
            o.find('LastModified').text,
            r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
        self.assertTrue(o.find('ETag').text is not None)
        self.assertTrue(o.find('Size').text is not None)
        self.assertEqual(o.find('StorageClass').text, 'STANDARD')
        # assertTrue(value, expected) would treat 'expected' as the
        # failure message and pass for any non-empty text; compare the
        # values explicitly instead.
        self.assertEqual(o.find('Owner/ID').text, self.conn.user_id)
        self.assertEqual(o.find('Owner/DisplayName').text,
                         self.conn.user_id)
def test_get_bucket_v2_with_continuation_token(self):
    """Page through a bucket with V2 listing and 'continuation-token'.

    The first request is limited to three keys and must be truncated;
    the token it returns is used to fetch the remaining two keys.
    """
    bucket = 'bucket'
    stored_keys = ('object', 'object2', 'subdir/object', 'subdir2/object',
                   'dir/subdir/object')
    self._prepare_test_get_bucket(bucket, stored_keys)

    def check_contents(result, expected_keys):
        # Shared per-entry checks for both pages of the listing.
        entries = result.findall('./Contents')
        self.assertEqual(len(list(entries)), len(expected_keys))
        for entry, key in zip(entries, expected_keys):
            self.assertEqual(entry.find('Key').text, key)
            self.assertIsNotNone(entry.find('LastModified').text)
            self.assertRegexpMatches(
                entry.find('LastModified').text,
                r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
            self.assertIsNotNone(entry.find('ETag').text)
            self.assertIsNotNone(entry.find('Size').text)
            self.assertEqual(entry.find('StorageClass').text, 'STANDARD')
            self.assertIsNone(entry.find('Owner/ID'))
            self.assertIsNone(entry.find('Owner/DisplayName'))

    # First page: three keys, truncated, with a token for the next page.
    status, headers, body = self.conn.make_request(
        'GET', bucket, query='list-type=2&max-keys=3')
    self.assertEqual(status, 200)
    first_page = fromstring(body, 'ListBucketResult')
    self.assertEqual(first_page.find('MaxKeys').text, '3')
    self.assertEqual(first_page.find('KeyCount').text, '3')
    self.assertEqual(first_page.find('IsTruncated').text, 'true')
    next_token = first_page.find('NextContinuationToken')
    self.assertIsNotNone(next_token)
    check_contents(first_page, ('dir/subdir/object', 'object', 'object2'))

    # Second page: resume from the token; the listing ends here.
    status, headers, body = self.conn.make_request(
        'GET', bucket,
        query='list-type=2&max-keys=3&continuation-token=%s' %
        next_token.text)
    self.assertEqual(status, 200)
    second_page = fromstring(body, 'ListBucketResult')
    self.assertEqual(second_page.find('MaxKeys').text, '3')
    self.assertEqual(second_page.find('KeyCount').text, '2')
    self.assertEqual(second_page.find('IsTruncated').text, 'false')
    self.assertIsNone(second_page.find('NextContinuationToken'))
    # The token that was sent must be echoed back unchanged.
    echoed_token = second_page.find('ContinuationToken')
    self.assertEqual(echoed_token.text, next_token.text)
    check_contents(second_page, ('subdir/object', 'subdir2/object'))
def test_head_bucket_error(self):
self.conn.make_request('PUT', 'bucket')

View File

@ -69,8 +69,20 @@ class TestSwift3Bucket(Swift3TestCase):
{}, None)
self.swift.register('GET', '/v1/AUTH_test/junk', swob.HTTPOk, {},
object_list)
self.swift.register(
'GET',
'/v1/AUTH_test/junk?delimiter=a&format=json&limit=3&marker=viola',
swob.HTTPOk, {}, json.dumps(objects[2:]))
self.swift.register('GET', '/v1/AUTH_test/junk-subdir', swob.HTTPOk,
{}, json.dumps(object_list_subdir))
self.swift.register(
'GET',
'/v1/AUTH_test/subdirs?delimiter=/&format=json&limit=3',
swob.HTTPOk, {}, json.dumps([
{'subdir': 'nothing/'},
{'subdir': 'but/'},
{'subdir': 'subdirs/'},
]))
def setUp(self):
super(TestSwift3Bucket, self).setUp()
@ -183,6 +195,47 @@ class TestSwift3Bucket(Swift3TestCase):
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./IsTruncated').text, 'true')
req = Request.blank('/subdirs?delimiter=/&max-keys=2',
environ={'REQUEST_METHOD': 'GET'},
headers={'Authorization': 'AWS test:tester:hmac',
'Date': self.get_date_header()})
status, headers, body = self.call_swift3(req)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./IsTruncated').text, 'true')
self.assertEqual(elem.find('./NextMarker').text, 'but/')
def test_bucket_GET_v2_is_truncated(self):
    """Check 'IsTruncated', 'KeyCount' and the next token for V2 listings.

    Three listings are issued: one that is expected to fit within
    max-keys, one that is expected to be truncated, and one truncated
    listing of the 'subdirs' bucket using a delimiter.
    """
    bucket_name = 'junk'

    def list_v2(path):
        # Issue a signed GET and return the parsed ListBucketResult.
        req = Request.blank(path,
                            environ={'REQUEST_METHOD': 'GET'},
                            headers={'Authorization': 'AWS test:tester:hmac',
                                     'Date': self.get_date_header()})
        status, headers, body = self.call_swift3(req)
        return fromstring(body, 'ListBucketResult')

    # max-keys=5: expected to return everything, so no truncation.
    result = list_v2('/%s?list-type=2&max-keys=5' % bucket_name)
    self.assertEqual(result.find('./KeyCount').text, '5')
    self.assertEqual(result.find('./IsTruncated').text, 'false')

    # max-keys=4: expected to be truncated and to carry a next token.
    result = list_v2('/%s?list-type=2&max-keys=4' % bucket_name)
    self.assertIsNotNone(result.find('./NextContinuationToken'))
    self.assertEqual(result.find('./KeyCount').text, '4')
    self.assertEqual(result.find('./IsTruncated').text, 'true')

    # Delimiter listing of the 'subdirs' bucket, also truncated.
    result = list_v2('/subdirs?list-type=2&delimiter=/&max-keys=2')
    self.assertIsNotNone(result.find('./NextContinuationToken'))
    self.assertEqual(result.find('./KeyCount').text, '2')
    self.assertEqual(result.find('./IsTruncated').text, 'true')
def test_bucket_GET_max_keys(self):
bucket_name = 'junk'
@ -259,6 +312,26 @@ class TestSwift3Bucket(Swift3TestCase):
self.assertEqual(args['marker'], 'b')
self.assertEqual(args['prefix'], 'c')
def test_bucket_GET_v2_passthroughs(self):
    """Check V2 query parameters are echoed back and forwarded to swift.

    'prefix', 'start-after' and 'delimiter' must appear in the response,
    and the backend request must carry 'start-after' as 'marker'.
    """
    bucket_name = 'junk'
    url = '/%s?list-type=2&delimiter=a&start-after=b&prefix=c' % bucket_name
    req = Request.blank(
        url,
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS test:tester:hmac',
                 'Date': self.get_date_header()})
    status, headers, body = self.call_swift3(req)

    # Values echoed in the S3 response.
    result = fromstring(body, 'ListBucketResult')
    self.assertEqual(result.find('./Prefix').text, 'c')
    self.assertEqual(result.find('./StartAfter').text, 'b')
    self.assertEqual(result.find('./Delimiter').text, 'a')

    # Values forwarded in the last backend call.
    _, backend_path = self.swift.calls[-1]
    _, backend_query = backend_path.split('?')
    forwarded = dict(cgi.parse_qsl(backend_query))
    self.assertEqual(forwarded['delimiter'], 'a')
    # "start-after" is converted to "marker"
    self.assertEqual(forwarded['marker'], 'b')
    self.assertEqual(forwarded['prefix'], 'c')
def test_bucket_GET_with_nonascii_queries(self):
bucket_name = 'junk'
req = Request.blank(
@ -279,6 +352,26 @@ class TestSwift3Bucket(Swift3TestCase):
self.assertEqual(args['marker'], '\xef\xbc\xa2')
self.assertEqual(args['prefix'], '\xef\xbc\xa3')
def test_bucket_GET_v2_with_nonascii_queries(self):
    """Check non-ASCII V2 query values are echoed and forwarded intact.

    Same checks as the ASCII passthrough case, using multi-byte values
    for 'delimiter', 'start-after' and 'prefix'.
    """
    bucket_name = 'junk'
    url = ('/%s?list-type=2&delimiter=\xef\xbc\xa1&start-after=\xef\xbc\xa2&'
           'prefix=\xef\xbc\xa3' % bucket_name)
    req = Request.blank(
        url,
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS test:tester:hmac',
                 'Date': self.get_date_header()})
    status, headers, body = self.call_swift3(req)

    # Values echoed in the S3 response.
    result = fromstring(body, 'ListBucketResult')
    self.assertEqual(result.find('./Prefix').text, '\xef\xbc\xa3')
    self.assertEqual(result.find('./StartAfter').text, '\xef\xbc\xa2')
    self.assertEqual(result.find('./Delimiter').text, '\xef\xbc\xa1')

    # Values forwarded in the last backend call; "start-after" travels
    # as "marker".
    _, backend_path = self.swift.calls[-1]
    _, backend_query = backend_path.split('?')
    forwarded = dict(cgi.parse_qsl(backend_query))
    self.assertEqual(forwarded['delimiter'], '\xef\xbc\xa1')
    self.assertEqual(forwarded['marker'], '\xef\xbc\xa2')
    self.assertEqual(forwarded['prefix'], '\xef\xbc\xa3')
def test_bucket_GET_with_delimiter_max_keys(self):
bucket_name = 'junk'
req = Request.blank('/%s?delimiter=a&max-keys=2' % bucket_name,
@ -292,6 +385,33 @@ class TestSwift3Bucket(Swift3TestCase):
self.assertEqual(elem.find('./MaxKeys').text, '2')
self.assertEqual(elem.find('./IsTruncated').text, 'true')
def test_bucket_GET_v2_with_delimiter_max_keys(self):
    """Check a truncated V2 delimiter listing can be resumed by token.

    The first request must be truncated and return a
    'NextContinuationToken'; replaying that token must yield a listing
    whose first key is 'lily'.
    """
    bucket_name = 'junk'
    auth_headers = {'Authorization': 'AWS test:tester:hmac',
                    'Date': self.get_date_header()}
    first_req = Request.blank(
        '/%s?list-type=2&delimiter=a&max-keys=2' % bucket_name,
        environ={'REQUEST_METHOD': 'GET'},
        headers=auth_headers)
    status, headers, body = self.call_swift3(first_req)
    self.assertEqual(status.split()[0], '200')

    first_page = fromstring(body, 'ListBucketResult')
    token_elem = first_page.find('./NextContinuationToken')
    self.assertIsNotNone(token_elem)
    self.assertEqual(first_page.find('./MaxKeys').text, '2')
    self.assertEqual(first_page.find('./IsTruncated').text, 'true')

    # Resume the listing with the token from the first page.
    resume_headers = {'Authorization': 'AWS test:tester:hmac',
                      'Date': self.get_date_header()}
    second_req = Request.blank(
        '/%s?list-type=2&delimiter=a&max-keys=2&continuation-token=%s' %
        (bucket_name, token_elem.text),
        environ={'REQUEST_METHOD': 'GET'},
        headers=resume_headers)
    status, headers, body = self.call_swift3(second_req)
    self.assertEqual(status.split()[0], '200')

    second_page = fromstring(body, 'ListBucketResult')
    keys = []
    for entry in second_page.iterchildren('Contents'):
        keys.append(entry.find('./Key').text)
    self.assertEqual(keys[0], 'lily')
def test_bucket_GET_subdir_with_delimiter_max_keys(self):
bucket_name = 'junk-subdir'
req = Request.blank('/%s?delimiter=a&max-keys=1' % bucket_name,
@ -305,6 +425,38 @@ class TestSwift3Bucket(Swift3TestCase):
self.assertEqual(elem.find('./MaxKeys').text, '1')
self.assertEqual(elem.find('./IsTruncated').text, 'true')
def test_bucket_GET_v2_fetch_owner(self):
    """Check Owner is present in V2 listings only with 'fetch-owner'.

    Without the parameter no 'Contents' entry may carry an Owner
    element; with 'fetch-owner=true' every entry must carry one.
    """
    bucket_name = 'junk'

    # Plain V2 listing: entries must not include Owner.
    plain_req = Request.blank(
        '/%s?list-type=2' % bucket_name,
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS test:tester:hmac',
                 'Date': self.get_date_header()})
    status, headers, body = self.call_swift3(plain_req)
    self.assertEqual(status.split()[0], '200')

    result = fromstring(body, 'ListBucketResult')
    self.assertEqual(result.find('./Name').text, bucket_name)
    for entry in result.iterchildren('Contents'):
        self.assertIsNone(entry.find('./Owner'))

    # V2 listing with fetch-owner=true: every entry must include Owner.
    owner_req = Request.blank(
        '/%s?list-type=2&fetch-owner=true' % bucket_name,
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS test:tester:hmac',
                 'Date': self.get_date_header()})
    status, headers, body = self.call_swift3(owner_req)
    self.assertEqual(status.split()[0], '200')

    result = fromstring(body, 'ListBucketResult')
    self.assertEqual(result.find('./Name').text, bucket_name)
    for entry in result.iterchildren('Contents'):
        self.assertIsNotNone(entry.find('./Owner'))
@s3acl
def test_bucket_PUT_error(self):
code = self._test_method_error('PUT', '/bucket', swob.HTTPCreated,