some container serialization cleanup

Have json and xml use common record cleanup code.
Do a somewhat better job of parsing extensions from content-types.
Use a real XML serializer.

Change-Id: I10e14dffd1da590b4fd180b4d33ab5de862e2b55
This commit is contained in:
Michael Barton 2013-07-23 14:54:51 -07:00
parent af9447490d
commit 53345da70e
6 changed files with 135 additions and 76 deletions

View File

@ -1022,6 +1022,11 @@ class Response(object):
self.headers.update(headers)
for key, value in kw.iteritems():
setattr(self, key, value)
# When specifying both 'content_type' and 'charset' in the kwargs,
# charset needs to be applied *after* content_type, otherwise charset
# can get wiped out when content_type sorts later in dict order.
if 'charset' in kw and 'content_type' in kw:
self.charset = kw['charset']
def _prepare_for_ranges(self, ranges):
"""

View File

@ -2211,3 +2211,33 @@ def ismount(path):
return True
return False
# A "token" for parameter names and unquoted values: any run of characters
# other than the punctuation listed below, \x00-\x20 (controls and space)
# and \x7f (DEL).
_rfc_token = r'[^()<>@,;:\"/\[\]?={}\x00-\x20\x7f]+'
# Matches a single ";name" or ";name=value" extension, with optional
# whitespace around the ";" and "=".  Group 1 is the parameter name.  Group 2
# is the value: either a token or a double-quoted string (a backslash inside
# the quotes escapes the following character).  Group 2 is the empty string
# when the parameter has no "=value" part.
_rfc_extension_pattern = re.compile(
    r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token +
    r'|"(?:[^"\\]|\\.)*"))?)')


def parse_content_type(content_type):
    """
    Parse a content-type and its parameters into values.
    RFC 2616 sec 14.17 and 3.7 are pertinent.

    Examples:
        'text/plain; charset=UTF-8' -> ('text/plain', [('charset', 'UTF-8')])
        'text/plain; charset=UTF-8; level=1' ->
            ('text/plain', [('charset', 'UTF-8'), ('level', '1')])
        'text/plain; hello="world"; x' ->
            ('text/plain', [('hello', '"world"'), ('x', '')])

    Quoted values are returned with their surrounding double quotes intact,
    and a parameter given without a value is reported with an empty string.
    The content type itself is everything before the first ';', returned
    exactly as it was passed in (it is not stripped or lower-cased).

    :param content_type: content_type to parse
    :returns: a tuple containing (content type, list of k, v parameter tuples)
    """
    if ';' not in content_type:
        # No parameters at all: hand the value back untouched.
        return content_type, []
    content_type, parms = content_type.split(';', 1)
    # The pattern expects every parameter to be introduced by ';', so put
    # back the separator that split() consumed.
    parm_list = [(key.strip(), value.strip())
                 for key, value in _rfc_extension_pattern.findall(';' + parms)]
    return content_type, parm_list

View File

@ -18,9 +18,9 @@ from __future__ import with_statement
import os
import time
import traceback
from xml.sax import saxutils
from datetime import datetime
from gettext import gettext as _
from xml.etree.cElementTree import Element, SubElement, tostring
from eventlet import Timeout
@ -30,7 +30,7 @@ from swift.common.request_helpers import get_param
from swift.common.utils import get_logger, hash_path, public, \
normalize_timestamp, storage_directory, validate_sync_to, \
config_true_value, validate_device_partition, json, timing_stats, \
replication
replication, parse_content_type
from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
check_mount, check_float, check_utf8, FORMAT2CONTENT_TYPE
from swift.common.bufferedhttp import http_connect
@ -330,25 +330,37 @@ class ContainerController(object):
headers['Content-Type'] = out_content_type
return HTTPNoContent(request=req, headers=headers, charset='utf-8')
def derive_content_type_metadata(self, content_type, size):
def update_data_record(self, record):
"""
Will check the last parameter and if it starts with 'swift_bytes=' will
strip it off. Returns either the passed in content_type and size
or the content_type without the swift_bytes param and its value as
the new size.
:params content_type: Content Type from db
:params size: # bytes from db, an int
:returns: tuple: content_type, size
Perform any mutations to container listing records that are common to
all serialization formats, and returns it as a dict.
Converts created time to iso timestamp.
Replaces size with 'swift_bytes' content type parameter.
:params record: object entry record
:returns: modified record
"""
if ';' in content_type:
new_content_type, param = content_type.rsplit(';', 1)
if param.lstrip().startswith('swift_bytes='):
key, value = param.split('=')
(name, created, size, content_type, etag) = record
if content_type is None:
return {'subdir': name}
response = {'bytes': size, 'hash': etag, 'name': name}
last_modified = datetime.utcfromtimestamp(float(created)).isoformat()
# python isoformat() doesn't include msecs when zero
if len(last_modified) < len("1970-01-01T00:00:00.000000"):
last_modified += ".000000"
response['last_modified'] = last_modified + 'Z'
content_type, params = parse_content_type(content_type)
for key, value in params:
if key == 'swift_bytes':
try:
return new_content_type, int(value)
response['bytes'] = int(value)
except ValueError:
self.logger.exception("Invalid swift_bytes")
return content_type, size
else:
content_type += ';%s=%s' % (key, value)
response['content_type'] = content_type
return response
@public
@timing_stats()
@ -398,65 +410,39 @@ class ContainerController(object):
'X-Timestamp': info['created_at'],
'X-PUT-Timestamp': info['put_timestamp'],
}
resp_headers.update(
(key, value)
for key, (value, timestamp) in broker.metadata.iteritems()
if value != '' and (key.lower() in self.save_headers or
key.lower().startswith('x-container-meta-')))
for key, (value, timestamp) in broker.metadata.iteritems():
if value and (key.lower() in self.save_headers or
key.lower().startswith('x-container-meta-')):
resp_headers[key] = value
ret = Response(request=req, headers=resp_headers,
content_type=out_content_type, charset='utf-8')
container_list = broker.list_objects_iter(limit, marker, end_marker,
prefix, delimiter, path)
if out_content_type == 'application/json':
data = []
for (name, created_at, size, content_type, etag) in container_list:
if content_type is None:
data.append({"subdir": name})
else:
created_at = datetime.utcfromtimestamp(
float(created_at)).isoformat()
# python isoformat() doesn't include msecs when zero
if len(created_at) < len("1970-01-01T00:00:00.000000"):
created_at += ".000000"
created_at += 'Z'
content_type, size = self.derive_content_type_metadata(
content_type, size)
data.append({'last_modified': created_at, 'bytes': size,
'content_type': content_type, 'hash': etag,
'name': name})
container_list = json.dumps(data)
ret.body = json.dumps([self.update_data_record(record)
for record in container_list])
elif out_content_type.endswith('/xml'):
xml_output = []
for (name, created_at, size, content_type, etag) in container_list:
created_at = datetime.utcfromtimestamp(
float(created_at)).isoformat()
# python isoformat() doesn't include msecs when zero
if len(created_at) < len("1970-01-01T00:00:00.000000"):
created_at += ".000000"
created_at += 'Z'
if content_type is None:
xml_output.append(
'<subdir name=%s><name>%s</name></subdir>' %
(saxutils.quoteattr(name), saxutils.escape(name)))
doc = Element('container', name=container.decode('utf-8'))
for obj in container_list:
record = self.update_data_record(obj)
if 'subdir' in record:
name = record['subdir'].decode('utf-8')
sub = SubElement(doc, 'subdir', name=name)
SubElement(sub, 'name').text = name
else:
content_type, size = self.derive_content_type_metadata(
content_type, size)
content_type = saxutils.escape(content_type)
xml_output.append(
'<object><name>%s</name><hash>%s</hash>'
'<bytes>%d</bytes><content_type>%s</content_type>'
'<last_modified>%s</last_modified></object>' %
(saxutils.escape(name), etag, size, content_type,
created_at))
container_list = ''.join([
'<?xml version="1.0" encoding="UTF-8"?>\n',
'<container name=%s>' % saxutils.quoteattr(container),
''.join(xml_output), '</container>'])
obj_element = SubElement(doc, 'object')
for field in ["name", "hash", "bytes", "content_type",
"last_modified"]:
SubElement(obj_element, field).text = str(
record.pop(field)).decode('utf-8')
for field in sorted(record.keys()):
SubElement(obj_element, field).text = str(
record[field]).decode('utf-8')
ret.body = tostring(doc, encoding='UTF-8')
else:
if not container_list:
return HTTPNoContent(request=req, headers=resp_headers)
container_list = '\n'.join(r[0] for r in container_list) + '\n'
ret = Response(body=container_list, request=req, headers=resp_headers)
ret.content_type = out_content_type
ret.charset = 'utf-8'
ret.body = '\n'.join(rec[0] for rec in container_list) + '\n'
return ret
@public

View File

@ -952,6 +952,14 @@ class TestResponse(unittest.TestCase):
resp.charset = 'utf16'
self.assertEquals(resp.charset, 'utf16')
def test_charset_content_type(self):
    # charset must survive being passed alongside content_type, whichever
    # of the two keyword arguments is written first in the call.
    make_response = swift.common.swob.Response

    type_then_charset = make_response(
        content_type='text/plain', charset='utf-8')
    self.assertEquals(type_then_charset.charset, 'utf-8')

    charset_then_type = make_response(
        charset='utf-8', content_type='text/plain')
    self.assertEquals(charset_then_type.charset, 'utf-8')
def test_etag(self):
resp = self._get_response()
resp.etag = 'hi'

View File

@ -1529,6 +1529,27 @@ log_name = %(yarr)s'''
finally:
shutil.rmtree(tmpdir)
def test_parse_content_type(self):
    # Each entry pairs a raw header value with the (content type,
    # parameter list) tuple that parse_content_type should return for it.
    # Covered: no parameters, plain and quoted values, an escaped quote
    # inside a quoted value, and parameters that carry no value.
    expectations = (
        ('text/plain',
         ('text/plain', [])),
        ('text/plain;charset=utf-8',
         ('text/plain', [('charset', 'utf-8')])),
        ('text/plain;hello="world";charset=utf-8',
         ('text/plain', [('hello', '"world"'), ('charset', 'utf-8')])),
        ('text/plain; hello="world"; a=b',
         ('text/plain', [('hello', '"world"'), ('a', 'b')])),
        (r'text/plain; x="\""; a=b',
         ('text/plain', [('x', r'"\""'), ('a', 'b')])),
        (r'text/plain; x; a=b',
         ('text/plain', [('x', ''), ('a', 'b')])),
        (r'text/plain; x="\""; a',
         ('text/plain', [('x', r'"\""'), ('a', '')])),
    )
    for raw_value, parsed in expectations:
        self.assertEquals(utils.parse_content_type(raw_value), parsed)
class TestFileLikeIter(unittest.TestCase):

View File

@ -21,6 +21,7 @@ from contextlib import contextmanager
from shutil import rmtree
from StringIO import StringIO
from tempfile import mkdtemp
from xml.dom import minidom
from eventlet import spawn, Timeout, listen
import simplejson
@ -890,7 +891,7 @@ class TestContainerController(unittest.TestCase):
'HTTP_X_SIZE': 0})
resp = self.controller.PUT(req)
self.assertEquals(resp.status_int, 201)
xml_body = '<?xml version="1.0" encoding="UTF-8"?>\n' \
xml_body = "<?xml version='1.0' encoding='UTF-8'?>\n" \
'<container name="xmlc">' \
'<object><name>0</name><hash>x</hash><bytes>0</bytes>' \
'<content_type>text/plain</content_type>' \
@ -905,6 +906,7 @@ class TestContainerController(unittest.TestCase):
'<last_modified>1970-01-01T00:00:01.000000Z' \
'</last_modified></object>' \
'</container>'
# tests
req = Request.blank('/sda1/p/a/xmlc?format=xml',
environ={'REQUEST_METHOD': 'GET'})
@ -961,7 +963,8 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
resp = self.controller.PUT(req)
for i, ctype in enumerate((snowman.encode('utf-8'), 'text/plain; "utf-8"')):
for i, ctype in enumerate((snowman.encode('utf-8'),
'text/plain; charset="utf-8"')):
req = Request.blank('/sda1/p/a/c/%s' % i, environ={
'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '1', 'HTTP_X_CONTENT_TYPE': ctype,
@ -971,7 +974,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?format=json', environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
result = [x['content_type'] for x in simplejson.loads(resp.body)]
self.assertEquals(result, [u'\u2603', 'text/plain; "utf-8"'])
self.assertEquals(result, [u'\u2603', 'text/plain;charset="utf-8"'])
def test_GET_accept_not_valid(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -1071,7 +1074,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?prefix=US-&delimiter=-&format=xml',
environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
self.assertEquals(resp.body, '<?xml version="1.0" encoding="UTF-8"?>'
self.assertEquals(resp.body, "<?xml version='1.0' encoding='UTF-8'?>"
'\n<container name="c"><subdir name="US-OK-"><name>US-OK-</name></subdir>'
'<subdir name="US-TX-"><name>US-TX-</name></subdir>'
'<subdir name="US-UT-"><name>US-UT-</name></subdir></container>')
@ -1090,11 +1093,17 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?delimiter=/&format=xml',
environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
self.assertEquals(
resp.body,
'<?xml version="1.0" encoding="UTF-8"?>\n<container name="c">'
'<subdir name="&lt;\'sub\' &quot;dir&quot;&gt;/">'
'<name>&lt;\'sub\' "dir"&gt;/</name></subdir></container>')
dom = minidom.parseString(resp.body)
self.assert_(len(dom.getElementsByTagName('container')) == 1)
container = dom.getElementsByTagName('container')[0]
self.assert_(len(container.getElementsByTagName('subdir')) == 1)
subdir = container.getElementsByTagName('subdir')[0]
self.assertEquals(unicode(subdir.attributes['name'].value),
u'<\'sub\' "dir">/')
self.assert_(len(subdir.getElementsByTagName('name')) == 1)
name = subdir.getElementsByTagName('name')[0]
self.assertEquals(unicode(name.childNodes[0].data),
u'<\'sub\' "dir">/')
def test_GET_path(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',