some container serialization cleanup

Have json and xml use common record cleanup code.
Do a somewhat better job of parsing extensions from content-types.
Use a real XML serializer.

Change-Id: I10e14dffd1da590b4fd180b4d33ab5de862e2b55
This commit is contained in:
Michael Barton 2013-07-23 14:54:51 -07:00
parent af9447490d
commit 53345da70e
6 changed files with 135 additions and 76 deletions

View File

@ -1022,6 +1022,11 @@ class Response(object):
self.headers.update(headers) self.headers.update(headers)
for key, value in kw.iteritems(): for key, value in kw.iteritems():
setattr(self, key, value) setattr(self, key, value)
# When specifying both 'content_type' and 'charset' in the kwargs,
# charset needs to be applied *after* content_type, otherwise charset
# can get wiped out when content_type sorts later in dict order.
if 'charset' in kw and 'content_type' in kw:
self.charset = kw['charset']
def _prepare_for_ranges(self, ranges): def _prepare_for_ranges(self, ranges):
""" """

View File

@ -2211,3 +2211,33 @@ def ismount(path):
return True return True
return False return False
# Character class for an RFC 2616-style token: one or more characters that
# are not one of the listed separators, a control character or a space.
_rfc_token = r'[^()<>@,;:\"/\[\]?={}\x00-\x20\x7f]+'
# Matches a single ";name" or ";name=value" parameter. The value is either a
# token or a double-quoted string (backslash-escaped characters are allowed
# inside the quotes). Group 1 is the name, group 2 the raw value.
_rfc_extension_pattern = re.compile(
    r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token +
    r'|"(?:[^"\\]|\\.)*"))?)')


def parse_content_type(content_type):
    """
    Parse a content-type and its parameters into values.
    RFC 2616 sec 14.17 and 3.7 are pertinent.

    **Examples**::

        'text/plain; charset=UTF-8' -> ('text/plain', [('charset', 'UTF-8')])
        'text/plain; charset=UTF-8; level=1' ->
            ('text/plain', [('charset', 'UTF-8'), ('level', '1')])

    Quoted parameter values are returned with their surrounding double
    quotes intact, and a parameter given without a value is reported with
    an empty string as its value.

    Whitespace between the media type and the first ';' is treated as part
    of the separator and is not returned as part of the content type. A
    content type that carries no ';' at all is returned unchanged.

    :param content_type: content_type to parse
    :returns: a tuple containing (content type, list of k, v parameter tuples)
    """
    parm_list = []
    if ';' in content_type:
        content_type, parms = content_type.split(';', 1)
        # The parameter pattern already tolerates whitespace around ';', so
        # drop the matching whitespace left on the media-type side as well.
        content_type = content_type.rstrip()
        # Put the ';' back so the first parameter matches the same pattern
        # as every later one.
        parms = ';' + parms
        for key, value in _rfc_extension_pattern.findall(parms):
            parm_list.append((key.strip(), value.strip()))
    return content_type, parm_list

View File

@ -18,9 +18,9 @@ from __future__ import with_statement
import os import os
import time import time
import traceback import traceback
from xml.sax import saxutils
from datetime import datetime from datetime import datetime
from gettext import gettext as _ from gettext import gettext as _
from xml.etree.cElementTree import Element, SubElement, tostring
from eventlet import Timeout from eventlet import Timeout
@ -30,7 +30,7 @@ from swift.common.request_helpers import get_param
from swift.common.utils import get_logger, hash_path, public, \ from swift.common.utils import get_logger, hash_path, public, \
normalize_timestamp, storage_directory, validate_sync_to, \ normalize_timestamp, storage_directory, validate_sync_to, \
config_true_value, validate_device_partition, json, timing_stats, \ config_true_value, validate_device_partition, json, timing_stats, \
replication replication, parse_content_type
from swift.common.constraints import CONTAINER_LISTING_LIMIT, \ from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
check_mount, check_float, check_utf8, FORMAT2CONTENT_TYPE check_mount, check_float, check_utf8, FORMAT2CONTENT_TYPE
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
@ -330,25 +330,37 @@ class ContainerController(object):
headers['Content-Type'] = out_content_type headers['Content-Type'] = out_content_type
return HTTPNoContent(request=req, headers=headers, charset='utf-8') return HTTPNoContent(request=req, headers=headers, charset='utf-8')
def derive_content_type_metadata(self, content_type, size): def update_data_record(self, record):
""" """
Will check the last parameter and if it starts with 'swift_bytes=' will Perform any mutations to container listing records that are common to
strip it off. Returns either the passed in content_type and size all serialization formats, and returns it as a dict.
or the content_type without the swift_bytes param and its value as
the new size. Converts created time to iso timestamp.
:params content_type: Content Type from db Replaces size with 'swift_bytes' content type parameter.
:params size: # bytes from db, an int
:returns: tuple: content_type, size :params record: object entry record
:returns: modified record
""" """
if ';' in content_type: (name, created, size, content_type, etag) = record
new_content_type, param = content_type.rsplit(';', 1) if content_type is None:
if param.lstrip().startswith('swift_bytes='): return {'subdir': name}
key, value = param.split('=') response = {'bytes': size, 'hash': etag, 'name': name}
last_modified = datetime.utcfromtimestamp(float(created)).isoformat()
# python isoformat() doesn't include msecs when zero
if len(last_modified) < len("1970-01-01T00:00:00.000000"):
last_modified += ".000000"
response['last_modified'] = last_modified + 'Z'
content_type, params = parse_content_type(content_type)
for key, value in params:
if key == 'swift_bytes':
try: try:
return new_content_type, int(value) response['bytes'] = int(value)
except ValueError: except ValueError:
self.logger.exception("Invalid swift_bytes") self.logger.exception("Invalid swift_bytes")
return content_type, size else:
content_type += ';%s=%s' % (key, value)
response['content_type'] = content_type
return response
@public @public
@timing_stats() @timing_stats()
@ -398,65 +410,39 @@ class ContainerController(object):
'X-Timestamp': info['created_at'], 'X-Timestamp': info['created_at'],
'X-PUT-Timestamp': info['put_timestamp'], 'X-PUT-Timestamp': info['put_timestamp'],
} }
resp_headers.update( for key, (value, timestamp) in broker.metadata.iteritems():
(key, value) if value and (key.lower() in self.save_headers or
for key, (value, timestamp) in broker.metadata.iteritems() key.lower().startswith('x-container-meta-')):
if value != '' and (key.lower() in self.save_headers or resp_headers[key] = value
key.lower().startswith('x-container-meta-'))) ret = Response(request=req, headers=resp_headers,
content_type=out_content_type, charset='utf-8')
container_list = broker.list_objects_iter(limit, marker, end_marker, container_list = broker.list_objects_iter(limit, marker, end_marker,
prefix, delimiter, path) prefix, delimiter, path)
if out_content_type == 'application/json': if out_content_type == 'application/json':
data = [] ret.body = json.dumps([self.update_data_record(record)
for (name, created_at, size, content_type, etag) in container_list: for record in container_list])
if content_type is None:
data.append({"subdir": name})
else:
created_at = datetime.utcfromtimestamp(
float(created_at)).isoformat()
# python isoformat() doesn't include msecs when zero
if len(created_at) < len("1970-01-01T00:00:00.000000"):
created_at += ".000000"
created_at += 'Z'
content_type, size = self.derive_content_type_metadata(
content_type, size)
data.append({'last_modified': created_at, 'bytes': size,
'content_type': content_type, 'hash': etag,
'name': name})
container_list = json.dumps(data)
elif out_content_type.endswith('/xml'): elif out_content_type.endswith('/xml'):
xml_output = [] doc = Element('container', name=container.decode('utf-8'))
for (name, created_at, size, content_type, etag) in container_list: for obj in container_list:
created_at = datetime.utcfromtimestamp( record = self.update_data_record(obj)
float(created_at)).isoformat() if 'subdir' in record:
# python isoformat() doesn't include msecs when zero name = record['subdir'].decode('utf-8')
if len(created_at) < len("1970-01-01T00:00:00.000000"): sub = SubElement(doc, 'subdir', name=name)
created_at += ".000000" SubElement(sub, 'name').text = name
created_at += 'Z'
if content_type is None:
xml_output.append(
'<subdir name=%s><name>%s</name></subdir>' %
(saxutils.quoteattr(name), saxutils.escape(name)))
else: else:
content_type, size = self.derive_content_type_metadata( obj_element = SubElement(doc, 'object')
content_type, size) for field in ["name", "hash", "bytes", "content_type",
content_type = saxutils.escape(content_type) "last_modified"]:
xml_output.append( SubElement(obj_element, field).text = str(
'<object><name>%s</name><hash>%s</hash>' record.pop(field)).decode('utf-8')
'<bytes>%d</bytes><content_type>%s</content_type>' for field in sorted(record.keys()):
'<last_modified>%s</last_modified></object>' % SubElement(obj_element, field).text = str(
(saxutils.escape(name), etag, size, content_type, record[field]).decode('utf-8')
created_at)) ret.body = tostring(doc, encoding='UTF-8')
container_list = ''.join([
'<?xml version="1.0" encoding="UTF-8"?>\n',
'<container name=%s>' % saxutils.quoteattr(container),
''.join(xml_output), '</container>'])
else: else:
if not container_list: if not container_list:
return HTTPNoContent(request=req, headers=resp_headers) return HTTPNoContent(request=req, headers=resp_headers)
container_list = '\n'.join(r[0] for r in container_list) + '\n' ret.body = '\n'.join(rec[0] for rec in container_list) + '\n'
ret = Response(body=container_list, request=req, headers=resp_headers)
ret.content_type = out_content_type
ret.charset = 'utf-8'
return ret return ret
@public @public

View File

@ -952,6 +952,14 @@ class TestResponse(unittest.TestCase):
resp.charset = 'utf16' resp.charset = 'utf16'
self.assertEquals(resp.charset, 'utf16') self.assertEquals(resp.charset, 'utf16')
def test_charset_content_type(self):
# Build a Response with both content_type and charset given as keyword
# arguments, written once in each order, and check that the charset reads
# back as 'utf-8' both times.
resp = swift.common.swob.Response(
content_type='text/plain', charset='utf-8')
self.assertEquals(resp.charset, 'utf-8')
resp = swift.common.swob.Response(
charset='utf-8', content_type='text/plain')
self.assertEquals(resp.charset, 'utf-8')
def test_etag(self): def test_etag(self):
resp = self._get_response() resp = self._get_response()
resp.etag = 'hi' resp.etag = 'hi'

View File

@ -1529,6 +1529,27 @@ log_name = %(yarr)s'''
finally: finally:
shutil.rmtree(tmpdir) shutil.rmtree(tmpdir)
def test_parse_content_type(self):
# Exercises utils.parse_content_type; each case pins the returned
# (content type, [(key, value), ...]) pair for one input shape.
# No parameters at all: the parameter list is empty.
self.assertEquals(utils.parse_content_type('text/plain'),
('text/plain', []))
# A single token-valued parameter.
self.assertEquals(utils.parse_content_type('text/plain;charset=utf-8'),
('text/plain', [('charset', 'utf-8')]))
# A quoted value is returned with its double quotes kept.
self.assertEquals(
utils.parse_content_type('text/plain;hello="world";charset=utf-8'),
('text/plain', [('hello', '"world"'), ('charset', 'utf-8')]))
# Whitespace after each ';' is not part of the key.
self.assertEquals(
utils.parse_content_type('text/plain; hello="world"; a=b'),
('text/plain', [('hello', '"world"'), ('a', 'b')]))
# A backslash-escaped quote inside a quoted value does not end the value.
self.assertEquals(
utils.parse_content_type(r'text/plain; x="\""; a=b'),
('text/plain', [('x', r'"\""'), ('a', 'b')]))
# A parameter with no '=value' part gets an empty-string value.
self.assertEquals(
utils.parse_content_type(r'text/plain; x; a=b'),
('text/plain', [('x', ''), ('a', 'b')]))
# Same as above, with the valueless parameter in last position.
self.assertEquals(
utils.parse_content_type(r'text/plain; x="\""; a'),
('text/plain', [('x', r'"\""'), ('a', '')]))
class TestFileLikeIter(unittest.TestCase): class TestFileLikeIter(unittest.TestCase):

View File

@ -21,6 +21,7 @@ from contextlib import contextmanager
from shutil import rmtree from shutil import rmtree
from StringIO import StringIO from StringIO import StringIO
from tempfile import mkdtemp from tempfile import mkdtemp
from xml.dom import minidom
from eventlet import spawn, Timeout, listen from eventlet import spawn, Timeout, listen
import simplejson import simplejson
@ -890,7 +891,7 @@ class TestContainerController(unittest.TestCase):
'HTTP_X_SIZE': 0}) 'HTTP_X_SIZE': 0})
resp = self.controller.PUT(req) resp = self.controller.PUT(req)
self.assertEquals(resp.status_int, 201) self.assertEquals(resp.status_int, 201)
xml_body = '<?xml version="1.0" encoding="UTF-8"?>\n' \ xml_body = "<?xml version='1.0' encoding='UTF-8'?>\n" \
'<container name="xmlc">' \ '<container name="xmlc">' \
'<object><name>0</name><hash>x</hash><bytes>0</bytes>' \ '<object><name>0</name><hash>x</hash><bytes>0</bytes>' \
'<content_type>text/plain</content_type>' \ '<content_type>text/plain</content_type>' \
@ -905,6 +906,7 @@ class TestContainerController(unittest.TestCase):
'<last_modified>1970-01-01T00:00:01.000000Z' \ '<last_modified>1970-01-01T00:00:01.000000Z' \
'</last_modified></object>' \ '</last_modified></object>' \
'</container>' '</container>'
# tests # tests
req = Request.blank('/sda1/p/a/xmlc?format=xml', req = Request.blank('/sda1/p/a/xmlc?format=xml',
environ={'REQUEST_METHOD': 'GET'}) environ={'REQUEST_METHOD': 'GET'})
@ -961,7 +963,8 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'}) 'HTTP_X_TIMESTAMP': '0'})
resp = self.controller.PUT(req) resp = self.controller.PUT(req)
for i, ctype in enumerate((snowman.encode('utf-8'), 'text/plain; "utf-8"')): for i, ctype in enumerate((snowman.encode('utf-8'),
'text/plain; charset="utf-8"')):
req = Request.blank('/sda1/p/a/c/%s' % i, environ={ req = Request.blank('/sda1/p/a/c/%s' % i, environ={
'REQUEST_METHOD': 'PUT', 'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '1', 'HTTP_X_CONTENT_TYPE': ctype, 'HTTP_X_TIMESTAMP': '1', 'HTTP_X_CONTENT_TYPE': ctype,
@ -971,7 +974,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?format=json', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/sda1/p/a/c?format=json', environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req) resp = self.controller.GET(req)
result = [x['content_type'] for x in simplejson.loads(resp.body)] result = [x['content_type'] for x in simplejson.loads(resp.body)]
self.assertEquals(result, [u'\u2603', 'text/plain; "utf-8"']) self.assertEquals(result, [u'\u2603', 'text/plain;charset="utf-8"'])
def test_GET_accept_not_valid(self): def test_GET_accept_not_valid(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@ -1071,7 +1074,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?prefix=US-&delimiter=-&format=xml', req = Request.blank('/sda1/p/a/c?prefix=US-&delimiter=-&format=xml',
environ={'REQUEST_METHOD': 'GET'}) environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req) resp = self.controller.GET(req)
self.assertEquals(resp.body, '<?xml version="1.0" encoding="UTF-8"?>' self.assertEquals(resp.body, "<?xml version='1.0' encoding='UTF-8'?>"
'\n<container name="c"><subdir name="US-OK-"><name>US-OK-</name></subdir>' '\n<container name="c"><subdir name="US-OK-"><name>US-OK-</name></subdir>'
'<subdir name="US-TX-"><name>US-TX-</name></subdir>' '<subdir name="US-TX-"><name>US-TX-</name></subdir>'
'<subdir name="US-UT-"><name>US-UT-</name></subdir></container>') '<subdir name="US-UT-"><name>US-UT-</name></subdir></container>')
@ -1090,11 +1093,17 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?delimiter=/&format=xml', req = Request.blank('/sda1/p/a/c?delimiter=/&format=xml',
environ={'REQUEST_METHOD': 'GET'}) environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req) resp = self.controller.GET(req)
self.assertEquals( dom = minidom.parseString(resp.body)
resp.body, self.assert_(len(dom.getElementsByTagName('container')) == 1)
'<?xml version="1.0" encoding="UTF-8"?>\n<container name="c">' container = dom.getElementsByTagName('container')[0]
'<subdir name="&lt;\'sub\' &quot;dir&quot;&gt;/">' self.assert_(len(container.getElementsByTagName('subdir')) == 1)
'<name>&lt;\'sub\' "dir"&gt;/</name></subdir></container>') subdir = container.getElementsByTagName('subdir')[0]
self.assertEquals(unicode(subdir.attributes['name'].value),
u'<\'sub\' "dir">/')
self.assert_(len(subdir.getElementsByTagName('name')) == 1)
name = subdir.getElementsByTagName('name')[0]
self.assertEquals(unicode(name.childNodes[0].data),
u'<\'sub\' "dir">/')
def test_GET_path(self): def test_GET_path(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',