diff --git a/swift/common/swob.py b/swift/common/swob.py
index 4713d0deb2..9ec57a026e 100644
--- a/swift/common/swob.py
+++ b/swift/common/swob.py
@@ -1022,6 +1022,11 @@ class Response(object):
self.headers.update(headers)
for key, value in kw.iteritems():
setattr(self, key, value)
+ # When specifying both 'content_type' and 'charset' in the kwargs,
+ # charset needs to be applied *after* content_type, otherwise charset
+ # can get wiped out when content_type sorts later in dict order.
+ if 'charset' in kw and 'content_type' in kw:
+ self.charset = kw['charset']
def _prepare_for_ranges(self, ranges):
"""
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 98c423251d..acd16c2bcc 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -2211,3 +2211,33 @@ def ismount(path):
return True
return False
+
+
+_rfc_token = r'[^()<>@,;:\"/\[\]?={}\x00-\x20\x7f]+'
+_rfc_extension_pattern = re.compile(
+ r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token +
+ r'|"(?:[^"\\]|\\.)*"))?)')
+
+
+def parse_content_type(content_type):
+ """
+ Parse a content-type and its parameters into values.
+ RFC 2616 sec 14.17 and 3.7 are pertinent.
+ Examples:
+
+ 'text/plain; charset=UTF-8' -> ('text/plain', [('charset', 'UTF-8')])
+ 'text/plain; charset=UTF-8; level=1' ->
+ ('text/plain', [('charset', 'UTF-8'), ('level', '1')])
+
+ :param content_type: content_type to parse
+ :returns: a tuple containing (content type, list of k, v parameter tuples)
+ """
+ parm_list = []
+ if ';' in content_type:
+ content_type, parms = content_type.split(';', 1)
+ parms = ';' + parms
+ for m in _rfc_extension_pattern.findall(parms):
+ key = m[0].strip()
+ value = m[1].strip()
+ parm_list.append((key, value))
+ return content_type, parm_list
diff --git a/swift/container/server.py b/swift/container/server.py
index ba5fc5c9a7..e84c0de8d3 100644
--- a/swift/container/server.py
+++ b/swift/container/server.py
@@ -18,9 +18,9 @@ from __future__ import with_statement
import os
import time
import traceback
-from xml.sax import saxutils
from datetime import datetime
from gettext import gettext as _
+from xml.etree.cElementTree import Element, SubElement, tostring
from eventlet import Timeout
@@ -30,7 +30,7 @@ from swift.common.request_helpers import get_param
from swift.common.utils import get_logger, hash_path, public, \
normalize_timestamp, storage_directory, validate_sync_to, \
config_true_value, validate_device_partition, json, timing_stats, \
- replication
+ replication, parse_content_type
from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
check_mount, check_float, check_utf8, FORMAT2CONTENT_TYPE
from swift.common.bufferedhttp import http_connect
@@ -330,25 +330,37 @@ class ContainerController(object):
headers['Content-Type'] = out_content_type
return HTTPNoContent(request=req, headers=headers, charset='utf-8')
- def derive_content_type_metadata(self, content_type, size):
+ def update_data_record(self, record):
"""
- Will check the last parameter and if it starts with 'swift_bytes=' will
- strip it off. Returns either the passed in content_type and size
- or the content_type without the swift_bytes param and its value as
- the new size.
- :params content_type: Content Type from db
- :params size: # bytes from db, an int
- :returns: tuple: content_type, size
+ Perform any mutations to a container listing record that are common to
+ all serialization formats, and return it as a dict.
+
+ Converts created time to iso timestamp.
+ Replaces size with 'swift_bytes' content type parameter.
+
+ :param record: object entry record
+ :returns: modified record
"""
- if ';' in content_type:
- new_content_type, param = content_type.rsplit(';', 1)
- if param.lstrip().startswith('swift_bytes='):
- key, value = param.split('=')
+ (name, created, size, content_type, etag) = record
+ if content_type is None:
+ return {'subdir': name}
+ response = {'bytes': size, 'hash': etag, 'name': name}
+ last_modified = datetime.utcfromtimestamp(float(created)).isoformat()
+ # python isoformat() doesn't include microseconds when zero
+ if len(last_modified) < len("1970-01-01T00:00:00.000000"):
+ last_modified += ".000000"
+ response['last_modified'] = last_modified + 'Z'
+ content_type, params = parse_content_type(content_type)
+ for key, value in params:
+ if key == 'swift_bytes':
try:
- return new_content_type, int(value)
+ response['bytes'] = int(value)
except ValueError:
self.logger.exception("Invalid swift_bytes")
- return content_type, size
+ else:
+ content_type += ';%s=%s' % (key, value)
+ response['content_type'] = content_type
+ return response
@public
@timing_stats()
@@ -398,65 +410,39 @@ class ContainerController(object):
'X-Timestamp': info['created_at'],
'X-PUT-Timestamp': info['put_timestamp'],
}
- resp_headers.update(
- (key, value)
- for key, (value, timestamp) in broker.metadata.iteritems()
- if value != '' and (key.lower() in self.save_headers or
- key.lower().startswith('x-container-meta-')))
+ for key, (value, timestamp) in broker.metadata.iteritems():
+ if value and (key.lower() in self.save_headers or
+ key.lower().startswith('x-container-meta-')):
+ resp_headers[key] = value
+ ret = Response(request=req, headers=resp_headers,
+ content_type=out_content_type, charset='utf-8')
container_list = broker.list_objects_iter(limit, marker, end_marker,
prefix, delimiter, path)
if out_content_type == 'application/json':
- data = []
- for (name, created_at, size, content_type, etag) in container_list:
- if content_type is None:
- data.append({"subdir": name})
- else:
- created_at = datetime.utcfromtimestamp(
- float(created_at)).isoformat()
- # python isoformat() doesn't include msecs when zero
- if len(created_at) < len("1970-01-01T00:00:00.000000"):
- created_at += ".000000"
- created_at += 'Z'
- content_type, size = self.derive_content_type_metadata(
- content_type, size)
- data.append({'last_modified': created_at, 'bytes': size,
- 'content_type': content_type, 'hash': etag,
- 'name': name})
- container_list = json.dumps(data)
+ ret.body = json.dumps([self.update_data_record(record)
+ for record in container_list])
elif out_content_type.endswith('/xml'):
- xml_output = []
- for (name, created_at, size, content_type, etag) in container_list:
- created_at = datetime.utcfromtimestamp(
- float(created_at)).isoformat()
- # python isoformat() doesn't include msecs when zero
- if len(created_at) < len("1970-01-01T00:00:00.000000"):
- created_at += ".000000"
- created_at += 'Z'
- if content_type is None:
- xml_output.append(
- '%s' %
- (saxutils.quoteattr(name), saxutils.escape(name)))
+ doc = Element('container', name=container.decode('utf-8'))
+ for obj in container_list:
+ record = self.update_data_record(obj)
+ if 'subdir' in record:
+ name = record['subdir'].decode('utf-8')
+ sub = SubElement(doc, 'subdir', name=name)
+ SubElement(sub, 'name').text = name
else:
- content_type, size = self.derive_content_type_metadata(
- content_type, size)
- content_type = saxutils.escape(content_type)
- xml_output.append(
- '' %
- (saxutils.escape(name), etag, size, content_type,
- created_at))
- container_list = ''.join([
- '\n',
- '' % saxutils.quoteattr(container),
- ''.join(xml_output), ''])
+ obj_element = SubElement(doc, 'object')
+ for field in ["name", "hash", "bytes", "content_type",
+ "last_modified"]:
+ SubElement(obj_element, field).text = str(
+ record.pop(field)).decode('utf-8')
+ for field in sorted(record.keys()):
+ SubElement(obj_element, field).text = str(
+ record[field]).decode('utf-8')
+ ret.body = tostring(doc, encoding='UTF-8')
else:
if not container_list:
return HTTPNoContent(request=req, headers=resp_headers)
- container_list = '\n'.join(r[0] for r in container_list) + '\n'
- ret = Response(body=container_list, request=req, headers=resp_headers)
- ret.content_type = out_content_type
- ret.charset = 'utf-8'
+ ret.body = '\n'.join(rec[0] for rec in container_list) + '\n'
return ret
@public
diff --git a/test/unit/common/test_swob.py b/test/unit/common/test_swob.py
index aebbd5fbaf..e4ff0ff980 100644
--- a/test/unit/common/test_swob.py
+++ b/test/unit/common/test_swob.py
@@ -952,6 +952,14 @@ class TestResponse(unittest.TestCase):
resp.charset = 'utf16'
self.assertEquals(resp.charset, 'utf16')
+ def test_charset_content_type(self):
+ resp = swift.common.swob.Response(
+ content_type='text/plain', charset='utf-8')
+ self.assertEquals(resp.charset, 'utf-8')
+ resp = swift.common.swob.Response(
+ charset='utf-8', content_type='text/plain')
+ self.assertEquals(resp.charset, 'utf-8')
+
def test_etag(self):
resp = self._get_response()
resp.etag = 'hi'
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 0e9c1286e7..59631496d0 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -1529,6 +1529,27 @@ log_name = %(yarr)s'''
finally:
shutil.rmtree(tmpdir)
+ def test_parse_content_type(self):
+ self.assertEquals(utils.parse_content_type('text/plain'),
+ ('text/plain', []))
+ self.assertEquals(utils.parse_content_type('text/plain;charset=utf-8'),
+ ('text/plain', [('charset', 'utf-8')]))
+ self.assertEquals(
+ utils.parse_content_type('text/plain;hello="world";charset=utf-8'),
+ ('text/plain', [('hello', '"world"'), ('charset', 'utf-8')]))
+ self.assertEquals(
+ utils.parse_content_type('text/plain; hello="world"; a=b'),
+ ('text/plain', [('hello', '"world"'), ('a', 'b')]))
+ self.assertEquals(
+ utils.parse_content_type(r'text/plain; x="\""; a=b'),
+ ('text/plain', [('x', r'"\""'), ('a', 'b')]))
+ self.assertEquals(
+ utils.parse_content_type(r'text/plain; x; a=b'),
+ ('text/plain', [('x', ''), ('a', 'b')]))
+ self.assertEquals(
+ utils.parse_content_type(r'text/plain; x="\""; a'),
+ ('text/plain', [('x', r'"\""'), ('a', '')]))
+
class TestFileLikeIter(unittest.TestCase):
diff --git a/test/unit/container/test_server.py b/test/unit/container/test_server.py
index e70c70e246..6a3b8b1b10 100644
--- a/test/unit/container/test_server.py
+++ b/test/unit/container/test_server.py
@@ -21,6 +21,7 @@ from contextlib import contextmanager
from shutil import rmtree
from StringIO import StringIO
from tempfile import mkdtemp
+from xml.dom import minidom
from eventlet import spawn, Timeout, listen
import simplejson
@@ -890,7 +891,7 @@ class TestContainerController(unittest.TestCase):
'HTTP_X_SIZE': 0})
resp = self.controller.PUT(req)
self.assertEquals(resp.status_int, 201)
- xml_body = '\n' \
+ xml_body = "\n" \
'' \
'' \
''
+
# tests
req = Request.blank('/sda1/p/a/xmlc?format=xml',
environ={'REQUEST_METHOD': 'GET'})
@@ -961,7 +963,8 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
resp = self.controller.PUT(req)
- for i, ctype in enumerate((snowman.encode('utf-8'), 'text/plain; "utf-8"')):
+ for i, ctype in enumerate((snowman.encode('utf-8'),
+ 'text/plain; charset="utf-8"')):
req = Request.blank('/sda1/p/a/c/%s' % i, environ={
'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '1', 'HTTP_X_CONTENT_TYPE': ctype,
@@ -971,7 +974,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?format=json', environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
result = [x['content_type'] for x in simplejson.loads(resp.body)]
- self.assertEquals(result, [u'\u2603', 'text/plain; "utf-8"'])
+ self.assertEquals(result, [u'\u2603', 'text/plain;charset="utf-8"'])
def test_GET_accept_not_valid(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
@@ -1071,7 +1074,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?prefix=US-&delimiter=-&format=xml',
environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
- self.assertEquals(resp.body, ''
+ self.assertEquals(resp.body, ""
'\nUS-OK-'
'US-TX-'
'US-UT-')
@@ -1090,11 +1093,17 @@ class TestContainerController(unittest.TestCase):
req = Request.blank('/sda1/p/a/c?delimiter=/&format=xml',
environ={'REQUEST_METHOD': 'GET'})
resp = self.controller.GET(req)
- self.assertEquals(
- resp.body,
- '\n'
- ''
- '<\'sub\' "dir">/')
+ dom = minidom.parseString(resp.body)
+ self.assert_(len(dom.getElementsByTagName('container')) == 1)
+ container = dom.getElementsByTagName('container')[0]
+ self.assert_(len(container.getElementsByTagName('subdir')) == 1)
+ subdir = container.getElementsByTagName('subdir')[0]
+ self.assertEquals(unicode(subdir.attributes['name'].value),
+ u'<\'sub\' "dir">/')
+ self.assert_(len(subdir.getElementsByTagName('name')) == 1)
+ name = subdir.getElementsByTagName('name')[0]
+ self.assertEquals(unicode(name.childNodes[0].data),
+ u'<\'sub\' "dir">/')
def test_GET_path(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',