# One or more characters that are not separators, control characters
# (0x00-0x1f, 0x7f) or space.
_rfc_token = r'[^()<>@,;:\"/\[\]?={}\x00-\x20\x7f]+'
# Matches a single ";name" or ";name=value" parameter, where value is either
# a token or a double-quoted string (backslash escapes allowed inside).
_rfc_extension_pattern = re.compile(
    r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token +
    r'|"(?:[^"\\]|\\.)*"))?)')


def parse_content_type(content_type):
    """
    Split a content-type value into the content type and its parameters.
    RFC 2616 sec 14.17 and 3.7 are pertinent.

    Examples:

        'text/plain; charset=UTF-8' -> ('text/plain', [('charset', 'UTF-8')])
        'text/plain; charset=UTF-8; level=1' ->
            ('text/plain', [('charset', 'UTF-8'), ('level', '1')])

    Everything before the first ';' is returned untouched (it is not
    stripped).  Quoted values keep their surrounding double quotes, and a
    parameter given without a value is reported with '' as its value.

    :param content_type: content_type to parse
    :returns: a tuple containing (content type, list of k, v parameter tuples)
    """
    media_type, separator, remainder = content_type.partition(';')
    if not separator:
        # No parameters at all: hand the value back unchanged.
        return content_type, []
    # The pattern expects every parameter to be introduced by ';', so the
    # separator that partition() removed is put back in front of the rest.
    parameters = [
        (name.strip(), value.strip())
        for name, value in _rfc_extension_pattern.findall(
            separator + remainder)]
    return media_type, parameters
def update_data_record(self, record):
    """
    Turn one container listing row into the dict that every serialization
    format shares.

    A row whose content type is None is reported as a subdir entry.
    Otherwise the created time becomes an ISO 8601 'last_modified' value
    and a 'swift_bytes' content type parameter, when present, overrides
    the size reported as 'bytes' and is dropped from the content type.

    :param record: object entry record, a
                   (name, created, size, content_type, etag) sequence
    :returns: dict holding either just 'subdir', or 'bytes', 'hash',
              'name', 'last_modified' and 'content_type'
    """
    name, created, size, content_type, etag = record
    if content_type is None:
        return {'subdir': name}

    stamp = datetime.utcfromtimestamp(float(created)).isoformat()
    # python isoformat() doesn't include msecs when zero
    if len(stamp) < len("1970-01-01T00:00:00.000000"):
        stamp += ".000000"

    entry = {'bytes': size, 'hash': etag, 'name': name,
             'last_modified': stamp + 'Z'}

    listed_type, parameters = parse_content_type(content_type)
    for key, value in parameters:
        if key != 'swift_bytes':
            # Every other parameter is appended back onto the content type.
            listed_type += ';%s=%s' % (key, value)
            continue
        try:
            entry['bytes'] = int(value)
        except ValueError:
            # Keep the size from the record when the override is unusable.
            self.logger.exception("Invalid swift_bytes")
    entry['content_type'] = listed_type
    return entry
in self.save_headers or + key.lower().startswith('x-container-meta-')): + resp_headers[key] = value + ret = Response(request=req, headers=resp_headers, + content_type=out_content_type, charset='utf-8') container_list = broker.list_objects_iter(limit, marker, end_marker, prefix, delimiter, path) if out_content_type == 'application/json': - data = [] - for (name, created_at, size, content_type, etag) in container_list: - if content_type is None: - data.append({"subdir": name}) - else: - created_at = datetime.utcfromtimestamp( - float(created_at)).isoformat() - # python isoformat() doesn't include msecs when zero - if len(created_at) < len("1970-01-01T00:00:00.000000"): - created_at += ".000000" - created_at += 'Z' - content_type, size = self.derive_content_type_metadata( - content_type, size) - data.append({'last_modified': created_at, 'bytes': size, - 'content_type': content_type, 'hash': etag, - 'name': name}) - container_list = json.dumps(data) + ret.body = json.dumps([self.update_data_record(record) + for record in container_list]) elif out_content_type.endswith('/xml'): - xml_output = [] - for (name, created_at, size, content_type, etag) in container_list: - created_at = datetime.utcfromtimestamp( - float(created_at)).isoformat() - # python isoformat() doesn't include msecs when zero - if len(created_at) < len("1970-01-01T00:00:00.000000"): - created_at += ".000000" - created_at += 'Z' - if content_type is None: - xml_output.append( - '%s' % - (saxutils.quoteattr(name), saxutils.escape(name))) + doc = Element('container', name=container.decode('utf-8')) + for obj in container_list: + record = self.update_data_record(obj) + if 'subdir' in record: + name = record['subdir'].decode('utf-8') + sub = SubElement(doc, 'subdir', name=name) + SubElement(sub, 'name').text = name else: - content_type, size = self.derive_content_type_metadata( - content_type, size) - content_type = saxutils.escape(content_type) - xml_output.append( - '%s%s' - '%d%s' - '%s' % - 
def test_charset_content_type(self):
    """
    charset passed to the Response constructor together with content_type
    must still be set afterwards, whichever keyword is written first.
    """
    make_response = swift.common.swob.Response

    type_first = make_response(content_type='text/plain', charset='utf-8')
    self.assertEquals(type_first.charset, 'utf-8')

    charset_first = make_response(charset='utf-8',
                                  content_type='text/plain')
    self.assertEquals(charset_first.charset, 'utf-8')
def test_parse_content_type(self):
    """
    Check utils.parse_content_type against bare types, token values,
    quoted values (including an escaped quote) and valueless parameters.
    """
    cases = [
        ('text/plain',
         ('text/plain', [])),
        ('text/plain;charset=utf-8',
         ('text/plain', [('charset', 'utf-8')])),
        ('text/plain;hello="world";charset=utf-8',
         ('text/plain', [('hello', '"world"'), ('charset', 'utf-8')])),
        ('text/plain; hello="world"; a=b',
         ('text/plain', [('hello', '"world"'), ('a', 'b')])),
        (r'text/plain; x="\""; a=b',
         ('text/plain', [('x', r'"\""'), ('a', 'b')])),
        (r'text/plain; x; a=b',
         ('text/plain', [('x', ''), ('a', 'b')])),
        (r'text/plain; x="\""; a',
         ('text/plain', [('x', r'"\""'), ('a', '')])),
    ]
    for header_value, expected in cases:
        self.assertEquals(utils.parse_content_type(header_value), expected)
enumerate((snowman.encode('utf-8'), + 'text/plain; charset="utf-8"')): req = Request.blank('/sda1/p/a/c/%s' % i, environ={ 'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1', 'HTTP_X_CONTENT_TYPE': ctype, @@ -971,7 +974,7 @@ class TestContainerController(unittest.TestCase): req = Request.blank('/sda1/p/a/c?format=json', environ={'REQUEST_METHOD': 'GET'}) resp = self.controller.GET(req) result = [x['content_type'] for x in simplejson.loads(resp.body)] - self.assertEquals(result, [u'\u2603', 'text/plain; "utf-8"']) + self.assertEquals(result, [u'\u2603', 'text/plain;charset="utf-8"']) def test_GET_accept_not_valid(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', @@ -1071,7 +1074,7 @@ class TestContainerController(unittest.TestCase): req = Request.blank('/sda1/p/a/c?prefix=US-&delimiter=-&format=xml', environ={'REQUEST_METHOD': 'GET'}) resp = self.controller.GET(req) - self.assertEquals(resp.body, '' + self.assertEquals(resp.body, "" '\nUS-OK-' 'US-TX-' 'US-UT-') @@ -1090,11 +1093,17 @@ class TestContainerController(unittest.TestCase): req = Request.blank('/sda1/p/a/c?delimiter=/&format=xml', environ={'REQUEST_METHOD': 'GET'}) resp = self.controller.GET(req) - self.assertEquals( - resp.body, - '\n' - '' - '<\'sub\' "dir">/') + dom = minidom.parseString(resp.body) + self.assert_(len(dom.getElementsByTagName('container')) == 1) + container = dom.getElementsByTagName('container')[0] + self.assert_(len(container.getElementsByTagName('subdir')) == 1) + subdir = container.getElementsByTagName('subdir')[0] + self.assertEquals(unicode(subdir.attributes['name'].value), + u'<\'sub\' "dir">/') + self.assert_(len(subdir.getElementsByTagName('name')) == 1) + name = subdir.getElementsByTagName('name')[0] + self.assertEquals(unicode(name.childNodes[0].data), + u'<\'sub\' "dir">/') def test_GET_path(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',