From ce569f46517e10f2ce0d27e9ee0a922ad1d84e2f Mon Sep 17 00:00:00 2001 From: Tim Burke <tim.burke@gmail.com> Date: Mon, 18 May 2015 08:05:02 -0700 Subject: [PATCH] Centralize header parsing All response headers are now exposed as unicode objects. Any url-encoding is interpretted as UTF-8; if that causes decoding to fail, the url-encoded form is returned. As a result, deleting DLOs with unicode characters will no longer raise UnicodeEncodeErrors under Python 2. Related-Bug: #1431866 Change-Id: Idb111c5bf3ac1f5ccfa724b3f4ede8f37d5bfac4 --- swiftclient/client.py | 70 ++++++++++++++++++++++------------ swiftclient/service.py | 8 ++-- tests/unit/test_swiftclient.py | 37 ++++++++++++++++++ tests/unit/utils.py | 4 +- 4 files changed, 88 insertions(+), 31 deletions(-) diff --git a/swiftclient/client.py b/swiftclient/client.py index 8466cc5b..c0af72a2 100644 --- a/swiftclient/client.py +++ b/swiftclient/client.py @@ -24,7 +24,7 @@ import warnings from distutils.version import StrictVersion from requests.exceptions import RequestException, SSLError from six.moves import http_client -from six.moves.urllib.parse import quote as _quote +from six.moves.urllib.parse import quote as _quote, unquote from six.moves.urllib.parse import urlparse, urlunparse from time import sleep, time import six @@ -103,6 +103,36 @@ def http_log(args, kwargs, resp, body): log_method("RESP BODY: %s", body) +def parse_header_string(data): + if six.PY2: + if isinstance(data, six.text_type): + # Under Python2 requests only returns binary_type, but if we get + # some stray text_type input, this should prevent unquote from + # interpretting %-encoded data as raw code-points. + data = data.encode('utf8') + try: + unquoted = unquote(data).decode('utf8') + except UnicodeDecodeError: + try: + return data.decode('utf8') + except UnicodeDecodeError: + return quote(data).decode('utf8') + else: + if isinstance(data, six.binary_type): + # Under Python3 requests only returns text_type and tosses (!) the + # rest of the headers. If that ever changes, this should be a sane + # approach. + try: + data = data.decode('ascii') + except UnicodeDecodeError: + data = quote(data) + try: + unquoted = unquote(data, errors='strict') + except UnicodeDecodeError: + return data + return unquoted + + def quote(value, safe='/'): """ Patched version of urllib.quote that encodes utf8 strings before quoting. @@ -472,6 +502,14 @@ def get_auth(auth_url, user, key, **kwargs): return storage_url, token +def resp_header_dict(resp): + resp_headers = {} + for header, value in resp.getheaders(): + header = parse_header_string(header).lower() + resp_headers[header] = parse_header_string(value) + return resp_headers + + def store_response(resp, response_dict): """ store information about an operation into a dict @@ -482,13 +520,9 @@ def store_response(resp, response_dict): status, reason and a dict of lower-cased headers """ if response_dict is not None: - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value - response_dict['status'] = resp.status response_dict['reason'] = resp.reason - response_dict['headers'] = resp_headers + response_dict['headers'] = resp_header_dict(resp) def get_account(url, token, marker=None, limit=None, prefix=None, @@ -545,9 +579,7 @@ def get_account(url, token, marker=None, limit=None, prefix=None, body = resp.read() http_log(("%s?%s" % (url, qs), method,), {'headers': headers}, resp, body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) if resp.status < 200 or resp.status >= 300: raise ClientException('Account GET failed', http_scheme=parsed.scheme, http_host=conn.host, http_path=parsed.path, @@ -589,9 +621,7 @@ def head_account(url, token, http_conn=None, service_token=None): http_host=conn.host, http_path=parsed.path, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) return resp_headers @@ -712,9 +742,7 @@ def get_container(url, token, container, marker=None, limit=None, http_path=cont_path, http_query=qs, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) if resp.status == 204: return resp_headers, [] return resp_headers, parse_api_response(resp_headers, body) @@ -758,9 +786,7 @@ def head_container(url, token, container, http_conn=None, headers=None, http_path=path, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) return resp_headers @@ -992,9 +1018,7 @@ def head_object(url, token, container, name, http_conn=None, http_host=conn.host, http_path=path, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) return resp_headers @@ -1224,9 +1248,7 @@ def get_capabilities(http_conn): http_host=conn.host, http_path=parsed.path, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - resp_headers = {} - for header, value in resp.getheaders(): - resp_headers[header.lower()] = value + resp_headers = resp_header_dict(resp) return parse_api_response(resp_headers, body) diff --git a/swiftclient/service.py b/swiftclient/service.py index 4f331c4e..129ca2f9 100644 --- a/swiftclient/service.py +++ b/swiftclient/service.py @@ -27,7 +27,7 @@ from threading import Thread from six import StringIO, text_type from six.moves.queue import Queue from six.moves.queue import Empty as QueueEmpty -from six.moves.urllib.parse import quote, unquote +from six.moves.urllib.parse import quote from six import Iterator, string_types import json @@ -1861,8 +1861,7 @@ class SwiftService(object): delobjsmap = {} if old_manifest: scontainer, sprefix = old_manifest.split('/', 1) - scontainer = unquote(scontainer) - sprefix = unquote(sprefix).rstrip('/') + '/' + sprefix = sprefix.rstrip('/') + '/' delobjsmap[scontainer] = [] for part in self.list(scontainer, {'prefix': sprefix}): if not part["success"]: @@ -2054,8 +2053,7 @@ class SwiftService(object): dlo_segments_deleted = True segment_pool = self.thread_manager.segment_pool s_container, s_prefix = old_manifest.split('/', 1) - s_container = unquote(s_container) - s_prefix = unquote(s_prefix).rstrip('/') + '/' + s_prefix = s_prefix.rstrip('/') + '/' del_segs = [] for part in self.list( diff --git a/tests/unit/test_swiftclient.py b/tests/unit/test_swiftclient.py index 23b31388..28556295 100644 --- a/tests/unit/test_swiftclient.py +++ b/tests/unit/test_swiftclient.py @@ -130,6 +130,31 @@ class TestHttpHelpers(MockHttpTest): value = u'unicode:\xe9\u20ac' self.assertEqual('unicode%3A%C3%A9%E2%82%AC', c.quote(value)) + def test_parse_header_string(self): + value = b'bytes' + self.assertEqual(u'bytes', c.parse_header_string(value)) + value = u'unicode:\xe9\u20ac' + self.assertEqual(u'unicode:\xe9\u20ac', c.parse_header_string(value)) + value = 'native%20string' + self.assertEqual(u'native string', c.parse_header_string(value)) + + value = b'encoded%20bytes%E2%82%AC' + self.assertEqual(u'encoded bytes\u20ac', c.parse_header_string(value)) + value = 'encoded%20unicode%E2%82%AC' + self.assertEqual(u'encoded unicode\u20ac', + c.parse_header_string(value)) + + value = b'bad%20bytes%ff%E2%82%AC' + self.assertEqual(u'bad%20bytes%ff%E2%82%AC', + c.parse_header_string(value)) + value = u'bad%20unicode%ff\u20ac' + self.assertEqual(u'bad%20unicode%ff\u20ac', + c.parse_header_string(value)) + + value = b'really%20bad\xffbytes' + self.assertEqual(u'really%2520bad%FFbytes', + c.parse_header_string(value)) + def test_http_connection(self): url = 'http://www.test.com' _junk, conn = c.http_connection(url) @@ -686,6 +711,18 @@ class TestGetObject(MockHttpTest): }), ]) + def test_response_headers(self): + c.http_connection = self.fake_http_connection( + 200, headers={'X-Utf-8-Header': b't%c3%a9st', + 'X-Non-Utf-8-Header': b'%ff', + 'X-Binary-Header': b'\xff'}) + conn = c.http_connection('http://www.test.com') + headers, data = c.get_object('url_is_irrelevant', 'TOKEN', + 'container', 'object', http_conn=conn) + self.assertEqual(u't\xe9st', headers.get('x-utf-8-header', '')) + self.assertEqual(u'%ff', headers.get('x-non-utf-8-header', '')) + self.assertEqual(u'%FF', headers.get('x-binary-header', '')) + def test_chunk_size_read_method(self): conn = c.Connection('http://auth.url/', 'some_user', 'some_key') with mock.patch('swiftclient.client.get_auth_1_0') as mock_get_auth: diff --git a/tests/unit/utils.py b/tests/unit/utils.py index ac9aefdb..a9759eb3 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -124,7 +124,7 @@ def fake_http_connect(*code_iter, **kwargs): def getheaders(self): if self.headers: return self.headers.items() - headers = {'content-length': len(self.body), + headers = {'content-length': str(len(self.body)), 'content-type': 'x-application/test', 'x-timestamp': self.timestamp, 'last-modified': self.timestamp, @@ -132,7 +132,7 @@ def fake_http_connect(*code_iter, **kwargs): 'etag': self.etag or '"%s"' % EMPTY_ETAG, 'x-works': 'yes', - 'x-account-container-count': 12345} + 'x-account-container-count': '12345'} if not self.timestamp: del headers['x-timestamp'] try: