Centralize header parsing

All response headers are now exposed as unicode objects. Any
url-encoding is interpreted as UTF-8; if that causes decoding to fail,
the url-encoded form is returned.

As a result, deleting DLOs with unicode characters will no longer raise
UnicodeEncodeErrors under Python 2.

Related-Bug: #1431866
Change-Id: Idb111c5bf3ac1f5ccfa724b3f4ede8f37d5bfac4
This commit is contained in:
Tim Burke 2015-05-18 08:05:02 -07:00
parent ff073ab34a
commit ce569f4651
4 changed files with 88 additions and 31 deletions

@ -24,7 +24,7 @@ import warnings
from distutils.version import StrictVersion from distutils.version import StrictVersion
from requests.exceptions import RequestException, SSLError from requests.exceptions import RequestException, SSLError
from six.moves import http_client from six.moves import http_client
from six.moves.urllib.parse import quote as _quote from six.moves.urllib.parse import quote as _quote, unquote
from six.moves.urllib.parse import urlparse, urlunparse from six.moves.urllib.parse import urlparse, urlunparse
from time import sleep, time from time import sleep, time
import six import six
@ -103,6 +103,36 @@ def http_log(args, kwargs, resp, body):
log_method("RESP BODY: %s", body) log_method("RESP BODY: %s", body)
def parse_header_string(data):
    """Convert a response header value to a unicode string.

    Any %-encoded sequences are decoded as UTF-8. When that decoding
    fails, the %-encoded form is returned instead, so callers always get
    back *some* unicode string rather than an exception.
    """
    if six.PY2:
        if isinstance(data, six.text_type):
            # requests only hands us binary_type on py2; normalize any
            # stray text_type so unquote does not misread %-escapes as
            # raw code points.
            data = data.encode('utf8')
        try:
            return unquote(data).decode('utf8')
        except UnicodeDecodeError:
            pass
        # Unquoted form was not valid UTF-8 -- fall back to the raw
        # value, and failing that, a fully re-quoted form.
        try:
            return data.decode('utf8')
        except UnicodeDecodeError:
            return quote(data).decode('utf8')
    # Python 3: requests normally gives us text_type and drops (!) any
    # non-decodable headers; handle binary_type sanely in case that
    # behavior ever changes.
    if isinstance(data, six.binary_type):
        try:
            data = data.decode('ascii')
        except UnicodeDecodeError:
            data = quote(data)
    try:
        return unquote(data, errors='strict')
    except UnicodeDecodeError:
        # Not valid UTF-8 once unquoted; return the quoted form as-is.
        return data
def quote(value, safe='/'): def quote(value, safe='/'):
""" """
Patched version of urllib.quote that encodes utf8 strings before quoting. Patched version of urllib.quote that encodes utf8 strings before quoting.
@ -472,6 +502,14 @@ def get_auth(auth_url, user, key, **kwargs):
return storage_url, token return storage_url, token
def resp_header_dict(resp):
    """Build a dict of parsed, lower-cased headers from a response.

    Both names and values are run through parse_header_string so the
    result contains only unicode strings.
    """
    return dict(
        (parse_header_string(key).lower(), parse_header_string(val))
        for key, val in resp.getheaders())
def store_response(resp, response_dict): def store_response(resp, response_dict):
""" """
store information about an operation into a dict store information about an operation into a dict
@ -482,13 +520,9 @@ def store_response(resp, response_dict):
status, reason and a dict of lower-cased headers status, reason and a dict of lower-cased headers
""" """
if response_dict is not None: if response_dict is not None:
resp_headers = {}
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
response_dict['status'] = resp.status response_dict['status'] = resp.status
response_dict['reason'] = resp.reason response_dict['reason'] = resp.reason
response_dict['headers'] = resp_headers response_dict['headers'] = resp_header_dict(resp)
def get_account(url, token, marker=None, limit=None, prefix=None, def get_account(url, token, marker=None, limit=None, prefix=None,
@ -545,9 +579,7 @@ def get_account(url, token, marker=None, limit=None, prefix=None,
body = resp.read() body = resp.read()
http_log(("%s?%s" % (url, qs), method,), {'headers': headers}, resp, body) http_log(("%s?%s" % (url, qs), method,), {'headers': headers}, resp, body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
if resp.status < 200 or resp.status >= 300: if resp.status < 200 or resp.status >= 300:
raise ClientException('Account GET failed', http_scheme=parsed.scheme, raise ClientException('Account GET failed', http_scheme=parsed.scheme,
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
@ -589,9 +621,7 @@ def head_account(url, token, http_conn=None, service_token=None):
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -712,9 +742,7 @@ def get_container(url, token, container, marker=None, limit=None,
http_path=cont_path, http_query=qs, http_path=cont_path, http_query=qs,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
if resp.status == 204: if resp.status == 204:
return resp_headers, [] return resp_headers, []
return resp_headers, parse_api_response(resp_headers, body) return resp_headers, parse_api_response(resp_headers, body)
@ -758,9 +786,7 @@ def head_container(url, token, container, http_conn=None, headers=None,
http_path=path, http_status=resp.status, http_path=path, http_status=resp.status,
http_reason=resp.reason, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -992,9 +1018,7 @@ def head_object(url, token, container, name, http_conn=None,
http_host=conn.host, http_path=path, http_host=conn.host, http_path=path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -1224,9 +1248,7 @@ def get_capabilities(http_conn):
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return parse_api_response(resp_headers, body) return parse_api_response(resp_headers, body)

@ -27,7 +27,7 @@ from threading import Thread
from six import StringIO, text_type from six import StringIO, text_type
from six.moves.queue import Queue from six.moves.queue import Queue
from six.moves.queue import Empty as QueueEmpty from six.moves.queue import Empty as QueueEmpty
from six.moves.urllib.parse import quote, unquote from six.moves.urllib.parse import quote
from six import Iterator, string_types from six import Iterator, string_types
import json import json
@ -1861,8 +1861,7 @@ class SwiftService(object):
delobjsmap = {} delobjsmap = {}
if old_manifest: if old_manifest:
scontainer, sprefix = old_manifest.split('/', 1) scontainer, sprefix = old_manifest.split('/', 1)
scontainer = unquote(scontainer) sprefix = sprefix.rstrip('/') + '/'
sprefix = unquote(sprefix).rstrip('/') + '/'
delobjsmap[scontainer] = [] delobjsmap[scontainer] = []
for part in self.list(scontainer, {'prefix': sprefix}): for part in self.list(scontainer, {'prefix': sprefix}):
if not part["success"]: if not part["success"]:
@ -2054,8 +2053,7 @@ class SwiftService(object):
dlo_segments_deleted = True dlo_segments_deleted = True
segment_pool = self.thread_manager.segment_pool segment_pool = self.thread_manager.segment_pool
s_container, s_prefix = old_manifest.split('/', 1) s_container, s_prefix = old_manifest.split('/', 1)
s_container = unquote(s_container) s_prefix = s_prefix.rstrip('/') + '/'
s_prefix = unquote(s_prefix).rstrip('/') + '/'
del_segs = [] del_segs = []
for part in self.list( for part in self.list(

@ -130,6 +130,31 @@ class TestHttpHelpers(MockHttpTest):
value = u'unicode:\xe9\u20ac' value = u'unicode:\xe9\u20ac'
self.assertEqual('unicode%3A%C3%A9%E2%82%AC', c.quote(value)) self.assertEqual('unicode%3A%C3%A9%E2%82%AC', c.quote(value))
def test_parse_header_string(self):
    """parse_header_string handles bytes/unicode, valid and bad escapes."""
    cases = [
        (b'bytes', u'bytes'),
        (u'unicode:\xe9\u20ac', u'unicode:\xe9\u20ac'),
        ('native%20string', u'native string'),
        (b'encoded%20bytes%E2%82%AC', u'encoded bytes\u20ac'),
        ('encoded%20unicode%E2%82%AC', u'encoded unicode\u20ac'),
        # %ff does not unquote to valid UTF-8, so the quoted form
        # is returned unchanged
        (b'bad%20bytes%ff%E2%82%AC', u'bad%20bytes%ff%E2%82%AC'),
        (u'bad%20unicode%ff\u20ac', u'bad%20unicode%ff\u20ac'),
        # a raw \xff byte forces the whole value to be re-quoted
        (b'really%20bad\xffbytes', u'really%2520bad%FFbytes'),
    ]
    for value, expected in cases:
        self.assertEqual(expected, c.parse_header_string(value))
def test_http_connection(self): def test_http_connection(self):
url = 'http://www.test.com' url = 'http://www.test.com'
_junk, conn = c.http_connection(url) _junk, conn = c.http_connection(url)
@ -686,6 +711,18 @@ class TestGetObject(MockHttpTest):
}), }),
]) ])
def test_response_headers(self):
    """Response headers come back unicode, with %-escapes decoded as UTF-8."""
    raw_headers = {
        'X-Utf-8-Header': b't%c3%a9st',    # valid UTF-8 once unquoted
        'X-Non-Utf-8-Header': b'%ff',      # invalid UTF-8; left quoted
        'X-Binary-Header': b'\xff',        # raw byte; gets re-quoted
    }
    c.http_connection = self.fake_http_connection(200, headers=raw_headers)
    connection = c.http_connection('http://www.test.com')
    headers, data = c.get_object('url_is_irrelevant', 'TOKEN',
                                 'container', 'object',
                                 http_conn=connection)
    self.assertEqual(u't\xe9st', headers.get('x-utf-8-header', ''))
    self.assertEqual(u'%ff', headers.get('x-non-utf-8-header', ''))
    self.assertEqual(u'%FF', headers.get('x-binary-header', ''))
def test_chunk_size_read_method(self): def test_chunk_size_read_method(self):
conn = c.Connection('http://auth.url/', 'some_user', 'some_key') conn = c.Connection('http://auth.url/', 'some_user', 'some_key')
with mock.patch('swiftclient.client.get_auth_1_0') as mock_get_auth: with mock.patch('swiftclient.client.get_auth_1_0') as mock_get_auth:

@ -124,7 +124,7 @@ def fake_http_connect(*code_iter, **kwargs):
def getheaders(self): def getheaders(self):
if self.headers: if self.headers:
return self.headers.items() return self.headers.items()
headers = {'content-length': len(self.body), headers = {'content-length': str(len(self.body)),
'content-type': 'x-application/test', 'content-type': 'x-application/test',
'x-timestamp': self.timestamp, 'x-timestamp': self.timestamp,
'last-modified': self.timestamp, 'last-modified': self.timestamp,
@ -132,7 +132,7 @@ def fake_http_connect(*code_iter, **kwargs):
'etag': 'etag':
self.etag or '"%s"' % EMPTY_ETAG, self.etag or '"%s"' % EMPTY_ETAG,
'x-works': 'yes', 'x-works': 'yes',
'x-account-container-count': 12345} 'x-account-container-count': '12345'}
if not self.timestamp: if not self.timestamp:
del headers['x-timestamp'] del headers['x-timestamp']
try: try: