From f4a2b16c2cc65410765abdff7a45532305a4548f Mon Sep 17 00:00:00 2001
From: Timur Alperovich <timur@timuralp.com>
Date: Tue, 17 Apr 2018 14:36:57 -0700
Subject: [PATCH] Properly handle unicode headers.

Fix Unicode handling in Python 3 and Python 2. There are currently two
failure modes. In Python 2, swiftclient fails to log in debug mode if
the account name has a non-ASCII character. This is because the account
name appears in the storage URL, which we attempt to pass to the
logger as a byte string (whereas it should be a unicode string). This
patch converts the path strings to unicode by calling the
parse_header_string() function before they are logged.

The second failure mode is with Python 3, where http_lib returns header
strings that were decoded as latin-1 (iso-8859-1), but swiftclient
expects UTF-8. The patch re-encodes each response header to latin-1
bytes and decodes those bytes as UTF-8, so that we can properly handle
non-ASCII headers in responses.

Change-Id: Ifa7f3d5af71bde8127129f1f8603772d80d063c1
---
 swiftclient/client.py          | 29 +++++++++++++---
 tests/unit/test_swiftclient.py | 61 ++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/swiftclient/client.py b/swiftclient/client.py
index 8cbdf452..a518d321 100644
--- a/swiftclient/client.py
+++ b/swiftclient/client.py
@@ -151,7 +151,7 @@ def http_log(args, kwargs, resp, body):
         elif element in ('GET', 'POST', 'PUT'):
             string_parts.append(' -X %s' % element)
         else:
-            string_parts.append(' %s' % element)
+            string_parts.append(' %s' % parse_header_string(element))
     if 'headers' in kwargs:
         headers = scrub_headers(kwargs['headers'])
         for element in headers:
@@ -455,11 +455,23 @@ class HTTPConnection(object):
         self.resp.status = self.resp.status_code
         old_getheader = self.resp.raw.getheader
 
+        def _decode_header(string):
+            if string is None or six.PY2:
+                return string
+            return string.encode('iso-8859-1').decode('utf-8')
+
+        def _encode_header(string):
+            if string is None or six.PY2:
+                return string
+            return string.encode('utf-8').decode('iso-8859-1')
+
         def getheaders():
-            return self.resp.headers.items()
+            return [(_decode_header(k), _decode_header(v))
+                    for k, v in self.resp.headers.items()]
 
         def getheader(k, v=None):
-            return old_getheader(k.lower(), v)
+            return _decode_header(old_getheader(
+                _encode_header(k.lower()), _encode_header(v)))
 
         def releasing_read(*args, **kwargs):
             chunk = self.resp.raw.read(*args, **kwargs)
@@ -513,8 +525,11 @@ def get_auth_1_0(url, user, key, snet, **kwargs):
         netloc = parsed[1]
         parsed[1] = 'snet-' + netloc
         url = urlunparse(parsed)
-    return url, resp.getheader('x-storage-token',
-                               resp.getheader('x-auth-token'))
+
+    auth_token = resp.getheader('x-auth-token')
+    if auth_token is not None:
+        auth_token = parse_header_string(auth_token)
+    return url, resp.getheader('x-storage-token', auth_token)
 
 
 def get_keystoneclient_2_0(auth_url, user, key, os_options, **kwargs):
@@ -694,10 +709,14 @@ def get_auth(auth_url, user, key, **kwargs):
         raise ClientException('Unknown auth_version %s specified and no '
                               'session found.' % auth_version)
 
+    if token is not None:
+        token = parse_header_string(token)
     # Override storage url, if necessary
     if os_options.get('object_storage_url'):
         return os_options['object_storage_url'], token
     else:
+        if storage_url is not None:
+            return parse_header_string(storage_url), token
         return storage_url, token
 
 
diff --git a/tests/unit/test_swiftclient.py b/tests/unit/test_swiftclient.py
index b6d68568..3303372d 100644
--- a/tests/unit/test_swiftclient.py
+++ b/tests/unit/test_swiftclient.py
@@ -1896,6 +1896,57 @@ class TestHTTPConnection(MockHttpTest):
         self.assertFalse(resp.read())
         self.assertTrue(resp.closed)
 
+    @unittest.skipIf(six.PY3, 'python2 specific test')
+    def test_response_python2_headers(self):
+        '''Test utf-8 headers in Python 2.
+        '''
+        _, conn = c.http_connection(u'http://www.test.com/')
+        conn.resp = MockHttpResponse(
+            status=200,
+            headers={
+                '\xd8\xaa-unicode': '\xd8\xaa-value',
+                'empty-header': ''
+            }
+        )
+
+        resp = conn.getresponse()
+        self.assertEqual(
+            '\xd8\xaa-value', resp.getheader('\xd8\xaa-unicode'))
+        self.assertEqual(
+            '\xd8\xaa-value', resp.getheader('\xd8\xaa-UNICODE'))
+        self.assertEqual('', resp.getheader('empty-header'))
+        self.assertEqual(
+            dict([('\xd8\xaa-unicode', '\xd8\xaa-value'),
+                  ('empty-header', ''),
+                  ('etag', '"%s"' % EMPTY_ETAG)]),
+            dict(resp.getheaders()))
+
+    @unittest.skipIf(six.PY2, 'python3 specific test')
+    def test_response_python3_headers(self):
+        '''Test latin1-encoded headers in Python 3.
+        '''
+        _, conn = c.http_connection(u'http://www.test.com/')
+        conn.resp = MockHttpResponse(
+            status=200,
+            headers={
+                b'\xd8\xaa-unicode'.decode('iso-8859-1'):
+                b'\xd8\xaa-value'.decode('iso-8859-1'),
+                'empty-header': ''
+            }
+        )
+
+        resp = conn.getresponse()
+        self.assertEqual(
+            '\u062a-value', resp.getheader('\u062a-unicode'))
+        self.assertEqual(
+            '\u062a-value', resp.getheader('\u062a-UNICODE'))
+        self.assertEqual('', resp.getheader('empty-header'))
+        self.assertEqual(
+            dict([('\u062a-unicode', '\u062a-value'),
+                  ('empty-header', ''),
+                  ('etag', ('"%s"' % EMPTY_ETAG))]),
+            dict(resp.getheaders()))
+
 
 class TestConnection(MockHttpTest):
 
@@ -2839,6 +2890,16 @@ class TestLogging(MockHttpTest):
             self.assertIn('X-Storage-Token', output)
             self.assertIn(unicode_token_value, output)
 
+    @mock.patch('swiftclient.client.logger.debug')
+    def test_unicode_path(self, mock_log):
+        path = u'http://swift/v1/AUTH_account-\u062a'.encode('utf-8')
+        c.http_log(['GET', path], {},
+                   MockHttpResponse(status=200, headers=[]), '')
+        request_log_line = mock_log.mock_calls[0]
+        self.assertEqual('REQ: %s', request_log_line[1][0])
+        self.assertEqual(u'curl -i -X GET %s' % path.decode('utf-8'),
+                         request_log_line[1][1])
+
 
 class TestCloseConnection(MockHttpTest):