From da42fe01d99eaeb0cda5387efefeb9e284c6e20b Mon Sep 17 00:00:00 2001
From: Yuan Zhou <yuan.zhou@intel.com>
Date: Fri, 16 Aug 2013 16:55:39 +0800
Subject: [PATCH] Clean up utf8ness quote

Move common codes on utf8ness and quote to common/utils.py

Change-Id: I91d98a06fa94ac608119a0d70adedc6d73337c64
---
 swift/common/direct_client.py          |  8 +-------
 swift/common/internal_client.py        | 16 ++--------------
 swift/common/middleware/staticweb.py   | 26 ++++++--------------------
 swift/common/utils.py                  |  9 ++++++++-
 test/unit/common/test_direct_client.py |  8 --------
 test/unit/common/test_utils.py         | 17 +++++++++++++++++
 6 files changed, 34 insertions(+), 50 deletions(-)

diff --git a/swift/common/direct_client.py b/swift/common/direct_client.py
index 8b571be5de..d85c7016de 100644
--- a/swift/common/direct_client.py
+++ b/swift/common/direct_client.py
@@ -22,7 +22,6 @@ import os
 import socket
 from httplib import HTTPException
 from time import time
-from urllib import quote as _quote
 
 from eventlet import sleep, Timeout
 
@@ -32,6 +31,7 @@ from swift.common.utils import normalize_timestamp, FileLikeIter
 from swift.common.http import HTTP_NO_CONTENT, HTTP_INSUFFICIENT_STORAGE, \
     is_success, is_server_error
 from swift.common.swob import HeaderKeyDict
+from swift.common.utils import quote
 
 
 def _get_direct_account_container(path, stype, node, part,
@@ -77,12 +77,6 @@ def _get_direct_account_container(path, stype, node, part,
     return resp_headers, json_loads(resp.read())
 
 
-def quote(value, safe='/'):
-    if isinstance(value, unicode):
-        value = value.encode('utf8')
-    return _quote(value, safe)
-
-
 def gen_headers(hdrs_in=None, add_ts=False):
     hdrs_out = HeaderKeyDict(hdrs_in) if hdrs_in else HeaderKeyDict()
     if add_ts:
diff --git a/swift/common/internal_client.py b/swift/common/internal_client.py
index 695017ceea..32f6af4553 100644
--- a/swift/common/internal_client.py
+++ b/swift/common/internal_client.py
@@ -18,11 +18,11 @@ import json
 from paste.deploy import loadapp
 import struct
 from sys import exc_info
-from urllib import quote
 import zlib
 from gettext import gettext as _
 from zlib import compressobj
 
+from swift.common.utils import quote
 from swift.common.http import HTTP_NOT_FOUND
 from swift.common.swob import Request
 
@@ -219,10 +219,7 @@ class InternalClient(object):
         :raises Exception: Exception is raised when code fails in an
                            unexpected way.
         """
-        if isinstance(marker, unicode):
-            marker = marker.encode('utf8')
-        if isinstance(end_marker, unicode):
-            end_marker = end_marker.encode('utf8')
+
         while True:
             resp = self.make_request(
                 'GET', '%s?format=json&marker=%s&end_marker=%s' %
@@ -250,15 +247,6 @@ class InternalClient(object):
                             not.
         """
 
-        if isinstance(account, unicode):
-            account = account.encode('utf-8')
-
-        if isinstance(container, unicode):
-            container = container.encode('utf-8')
-
-        if isinstance(obj, unicode):
-            obj = obj.encode('utf-8')
-
         path = '/v1/%s' % quote(account)
         if container:
             path += '/%s' % quote(container)
diff --git a/swift/common/middleware/staticweb.py b/swift/common/middleware/staticweb.py
index cdef7b1c00..08b3a19d95 100644
--- a/swift/common/middleware/staticweb.py
+++ b/swift/common/middleware/staticweb.py
@@ -118,29 +118,15 @@ Example usage of this middleware via ``swift``:
 
 import cgi
 import time
-from urllib import quote as urllib_quote
 
 from swift.common.utils import human_readable, split_path, config_true_value, \
-    json
+    json, quote, get_valid_utf8_str
 from swift.common.wsgi import make_pre_authed_env, WSGIContext
 from swift.common.http import is_success, is_redirection, HTTP_NOT_FOUND
 from swift.common.swob import Response, HTTPMovedPermanently, HTTPNotFound
 from swift.proxy.controllers.base import get_container_info
 
 
-def ensure_utf8_bytes(value):
-    if isinstance(value, unicode):
-        value = value.encode('utf-8')
-    return value
-
-
-def quote(value, safe='/'):
-    """
-    Patched version of urllib.quote that encodes utf-8 strings before quoting
-    """
-    return urllib_quote(ensure_utf8_bytes(value), safe)
-
-
 class _StaticWebContext(WSGIContext):
     """
     The Static Web WSGI middleware filter; serves container data as a
@@ -279,7 +265,7 @@ class _StaticWebContext(WSGIContext):
                     '   </tr>\n'
         for item in listing:
             if 'subdir' in item:
-                subdir = ensure_utf8_bytes(item['subdir'])
+                subdir = get_valid_utf8_str(item['subdir'])
                 if prefix:
                     subdir = subdir[len(prefix):]
                 body += '   <tr class="item subdir">\n' \
@@ -290,11 +276,11 @@ class _StaticWebContext(WSGIContext):
                         (quote(subdir), cgi.escape(subdir))
         for item in listing:
             if 'name' in item:
-                name = ensure_utf8_bytes(item['name'])
+                name = get_valid_utf8_str(item['name'])
                 if prefix:
                     name = name[len(prefix):]
-                content_type = ensure_utf8_bytes(item['content_type'])
-                bytes = ensure_utf8_bytes(human_readable(item['bytes']))
+                content_type = get_valid_utf8_str(item['content_type'])
+                bytes = get_valid_utf8_str(human_readable(item['bytes']))
                 last_modified = (cgi.escape(item['last_modified']).
                                  split('.')[0].replace('T', ' '))
                 body += '   <tr class="item %s">\n' \
@@ -305,7 +291,7 @@ class _StaticWebContext(WSGIContext):
                         (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                                   for t in content_type.split('/')),
                          quote(name), cgi.escape(name),
-                         bytes, ensure_utf8_bytes(last_modified))
+                         bytes, get_valid_utf8_str(last_modified))
         body += '  </table>\n' \
                 ' </body>\n' \
                 '</html>\n'
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 4540858f8e..cfc390461e 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -28,7 +28,7 @@ import uuid
 import functools
 from hashlib import md5
 from random import random, shuffle
-from urllib import quote
+from urllib import quote as _quote
 from contextlib import contextmanager, closing
 from gettext import gettext as _
 import ctypes
@@ -2253,3 +2253,10 @@ def parse_content_type(content_type):
             value = m[1].strip()
             parm_list.append((key, value))
     return content_type, parm_list
+
+
+def quote(value, safe='/'):
+    """
+    Patched version of urllib.quote that encodes utf-8 strings before quoting
+    """
+    return _quote(get_valid_utf8_str(value), safe)
diff --git a/test/unit/common/test_direct_client.py b/test/unit/common/test_direct_client.py
index e5cf18d4e5..a06a94d2e9 100644
--- a/test/unit/common/test_direct_client.py
+++ b/test/unit/common/test_direct_client.py
@@ -72,14 +72,6 @@ def mock_http_connect(status, fake_headers=None, body=None):
 
 class TestDirectClient(unittest.TestCase):
 
-    def test_quote(self):
-        res = direct_client.quote('123')
-        assert res == '123'
-        res = direct_client.quote('1&2&/3')
-        assert res == '1%262%26/3'
-        res = direct_client.quote('1&2&3', safe='&')
-        assert res == '1&2&3'
-
     def test_gen_headers(self):
         hdrs = direct_client.gen_headers()
         assert 'user-agent' in hdrs
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index c9bab18d02..718a982c80 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -1570,6 +1570,23 @@ log_name = %(yarr)s'''
             utils.parse_content_type(r'text/plain; x="\""; a'),
             ('text/plain', [('x', r'"\""'), ('a', '')]))
 
+    def test_quote(self):
+        res = utils.quote('/v1/a/c3/subdirx/')
+        assert res == '/v1/a/c3/subdirx/'
+        res = utils.quote('/v1/a&b/c3/subdirx/')
+        assert res == '/v1/a%26b/c3/subdirx/'
+        res = utils.quote('/v1/a&b/c3/subdirx/', safe='&')
+        assert res == '%2Fv1%2Fa&b%2Fc3%2Fsubdirx%2F'
+        unicode_sample = u'\uc77c\uc601'
+        account = 'abc_' + unicode_sample
+        valid_utf8_str = utils.get_valid_utf8_str(account)
+        account = 'abc_' + unicode_sample.encode('utf-8')[::-1]
+        invalid_utf8_str = utils.get_valid_utf8_str(account)
+        self.assertEquals('abc_%EC%9D%BC%EC%98%81',
+                          utils.quote(valid_utf8_str))
+        self.assertEquals('abc_%EF%BF%BD%EF%BF%BD%EC%BC%9D%EF%BF%BD',
+                          utils.quote(invalid_utf8_str))
+
 
 class TestFileLikeIter(unittest.TestCase):