Centralize header parsing

All response headers are now exposed as unicode objects. Any
url-encoding is interpreted as UTF-8; if that causes decoding to fail,
the url-encoded form is returned.

As a result, deleting DLOs with unicode characters will no longer raise
UnicodeEncodeErrors under Python 2.

Related-Bug: #1431866
Change-Id: Idb111c5bf3ac1f5ccfa724b3f4ede8f37d5bfac4
This commit is contained in:
Tim Burke 2015-05-18 08:05:02 -07:00
parent ff073ab34a
commit ce569f4651
4 changed files with 88 additions and 31 deletions

@ -24,7 +24,7 @@ import warnings
from distutils.version import StrictVersion from distutils.version import StrictVersion
from requests.exceptions import RequestException, SSLError from requests.exceptions import RequestException, SSLError
from six.moves import http_client from six.moves import http_client
from six.moves.urllib.parse import quote as _quote from six.moves.urllib.parse import quote as _quote, unquote
from six.moves.urllib.parse import urlparse, urlunparse from six.moves.urllib.parse import urlparse, urlunparse
from time import sleep, time from time import sleep, time
import six import six
@ -103,6 +103,36 @@ def http_log(args, kwargs, resp, body):
log_method("RESP BODY: %s", body) log_method("RESP BODY: %s", body)
def parse_header_string(data):
    """Convert a response header value to a unicode string.

    Any %-encoded sequences are decoded as UTF-8. When that decoding
    fails, the %-encoded form is returned instead, so callers always get
    back *some* unicode string rather than an exception.
    """
    if six.PY2:
        if isinstance(data, six.text_type):
            # requests only hands us binary_type on py2; normalize any
            # stray text_type so unquote does not misread %-escapes as
            # raw code points.
            data = data.encode('utf8')
        try:
            return unquote(data).decode('utf8')
        except UnicodeDecodeError:
            pass
        # Unquoted form was not valid UTF-8 -- fall back to the raw
        # value, and failing that, a fully re-quoted form.
        try:
            return data.decode('utf8')
        except UnicodeDecodeError:
            return quote(data).decode('utf8')
    # Python 3: requests normally gives us text_type and drops (!) any
    # non-decodable headers; handle binary_type sanely in case that
    # behavior ever changes.
    if isinstance(data, six.binary_type):
        try:
            data = data.decode('ascii')
        except UnicodeDecodeError:
            data = quote(data)
    try:
        return unquote(data, errors='strict')
    except UnicodeDecodeError:
        # Not valid UTF-8 once unquoted; return the quoted form as-is.
        return data
def quote(value, safe='/'): def quote(value, safe='/'):
""" """
Patched version of urllib.quote that encodes utf8 strings before quoting. Patched version of urllib.quote that encodes utf8 strings before quoting.
@ -472,6 +502,14 @@ def get_auth(auth_url, user, key, **kwargs):
return storage_url, token return storage_url, token
def resp_header_dict(resp):
    """Build a dict of parsed, lower-cased headers from a response.

    Both names and values are run through parse_header_string so the
    result contains only unicode strings.
    """
    return dict(
        (parse_header_string(key).lower(), parse_header_string(val))
        for key, val in resp.getheaders())
def store_response(resp, response_dict): def store_response(resp, response_dict):
""" """
store information about an operation into a dict store information about an operation into a dict
@ -482,13 +520,9 @@ def store_response(resp, response_dict):
status, reason and a dict of lower-cased headers status, reason and a dict of lower-cased headers
""" """
if response_dict is not None: if response_dict is not None:
resp_headers = {}
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
response_dict['status'] = resp.status response_dict['status'] = resp.status
response_dict['reason'] = resp.reason response_dict['reason'] = resp.reason
response_dict['headers'] = resp_headers response_dict['headers'] = resp_header_dict(resp)
def get_account(url, token, marker=None, limit=None, prefix=None, def get_account(url, token, marker=None, limit=None, prefix=None,
@ -545,9 +579,7 @@ def get_account(url, token, marker=None, limit=None, prefix=None,
body = resp.read() body = resp.read()
http_log(("%s?%s" % (url, qs), method,), {'headers': headers}, resp, body) http_log(("%s?%s" % (url, qs), method,), {'headers': headers}, resp, body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
if resp.status < 200 or resp.status >= 300: if resp.status < 200 or resp.status >= 300:
raise ClientException('Account GET failed', http_scheme=parsed.scheme, raise ClientException('Account GET failed', http_scheme=parsed.scheme,
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
@ -589,9 +621,7 @@ def head_account(url, token, http_conn=None, service_token=None):
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -712,9 +742,7 @@ def get_container(url, token, container, marker=None, limit=None,
http_path=cont_path, http_query=qs, http_path=cont_path, http_query=qs,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
if resp.status == 204: if resp.status == 204:
return resp_headers, [] return resp_headers, []
return resp_headers, parse_api_response(resp_headers, body) return resp_headers, parse_api_response(resp_headers, body)
@ -758,9 +786,7 @@ def head_container(url, token, container, http_conn=None, headers=None,
http_path=path, http_status=resp.status, http_path=path, http_status=resp.status,
http_reason=resp.reason, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -992,9 +1018,7 @@ def head_object(url, token, container, name, http_conn=None,
http_host=conn.host, http_path=path, http_host=conn.host, http_path=path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers return resp_headers
@ -1224,9 +1248,7 @@ def get_capabilities(http_conn):
http_host=conn.host, http_path=parsed.path, http_host=conn.host, http_path=parsed.path,
http_status=resp.status, http_reason=resp.reason, http_status=resp.status, http_reason=resp.reason,
http_response_content=body) http_response_content=body)
resp_headers = {} resp_headers = resp_header_dict(resp)
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return parse_api_response(resp_headers, body) return parse_api_response(resp_headers, body)

@ -27,7 +27,7 @@ from threading import Thread
from six import StringIO, text_type from six import StringIO, text_type
from six.moves.queue import Queue from six.moves.queue import Queue
from six.moves.queue import Empty as QueueEmpty from six.moves.queue import Empty as QueueEmpty
from six.moves.urllib.parse import quote, unquote from six.moves.urllib.parse import quote
from six import Iterator, string_types from six import Iterator, string_types
import json import json
@ -1861,8 +1861,7 @@ class SwiftService(object):
delobjsmap = {} delobjsmap = {}
if old_manifest: if old_manifest:
scontainer, sprefix = old_manifest.split('/', 1) scontainer, sprefix = old_manifest.split('/', 1)
scontainer = unquote(scontainer) sprefix = sprefix.rstrip('/') + '/'
sprefix = unquote(sprefix).rstrip('/') + '/'
delobjsmap[scontainer] = [] delobjsmap[scontainer] = []
for part in self.list(scontainer, {'prefix': sprefix}): for part in self.list(scontainer, {'prefix': sprefix}):
if not part["success"]: if not part["success"]:
@ -2054,8 +2053,7 @@ class SwiftService(object):
dlo_segments_deleted = True dlo_segments_deleted = True
segment_pool = self.thread_manager.segment_pool segment_pool = self.thread_manager.segment_pool
s_container, s_prefix = old_manifest.split('/', 1) s_container, s_prefix = old_manifest.split('/', 1)
s_container = unquote(s_container) s_prefix = s_prefix.rstrip('/') + '/'
s_prefix = unquote(s_prefix).rstrip('/') + '/'
del_segs = [] del_segs = []
for part in self.list( for part in self.list(

@ -130,6 +130,31 @@ class TestHttpHelpers(MockHttpTest):
value = u'unicode:\xe9\u20ac' value = u'unicode:\xe9\u20ac'
self.assertEqual('unicode%3A%C3%A9%E2%82%AC', c.quote(value)) self.assertEqual('unicode%3A%C3%A9%E2%82%AC', c.quote(value))
def test_parse_header_string(self):
    """parse_header_string handles bytes/unicode, valid and bad escapes."""
    cases = [
        (b'bytes', u'bytes'),
        (u'unicode:\xe9\u20ac', u'unicode:\xe9\u20ac'),
        ('native%20string', u'native string'),
        (b'encoded%20bytes%E2%82%AC', u'encoded bytes\u20ac'),
        ('encoded%20unicode%E2%82%AC', u'encoded unicode\u20ac'),
        # %ff does not unquote to valid UTF-8, so the quoted form
        # is returned unchanged
        (b'bad%20bytes%ff%E2%82%AC', u'bad%20bytes%ff%E2%82%AC'),
        (u'bad%20unicode%ff\u20ac', u'bad%20unicode%ff\u20ac'),
        # a raw \xff byte forces the whole value to be re-quoted
        (b'really%20bad\xffbytes', u'really%2520bad%FFbytes'),
    ]
    for value, expected in cases:
        self.assertEqual(expected, c.parse_header_string(value))
def test_http_connection(self): def test_http_connection(self):
url = 'http://www.test.com' url = 'http://www.test.com'
_junk, conn = c.http_connection(url) _junk, conn = c.http_connection(url)
@ -686,6 +711,18 @@ class TestGetObject(MockHttpTest):
}), }),
]) ])
def test_response_headers(self):
    """Response headers come back unicode, with %-escapes decoded as UTF-8."""
    raw_headers = {
        'X-Utf-8-Header': b't%c3%a9st',    # valid UTF-8 once unquoted
        'X-Non-Utf-8-Header': b'%ff',      # invalid UTF-8; left quoted
        'X-Binary-Header': b'\xff',        # raw byte; gets re-quoted
    }
    c.http_connection = self.fake_http_connection(200, headers=raw_headers)
    connection = c.http_connection('http://www.test.com')
    headers, data = c.get_object('url_is_irrelevant', 'TOKEN',
                                 'container', 'object',
                                 http_conn=connection)
    self.assertEqual(u't\xe9st', headers.get('x-utf-8-header', ''))
    self.assertEqual(u'%ff', headers.get('x-non-utf-8-header', ''))
    self.assertEqual(u'%FF', headers.get('x-binary-header', ''))
def test_chunk_size_read_method(self): def test_chunk_size_read_method(self):
conn = c.Connection('http://auth.url/', 'some_user', 'some_key') conn = c.Connection('http://auth.url/', 'some_user', 'some_key')
with mock.patch('swiftclient.client.get_auth_1_0') as mock_get_auth: with mock.patch('swiftclient.client.get_auth_1_0') as mock_get_auth:

@ -124,7 +124,7 @@ def fake_http_connect(*code_iter, **kwargs):
def getheaders(self): def getheaders(self):
if self.headers: if self.headers:
return self.headers.items() return self.headers.items()
headers = {'content-length': len(self.body), headers = {'content-length': str(len(self.body)),
'content-type': 'x-application/test', 'content-type': 'x-application/test',
'x-timestamp': self.timestamp, 'x-timestamp': self.timestamp,
'last-modified': self.timestamp, 'last-modified': self.timestamp,
@ -132,7 +132,7 @@ def fake_http_connect(*code_iter, **kwargs):
'etag': 'etag':
self.etag or '"%s"' % EMPTY_ETAG, self.etag or '"%s"' % EMPTY_ETAG,
'x-works': 'yes', 'x-works': 'yes',
'x-account-container-count': 12345} 'x-account-container-count': '12345'}
if not self.timestamp: if not self.timestamp:
del headers['x-timestamp'] del headers['x-timestamp']
try: try: