py3: Be able to read and write non-ASCII headers

Apparently Python's stdlib got more picky about what a header should
look like. As a result, if an account, container, or object had a
non-ASCII metadata name (values were fine), the proxy-server wouldn't
parse all of the headers. See https://bugs.python.org/issue37093 for
more information.

This presented several problems:
- Since the non-ASCII header aborts parsing, we may lose important
  HTTP-level information like Content-Length or Transfer-Encoding.
- Since the offending header wouldn't get parsed, the client wouldn't
  even know what the problem was.
- Even if the client knew what the bad header was, it would have no way
  to clear it, as the server uses the same logic to parse incoming
  requests.

So, hack in our own header parsing if we detect that parsing was
aborted. Note that we also have to mangle bufferedhttp's putheader so we
can get non-ASCII headers to the backend servers.

Now, we can run the test_unicode_metadata tests in
test/functional/test_account.py and test/functional/test_container.py
under py2 against services running under py3.

Change-Id: I0f03c211f35a9a49e047a5718a9907b515ca88d7
This commit is contained in:
Tim Burke 2019-06-17 09:25:52 -07:00 committed by Matthew Oliver
parent bf3e2548b3
commit 76fde89261
2 changed files with 48 additions and 0 deletions

View File

@ -83,6 +83,23 @@ class BufferedHTTPResponse(HTTPResponse):
self.will_close = _UNKNOWN # conn will close at end of response
self._readline_buffer = b''
if not six.PY2:
def begin(self):
HTTPResponse.begin(self)
header_payload = self.headers.get_payload()
if header_payload:
# This shouldn't be here. We must've bumped up against
# https://bugs.python.org/issue37093
for line in header_payload.rstrip('\r\n').split('\n'):
if ':' not in line or line[:1] in ' \t':
# Well, we're no more broken than we were before...
# Should we support line folding?
# How can/should we handle a bad header line?
break
header, value = line.split(':', 1)
value = value.strip(' \t\n\r')
self.headers.add_header(header, value)
def expect_response(self):
if self.fp:
self.fp.close()
@ -198,6 +215,11 @@ class BufferedHTTPConnection(HTTPConnection):
return HTTPConnection.putrequest(self, method, url, skip_host,
skip_accept_encoding)
def putheader(self, header, value):
if not isinstance(header, bytes):
header = header.encode('latin-1')
HTTPConnection.putheader(self, header, value)
def getexpect(self):
kwargs = {'method': self._method}
if hasattr(self, 'strict'):

View File

@ -464,6 +464,32 @@ class SwiftHttpProtocol(wsgi.HttpProtocol):
# else, mangled protocol, most likely; let base class deal with it
return wsgi.HttpProtocol.parse_request(self)
if not six.PY2:
def get_environ(self, *args, **kwargs):
environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs)
header_payload = self.headers.get_payload()
if header_payload:
# This shouldn't be here. We must've bumped up against
# https://bugs.python.org/issue37093
headers_raw = list(environ['headers_raw'])
for line in header_payload.rstrip('\r\n').split('\n'):
if ':' not in line or line[:1] in ' \t':
# Well, we're no more broken than we were before...
# Should we support line folding?
# Should we 400 a bad header line?
break
header, value = line.split(':', 1)
value = value.strip(' \t\n\r')
headers_raw.append((header, value))
wsgi_key = 'HTTP_' + header.replace('-', '_').encode(
'latin1').upper().decode('latin1')
if wsgi_key in ('HTTP_CONTENT_LENGTH',
'HTTP_CONTENT_TYPE'):
wsgi_key = wsgi_key[5:]
environ[wsgi_key] = value
environ['headers_raw'] = tuple(headers_raw)
return environ
class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
"""