py3: Be able to read and write non-ASCII headers

Apparently Python's stdlib got more picky about what a header should look like. As a result, if an account, container, or object had a non-ASCII metadata name (values were fine), the proxy-server wouldn't parse all of the headers. See https://bugs.python.org/issue37093 for more information.

This presented several problems:

- Since the non-ASCII header aborts parsing, we may lose important HTTP-level information like Content-Length or Transfer-Encoding.
- Since the offending header wouldn't get parsed, the client wouldn't even know what the problem was.
- Even if the client knew what the bad header was, it would have no way to clear it, as the server uses the same logic to parse incoming requests.

So, hack in our own header parsing if we detect that parsing was aborted. Note that we also have to mangle bufferedhttp's putheader so we can get non-ASCII headers to the backend servers.

Now, we can run the test_unicode_metadata tests in test/functional/test_account.py and test/functional/test_container.py under py2 against services running under py3.

Change-Id: I0f03c211f35a9a49e047a5718a9907b515ca88d7
2019-06-17 09:25:52 -07:00
parent bf3e2548b3
commit 76fde89261
2 changed files with 48 additions and 0 deletions
--- a/swift/common/bufferedhttp.py
+++ b/swift/common/bufferedhttp.py
@@ -83,6 +83,23 @@ class BufferedHTTPResponse(HTTPResponse):
        self.will_close = _UNKNOWN      # conn will close at end of response
        self._readline_buffer = b''

+    if not six.PY2:
+        def begin(self):
+            """
+            Read the response status and headers, then recover any
+            header lines that the stdlib parser left unparsed.
+
+            On py3, header parsing is aborted by a header with a
+            non-ASCII name (https://bugs.python.org/issue37093); whatever
+            was not parsed is left behind as the payload of
+            ``self.headers``. Every well-formed ``name: value`` line found
+            there is added back to ``self.headers``.
+            """
+            HTTPResponse.begin(self)
+            header_payload = self.headers.get_payload()
+            if isinstance(header_payload, list) and len(header_payload) == 1:
+                # get_payload() returns a list of sub-messages instead of
+                # a string when the headers parsed so far made the message
+                # look multipart (presumably a message/* Content-Type);
+                # the unparsed text is then that sub-message's payload.
+                header_payload = header_payload[0].get_payload()
+            if header_payload and isinstance(header_payload, str):
+                # This shouldn't be here. We must've bumped up against
+                # https://bugs.python.org/issue37093
+                for line in header_payload.rstrip('\r\n').split('\n'):
+                    if ':' not in line or line[:1] in ' \t':
+                        # Not a plain "name: value" line (no colon, or a
+                        # folded continuation line); give up on the rest.
+                        # Well, we're no more broken than we were before...
+                        # Should we support line folding?
+                        # How can/should we handle a bad header line?
+                        break
+                    header, value = line.split(':', 1)
+                    # split('\n') leaves any '\r' on the value; drop it
+                    # along with the surrounding whitespace
+                    value = value.strip(' \t\n\r')
+                    self.headers.add_header(header, value)
+
    def expect_response(self):
        if self.fp:
            self.fp.close()
@@ -198,6 +215,11 @@ class BufferedHTTPConnection(HTTPConnection):
        return HTTPConnection.putrequest(self, method, url, skip_host,
                                         skip_accept_encoding)

+    def putheader(self, header, value, *more_values):
+        """
+        Send a request header, permitting non-ASCII header names.
+
+        A header name that is not already bytes is encoded as latin-1
+        before being handed to ``HTTPConnection.putheader``; bytes names
+        are passed through untouched.
+
+        :param header: header name, as text or bytes
+        :param value: header value, forwarded unchanged
+        :param more_values: optional additional values, forwarded
+                            unchanged so the stdlib multi-value form of
+                            ``putheader`` keeps working
+        """
+        if not isinstance(header, bytes):
+            header = header.encode('latin-1')
+        HTTPConnection.putheader(self, header, value, *more_values)
+
    def getexpect(self):
        kwargs = {'method': self._method}
        if hasattr(self, 'strict'):
--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@@ -464,6 +464,32 @@ class SwiftHttpProtocol(wsgi.HttpProtocol):
            # else, mangled protocol, most likely; let base class deal with it
        return wsgi.HttpProtocol.parse_request(self)

+    if not six.PY2:
+        def get_environ(self, *args, **kwargs):
+            """
+            Build the WSGI environ, then fold in any request header
+            lines that the stdlib parser left unparsed.
+
+            On py3, header parsing is aborted by a header with a
+            non-ASCII name (https://bugs.python.org/issue37093); whatever
+            was not parsed is left behind as the payload of
+            ``self.headers``. Every well-formed ``name: value`` line found
+            there is appended to ``environ['headers_raw']`` and stored
+            under its WSGI key.
+
+            :returns: the environ dict from ``wsgi.HttpProtocol``,
+                      updated with the recovered headers
+            """
+            environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs)
+            header_payload = self.headers.get_payload()
+            if isinstance(header_payload, list) and len(header_payload) == 1:
+                # get_payload() returns a list of sub-messages instead of
+                # a string when the headers parsed so far made the message
+                # look multipart (presumably a message/* Content-Type);
+                # the unparsed text is then that sub-message's payload.
+                header_payload = header_payload[0].get_payload()
+            if header_payload and isinstance(header_payload, str):
+                # This shouldn't be here. We must've bumped up against
+                # https://bugs.python.org/issue37093
+                headers_raw = list(environ['headers_raw'])
+                for line in header_payload.rstrip('\r\n').split('\n'):
+                    if ':' not in line or line[:1] in ' \t':
+                        # Not a plain "name: value" line (no colon, or a
+                        # folded continuation line); give up on the rest.
+                        # Well, we're no more broken than we were before...
+                        # Should we support line folding?
+                        # Should we 400 a bad header line?
+                        break
+                    header, value = line.split(':', 1)
+                    # split('\n') leaves any '\r' on the value; drop it
+                    # along with the surrounding whitespace
+                    value = value.strip(' \t\n\r')
+                    headers_raw.append((header, value))
+                    # Upper-case via bytes so that only ASCII letters are
+                    # changed; non-ASCII characters stay exactly as sent.
+                    wsgi_key = 'HTTP_' + header.replace('-', '_').encode(
+                        'latin1').upper().decode('latin1')
+                    if wsgi_key in ('HTTP_CONTENT_LENGTH',
+                                    'HTTP_CONTENT_TYPE'):
+                        # these two are stored without the HTTP_ prefix
+                        wsgi_key = wsgi_key[5:]
+                    environ[wsgi_key] = value
+                environ['headers_raw'] = tuple(headers_raw)
+            return environ
+

 class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
    """