From f3a933aad1f29a90bd2dbbabe24bf7d1b62ede9b Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Thu, 21 Jul 2016 23:12:36 +0000 Subject: [PATCH] Add support for more characters in header keys Some S3 clients rely on AWS's ability to preserve underscores in header names. This doesn't mesh well with WSGI, which treats dashes and underscores equivalently. Starting in eventlet 0.19.0, however, the raw headers from the client are available in a `headers_raw` environment key. If available, use it. Change-Id: If96fc2f0713e3ec18764766225446a16a7c07f94 --- swift3/request.py | 99 ++++++++++++++++++++------- swift3/response.py | 5 +- swift3/test/functional/test_object.py | 21 +++++- swift3/test/unit/test_request.py | 20 +++--- 4 files changed, 108 insertions(+), 37 deletions(-) diff --git a/swift3/request.py b/swift3/request.py index 8733f79c..84c4b33f 100644 --- a/swift3/request.py +++ b/swift3/request.py @@ -14,6 +14,7 @@ # limitations under the License. import base64 +from collections import defaultdict from email.header import Header from hashlib import sha1, sha256, md5 import hmac @@ -265,9 +266,18 @@ class SigV4Mixin(object): :return : dict of headers to sign, the keys are all lower case """ - headers_lower_dict = dict( - (k.lower().strip(), ' '.join(_header_strip(v or '').split())) - for (k, v) in six.iteritems(self.headers)) + if 'headers_raw' in self.environ: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + headers_lower_dict = defaultdict(list) + for key, value in self.environ['headers_raw']: + headers_lower_dict[key.lower().strip()].append( + ' '.join(_header_strip(value or '').split())) + headers_lower_dict = {k: ','.join(v) + for k, v in headers_lower_dict.items()} + else: # mostly-functional fallback + headers_lower_dict = dict( + (k.lower().strip(), ' '.join(_header_strip(v or '').split())) + for (k, v) in six.iteritems(self.headers)) if 'host' in headers_lower_dict and re.match( 'Boto/2.[0-9].[0-2]', @@ -279,7 +289,7 @@ class SigV4Mixin(object): headers_lower_dict['host'].split(':')[0] headers_to_sign = [ - (key, value) for key, value in headers_lower_dict.items() + (key, value) for key, value in sorted(headers_lower_dict.items()) if key in self._signed_headers] if len(headers_to_sign) != len(self._signed_headers): @@ -290,7 +300,7 @@ class SigV4Mixin(object): # process. raise SignatureDoesNotMatch() - return dict(headers_to_sign) + return headers_to_sign def _canonical_uri(self): """ @@ -328,13 +338,12 @@ class SigV4Mixin(object): # host:iam.amazonaws.com # x-amz-date:20150830T123600Z headers_to_sign = self._headers_to_sign() - cr.append('\n'.join( - ['%s:%s' % (key, value) for key, value in - sorted(headers_to_sign.items())]) + '\n') + cr.append(''.join('%s:%s\n' % (key, value) + for key, value in headers_to_sign)) # 5. Add signed headers into canonical request like # content-type;host;x-amz-date - cr.append(';'.join(sorted(headers_to_sign))) + cr.append(';'.join(k for k, v in headers_to_sign)) # 6. Add payload string at the tail if 'X-Amz-Credential' in self.params: @@ -780,9 +789,20 @@ class Request(swob.Request): _header_strip(self.headers.get('Content-MD5')) or '', _header_strip(self.headers.get('Content-Type')) or ''] - for amz_header in sorted((key.lower() for key in self.headers - if key.lower().startswith('x-amz-'))): - amz_headers[amz_header] = self.headers[amz_header] + if 'headers_raw' in self.environ: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + amz_headers = defaultdict(list) + for key, value in self.environ['headers_raw']: + key = key.lower() + if not key.startswith('x-amz-'): + continue + amz_headers[key.strip()].append(value.strip()) + amz_headers = dict((key, ','.join(value)) + for key, value in amz_headers.items()) + else: # mostly-functional fallback + amz_headers = dict((key.lower(), value) + for key, value in self.headers.items() + if key.lower().startswith('x-amz-')) if self._is_header_auth: if 'x-amz-date' in amz_headers: @@ -796,8 +816,8 @@ class Request(swob.Request): # but as a sanity check... raise AccessDenied() - for k in sorted(key.lower() for key in amz_headers): - buf.append("%s:%s" % (k, amz_headers[k])) + for key, value in sorted(amz_headers.items()): + buf.append("%s:%s" % (key, value)) path = self._canonical_uri() if self.query_string: @@ -883,15 +903,48 @@ class Request(swob.Request): env = self.environ.copy() - for key in self.environ: - if key.startswith('HTTP_X_AMZ_META_'): - if not(set(env[key]).issubset(string.printable)): - env[key] = Header(env[key], 'UTF-8').encode() - if env[key].startswith('=?utf-8?q?'): - env[key] = '=?UTF-8?Q?' + env[key][10:] - elif env[key].startswith('=?utf-8?b?'): - env[key] = '=?UTF-8?B?' + env[key][10:] - env['HTTP_X_OBJECT_META_' + key[16:]] = env[key] + def sanitize(value): + if set(value).issubset(string.printable): + return value + + value = Header(value, 'UTF-8').encode() + if value.startswith('=?utf-8?q?'): + return '=?UTF-8?Q?' + value[10:] + elif value.startswith('=?utf-8?b?'): + return '=?UTF-8?B?' + value[10:] + else: + return value + + if 'headers_raw' in env: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + for key, value in env['headers_raw']: + if not key.lower().startswith('x-amz-meta-'): + continue + # AWS ignores user-defined headers with these characters + if any(c in key for c in ' "),/;<=>?@[\\]{}'): + # NB: apparently, '(' *is* allowed + continue + # Note that this may have already been deleted, e.g. if the + # client sent multiple headers with the same name, or both + # x-amz-meta-foo-bar and x-amz-meta-foo_bar + env.pop('HTTP_' + key.replace('-', '_').upper(), None) + # Need to preserve underscores. Since we know '=' can't be + # present, quoted-printable seems appropriate. + key = key.replace('_', '=5F').replace('-', '_').upper() + key = 'HTTP_X_OBJECT_META_' + key[11:] + if key in env: + env[key] += ',' + sanitize(value) + else: + env[key] = sanitize(value) + else: # mostly-functional fallback + for key in self.environ: + if not key.startswith('HTTP_X_AMZ_META_'): + continue + # AWS ignores user-defined headers with these characters + if any(c in key for c in ' "),/;<=>?@[\\]{}'): + # NB: apparently, '(' *is* allowed + continue + env['HTTP_X_OBJECT_META_' + key[16:]] = sanitize(env[key]) del env[key] if 'HTTP_X_AMZ_COPY_SOURCE' in env: diff --git a/swift3/response.py b/swift3/response.py index 203292f4..c0006c35 100644 --- a/swift3/response.py +++ b/swift3/response.py @@ -100,7 +100,10 @@ class Response(ResponseBase, swob.Response): _key = key.lower() if _key.startswith('x-object-meta-'): - headers['x-amz-meta-' + _key[14:]] = val + # Note that AWS ignores user-defined headers with '=' in the + # header name. We translated underscores to '=5F' on the way + # in, though. + headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val elif _key in ('content-length', 'content-type', 'content-range', 'content-encoding', 'content-disposition', 'content-language', diff --git a/swift3/test/functional/test_object.py b/swift3/test/functional/test_object.py index 636cc94a..665c24ee 100644 --- a/swift3/test/functional/test_object.py +++ b/swift3/test/functional/test_object.py @@ -320,7 +320,9 @@ class TestSwift3Object(Swift3FunctionalTestCase): self.assertCommonResponseHeaders(headers) self._assertObjectEtag(self.bucket, obj, etag) - def _test_put_object_headers(self, req_headers): + def _test_put_object_headers(self, req_headers, expected_headers=None): + if expected_headers is None: + expected_headers = req_headers obj = 'object' content = 'abcdefghij' etag = md5(content).hexdigest() @@ -330,7 +332,7 @@ class TestSwift3Object(Swift3FunctionalTestCase): self.assertEqual(status, 200) status, headers, body = \ self.conn.make_request('HEAD', self.bucket, obj) - for header, value in req_headers.items(): + for header, value in expected_headers.items(): self.assertIn(header.lower(), headers) self.assertEqual(headers[header.lower()], value) self.assertCommonResponseHeaders(headers) @@ -341,6 +343,21 @@ class TestSwift3Object(Swift3FunctionalTestCase): 'X-Amz-Meta-Bar': 'foo', 'X-Amz-Meta-Bar2': 'foo2'}) + def test_put_object_weird_metadata(self): + req_headers = dict( + ('x-amz-meta-' + c, c) + for c in '!"#$%&\'()*+-./<=>?@[\\]^`{|}~') + exp_headers = dict( + ('x-amz-meta-' + c, c) + for c in '!#$%&\'(*+-.^`|~') + self._test_put_object_headers(req_headers, exp_headers) + + def test_put_object_underscore_in_metadata(self): + # Break this out separately for ease of testing pre-0.19.0 eventlet + self._test_put_object_headers({ + 'X-Amz-Meta-Foo-Bar': 'baz', + 'X-Amz-Meta-Foo_Bar': 'also baz'}) + def test_put_object_content_headers(self): self._test_put_object_headers({ 'Content-Type': 'foo/bar', diff --git a/swift3/test/unit/test_request.py b/swift3/test/unit/test_request.py index a3ba5f7c..bde7bd27 100644 --- a/swift3/test/unit/test_request.py +++ b/swift3/test/unit/test_request.py @@ -391,8 +391,8 @@ class TestRequest(Swift3TestCase): 'Authorization': 'AWS4-HMAC-SHA256 ' 'Credential=test/20130524/US/s3/aws4_request, ' - 'SignedHeaders=host;%s,' - 'Signature=X' % included_header, + 'SignedHeaders=%s,' + 'Signature=X' % ';'.join(sorted(['host', included_header])), 'X-Amz-Content-SHA256': '0123456789'} headers.update(date_header) @@ -551,11 +551,10 @@ class TestRequest(Swift3TestCase): sigv4_req = SigV4Request(req.environ) headers_to_sign = sigv4_req._headers_to_sign() - self.assertEqual(['host', 'x-amz-content-sha256', 'x-amz-date'], - sorted(headers_to_sign.keys())) - self.assertEqual(headers_to_sign['host'], 'localhost:80') - self.assertEqual(headers_to_sign['x-amz-date'], x_amz_date) - self.assertEqual(headers_to_sign['x-amz-content-sha256'], '0123456789') + self.assertEqual(headers_to_sign, [ + ('host', 'localhost:80'), + ('x-amz-content-sha256', '0123456789'), + ('x-amz-date', x_amz_date)]) # no x-amz-date headers = { @@ -571,10 +570,9 @@ class TestRequest(Swift3TestCase): sigv4_req = SigV4Request(req.environ) headers_to_sign = sigv4_req._headers_to_sign() - self.assertEqual(['host', 'x-amz-content-sha256'], - sorted(headers_to_sign.keys())) - self.assertEqual(headers_to_sign['host'], 'localhost:80') - self.assertEqual(headers_to_sign['x-amz-content-sha256'], '0123456789') + self.assertEqual(headers_to_sign, [ + ('host', 'localhost:80'), + ('x-amz-content-sha256', '0123456789')]) # SignedHeaders says, host and x-amz-date included but there is not # X-Amz-Date header