diff --git a/swift/common/constraints.py b/swift/common/constraints.py index 1d4d80cf05..47088d35ea 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -24,7 +24,7 @@ from six.moves import urllib from swift.common import utils, exceptions from swift.common.swob import HTTPBadRequest, HTTPLengthRequired, \ HTTPRequestEntityTooLarge, HTTPPreconditionFailed, HTTPNotImplemented, \ - HTTPException + HTTPException, str_from_wsgi MAX_FILE_SIZE = 5368709122 MAX_META_NAME_LENGTH = 128 @@ -141,8 +141,8 @@ def check_metadata(req, target_type): if not key: return HTTPBadRequest(body='Metadata name cannot be empty', request=req, content_type='text/plain') - bad_key = not check_utf8(key) - bad_value = value and not check_utf8(value) + bad_key = not check_utf8(str_from_wsgi(key)) + bad_value = value and not check_utf8(str_from_wsgi(value)) if target_type in ('account', 'container') and (bad_key or bad_value): return HTTPBadRequest(body='Metadata must be valid UTF-8', request=req, content_type='text/plain') @@ -215,7 +215,7 @@ def check_object_creation(req, object_name): return HTTPBadRequest(request=req, body=e.body, content_type='text/plain') - if not check_utf8(req.headers['Content-Type']): + if not check_utf8(str_from_wsgi(req.headers['Content-Type'])): return HTTPBadRequest(request=req, body='Invalid Content-Type', content_type='text/plain') return check_metadata(req, 'object') @@ -359,16 +359,18 @@ def check_utf8(string): return False try: if isinstance(string, six.text_type): - string.encode('utf-8') + encoded = string.encode('utf-8') + decoded = string else: + encoded = string decoded = string.decode('UTF-8') - if decoded.encode('UTF-8') != string: + if decoded.encode('UTF-8') != encoded: return False - # A UTF-8 string with surrogates in it is invalid. - if any(0xD800 <= ord(codepoint) <= 0xDFFF - for codepoint in decoded): - return False - return '\x00' not in string + # A UTF-8 string with surrogates in it is invalid. + if any(0xD800 <= ord(codepoint) <= 0xDFFF + for codepoint in decoded): + return False + return b'\x00' not in encoded # If string is unicode, decode() will raise UnicodeEncodeError # So, we should catch both UnicodeDecodeError & UnicodeEncodeError except UnicodeError: @@ -392,7 +394,7 @@ def check_name_format(req, name, target_type): body='%s name cannot be empty' % target_type) if isinstance(name, six.text_type): name = name.encode('utf-8') - if '/' in name: + if b'/' in name: raise HTTPPreconditionFailed( request=req, body='%s name cannot contain slashes' % target_type) diff --git a/swift/common/swob.py b/swift/common/swob.py index b7163065d2..2b097d1d5d 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -219,7 +219,11 @@ def _header_int_property(header): def header_to_environ_key(header_name): - header_name = 'HTTP_' + header_name.replace('-', '_').upper() + # Why the to/from wsgi dance? Headers that include something like b'\xff' + # on the wire get translated to u'\u00ff' on py3, which gets upper()ed to + # u'\u0178', which is nonsense in a WSGI string. + real_header = str_from_wsgi(header_name) + header_name = 'HTTP_' + str_to_wsgi(real_header.upper()).replace('-', '_') if header_name == 'HTTP_CONTENT_LENGTH': return 'CONTENT_LENGTH' if header_name == 'HTTP_CONTENT_TYPE': @@ -251,8 +255,10 @@ class HeaderEnvironProxy(MutableMapping): def __setitem__(self, key, value): if value is None: self.environ.pop(header_to_environ_key(key), None) - elif isinstance(value, six.text_type): + elif six.PY2 and isinstance(value, six.text_type): self.environ[header_to_environ_key(key)] = value.encode('utf-8') + elif not six.PY2 and isinstance(value, six.binary_type): + self.environ[header_to_environ_key(key)] = value.decode('latin1') else: self.environ[header_to_environ_key(key)] = str(value) @@ -263,7 +269,8 @@ class HeaderEnvironProxy(MutableMapping): del self.environ[header_to_environ_key(key)] def keys(self): - keys = [key[5:].replace('_', '-').title() + # See the to/from WSGI comment in header_to_environ_key + keys = [str_to_wsgi(str_from_wsgi(key[5:]).replace('_', '-').title()) for key in self.environ if key.startswith('HTTP_')] if 'CONTENT_LENGTH' in self.environ: keys.append('Content-Length') @@ -272,6 +279,18 @@ class HeaderEnvironProxy(MutableMapping): return keys +def str_from_wsgi(wsgi_str): + if six.PY2: + return wsgi_str + return wsgi_str.encode('latin1').decode('utf8', errors='surrogateescape') + + +def str_to_wsgi(native_str): + if six.PY2: + return native_str + return native_str.encode('utf8', errors='surrogateescape').decode('latin1') + + def _resp_status_property(): """ Set and retrieve the value of Response.status diff --git a/test/unit/common/test_constraints.py b/test/unit/common/test_constraints.py index 18bbc66846..0e7bfdb754 100644 --- a/test/unit/common/test_constraints.py +++ b/test/unit/common/test_constraints.py @@ -44,22 +44,23 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'object') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Metadata name cannot be empty', resp.body) + self.assertIn(b'Metadata name cannot be empty', resp.body) def test_check_metadata_non_utf8(self): - headers = {'X-Account-Meta-Foo': b'\xff'} + # Consciously using native "WSGI strings" in headers + headers = {'X-Account-Meta-Foo': '\xff'} resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'account') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Metadata must be valid UTF-8', resp.body) + self.assertIn(b'Metadata must be valid UTF-8', resp.body) - headers = {b'X-Container-Meta-\xff': 'foo'} + headers = {'X-Container-Meta-\xff': 'foo'} resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'container') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Metadata must be valid UTF-8', resp.body) + self.assertIn(b'Metadata must be valid UTF-8', resp.body) # Object's OK; its metadata isn't serialized as JSON - headers = {'X-Object-Meta-Foo': b'\xff'} + headers = {'X-Object-Meta-Foo': '\xff'} self.assertIsNone(constraints.check_metadata(Request.blank( '/', headers=headers), 'object')) @@ -75,8 +76,8 @@ class TestConstraints(unittest.TestCase): '/', headers=headers), 'object') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) self.assertIn( - ('X-Object-Meta-%s' % name).lower(), resp.body.lower()) - self.assertIn('Metadata name too long', resp.body) + b'x-object-meta-%s' % name.encode('ascii'), resp.body.lower()) + self.assertIn(b'Metadata name too long', resp.body) def test_check_metadata_value_length(self): value = 'a' * constraints.MAX_META_VALUE_LENGTH @@ -89,10 +90,10 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'object') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('x-object-meta-name', resp.body.lower()) + self.assertIn(b'x-object-meta-name', resp.body.lower()) self.assertIn( - str(constraints.MAX_META_VALUE_LENGTH), resp.body) - self.assertIn('Metadata value longer than 256', resp.body) + str(constraints.MAX_META_VALUE_LENGTH).encode('ascii'), resp.body) + self.assertIn(b'Metadata value longer than 256', resp.body) def test_check_metadata_count(self): headers = {} @@ -105,7 +106,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'object') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Too many metadata items', resp.body) + self.assertIn(b'Too many metadata items', resp.body) def test_check_metadata_size(self): headers = {} @@ -132,7 +133,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_metadata(Request.blank( '/', headers=headers), 'object') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Total metadata too large', resp.body) + self.assertIn(b'Total metadata too large', resp.body) def test_check_object_creation_content_length(self): headers = {'Content-Length': str(constraints.MAX_FILE_SIZE), @@ -160,7 +161,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation(Request.blank( '/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Invalid Transfer-Encoding header value', resp.body) + self.assertIn(b'Invalid Transfer-Encoding header value', resp.body) headers = {'Content-Type': 'text/plain', 'X-Timestamp': str(time.time())} @@ -174,7 +175,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation(Request.blank( '/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Invalid Content-Length header value', resp.body) + self.assertIn(b'Invalid Content-Length header value', resp.body) headers = {'Transfer-Encoding': 'gzip,chunked', 'Content-Type': 'text/plain', @@ -195,7 +196,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation( Request.blank('/', headers=headers), name) self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Object name length of %d longer than %d' % + self.assertIn(b'Object name length of %d longer than %d' % (MAX_OBJECT_NAME_LENGTH + 1, MAX_OBJECT_NAME_LENGTH), resp.body) @@ -211,7 +212,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation( Request.blank('/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('No content type', resp.body) + self.assertIn(b'No content type', resp.body) def test_check_object_creation_bad_content_type(self): headers = {'Transfer-Encoding': 'chunked', @@ -220,7 +221,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation( Request.blank('/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Content-Type', resp.body) + self.assertIn(b'Content-Type', resp.body) def test_check_object_creation_bad_delete_headers(self): headers = {'Transfer-Encoding': 'chunked', @@ -230,7 +231,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation( Request.blank('/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('Non-integer X-Delete-After', resp.body) + self.assertIn(b'Non-integer X-Delete-After', resp.body) t = str(int(time.time() - 60)) headers = {'Transfer-Encoding': 'chunked', @@ -240,7 +241,7 @@ class TestConstraints(unittest.TestCase): resp = constraints.check_object_creation( Request.blank('/', headers=headers), 'object_name') self.assertEqual(resp.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-At in past', resp.body) + self.assertIn(b'X-Delete-At in past', resp.body) def test_check_delete_headers(self): # x-delete-at value should be relative to the request timestamp rather @@ -265,7 +266,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('Non-integer X-Delete-After', cm.exception.body) + self.assertIn(b'Non-integer X-Delete-After', cm.exception.body) headers = {'X-Delete-After': '60.1', 'X-Timestamp': ts.internal} @@ -273,7 +274,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('Non-integer X-Delete-After', cm.exception.body) + self.assertIn(b'Non-integer X-Delete-After', cm.exception.body) headers = {'X-Delete-After': '-1', 'X-Timestamp': ts.internal} @@ -281,7 +282,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-After in past', cm.exception.body) + self.assertIn(b'X-Delete-After in past', cm.exception.body) headers = {'X-Delete-After': '0', 'X-Timestamp': ts.internal} @@ -289,7 +290,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-After in past', cm.exception.body) + self.assertIn(b'X-Delete-After in past', cm.exception.body) # x-delete-after = 0 disallowed when it results in x-delete-at equal to # the timestamp @@ -299,7 +300,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-After in past', cm.exception.body) + self.assertIn(b'X-Delete-After in past', cm.exception.body) # X-Delete-At delete_at = str(int(ts) + 100) @@ -317,7 +318,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('Non-integer X-Delete-At', cm.exception.body) + self.assertIn(b'Non-integer X-Delete-At', cm.exception.body) delete_at = str(int(ts) + 100) + '.1' headers = {'X-Delete-At': delete_at, @@ -326,7 +327,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('Non-integer X-Delete-At', cm.exception.body) + self.assertIn(b'Non-integer X-Delete-At', cm.exception.body) delete_at = str(int(ts) - 1) headers = {'X-Delete-At': delete_at, @@ -335,7 +336,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-At in past', cm.exception.body) + self.assertIn(b'X-Delete-At in past', cm.exception.body) # x-delete-at disallowed when exactly equal to timestamp delete_at = str(int(ts)) @@ -345,7 +346,7 @@ class TestConstraints(unittest.TestCase): constraints.check_delete_headers( Request.blank('/', headers=headers)) self.assertEqual(cm.exception.status_int, HTTP_BAD_REQUEST) - self.assertIn('X-Delete-At in past', cm.exception.body) + self.assertIn(b'X-Delete-At in past', cm.exception.body) def test_check_delete_headers_removes_delete_after(self): ts = utils.Timestamp.now() @@ -468,30 +469,45 @@ class TestConstraints(unittest.TestCase): def test_check_utf8(self): unicode_sample = u'\uc77c\uc601' - valid_utf8_str = unicode_sample.encode('utf-8') - invalid_utf8_str = unicode_sample.encode('utf-8')[::-1] unicode_with_null = u'abc\u0000def' - utf8_with_null = unicode_with_null.encode('utf-8') - for false_argument in [None, - '', - invalid_utf8_str, - unicode_with_null, - utf8_with_null]: - self.assertFalse(constraints.check_utf8(false_argument)) + # Some false-y values + self.assertFalse(constraints.check_utf8(None)) + self.assertFalse(constraints.check_utf8('')) + self.assertFalse(constraints.check_utf8(b'')) + self.assertFalse(constraints.check_utf8(u'')) - for true_argument in ['this is ascii and utf-8, too', - unicode_sample, - valid_utf8_str]: - self.assertTrue(constraints.check_utf8(true_argument)) + # invalid utf8 bytes + self.assertFalse(constraints.check_utf8( + unicode_sample.encode('utf-8')[::-1])) + # unicode with null + self.assertFalse(constraints.check_utf8(unicode_with_null)) + # utf8 bytes with null + self.assertFalse(constraints.check_utf8( + unicode_with_null.encode('utf8'))) + + self.assertTrue(constraints.check_utf8('this is ascii and utf-8, too')) + self.assertTrue(constraints.check_utf8(unicode_sample)) + self.assertTrue(constraints.check_utf8(unicode_sample.encode('utf8'))) def test_check_utf8_non_canonical(self): - self.assertFalse(constraints.check_utf8('\xed\xa0\xbc\xed\xbc\xb8')) - self.assertFalse(constraints.check_utf8('\xed\xa0\xbd\xed\xb9\x88')) + self.assertFalse(constraints.check_utf8(b'\xed\xa0\xbc\xed\xbc\xb8')) + self.assertTrue(constraints.check_utf8(u'\U0001f338')) + self.assertTrue(constraints.check_utf8(b'\xf0\x9f\x8c\xb8')) + self.assertTrue(constraints.check_utf8(u'\U0001f338'.encode('utf8'))) + self.assertFalse(constraints.check_utf8(b'\xed\xa0\xbd\xed\xb9\x88')) + self.assertTrue(constraints.check_utf8(u'\U0001f648')) def test_check_utf8_lone_surrogates(self): - self.assertFalse(constraints.check_utf8('\xed\xa0\xbc')) - self.assertFalse(constraints.check_utf8('\xed\xb9\x88')) + self.assertFalse(constraints.check_utf8(b'\xed\xa0\xbc')) + self.assertFalse(constraints.check_utf8(u'\ud83c')) + self.assertFalse(constraints.check_utf8(b'\xed\xb9\x88')) + self.assertFalse(constraints.check_utf8(u'\ude48')) + + self.assertFalse(constraints.check_utf8(u'\ud800')) + self.assertFalse(constraints.check_utf8(u'\udc00')) + self.assertFalse(constraints.check_utf8(u'\udcff')) + self.assertFalse(constraints.check_utf8(u'\udfff')) def test_validate_bad_meta(self): req = Request.blank( @@ -500,8 +516,9 @@ class TestConstraints(unittest.TestCase): 'ab' * constraints.MAX_HEADER_SIZE}) self.assertEqual(constraints.check_metadata(req, 'object').status_int, HTTP_BAD_REQUEST) - self.assertIn('x-object-meta-hello', constraints.check_metadata(req, - 'object').body.lower()) + resp = constraints.check_metadata(req, 'object') + self.assertIsNotNone(resp) + self.assertIn(b'x-object-meta-hello', resp.body.lower()) def test_validate_constraints(self): c = constraints @@ -537,7 +554,7 @@ class TestConstraints(unittest.TestCase): constraints.check_container_format( req, req.headers['X-Versions-Location']) self.assertTrue(cm.exception.body.startswith( - 'Container name cannot')) + b'Container name cannot')) def test_valid_api_version(self): version = 'v1' @@ -579,10 +596,10 @@ class TestConstraintsConfig(unittest.TestCase): def test_override_constraints(self): try: with tempfile.NamedTemporaryFile() as f: - f.write('[swift-constraints]\n') + f.write(b'[swift-constraints]\n') # set everything to 1 for key in constraints.DEFAULT_CONSTRAINTS: - f.write('%s = 1\n' % key) + f.write(b'%s = 1\n' % key.encode('ascii')) f.flush() with mock.patch.object(utils, 'SWIFT_CONF_FILE', f.name): constraints.reload_constraints() @@ -603,10 +620,10 @@ class TestConstraintsConfig(unittest.TestCase): def test_reload_reset(self): try: with tempfile.NamedTemporaryFile() as f: - f.write('[swift-constraints]\n') + f.write(b'[swift-constraints]\n') # set everything to 1 for key in constraints.DEFAULT_CONSTRAINTS: - f.write('%s = 1\n' % key) + f.write(b'%s = 1\n' % key.encode('ascii')) f.flush() with mock.patch.object(utils, 'SWIFT_CONF_FILE', f.name): constraints.reload_constraints() @@ -619,7 +636,7 @@ class TestConstraintsConfig(unittest.TestCase): # no constraints have been loaded from non-existent swift.conf self.assertFalse(constraints.SWIFT_CONSTRAINTS_LOADED) # no constraints are in OVERRIDE - self.assertEqual([], constraints.OVERRIDE_CONSTRAINTS.keys()) + self.assertEqual([], list(constraints.OVERRIDE_CONSTRAINTS.keys())) # the EFFECTIVE constraints mirror DEFAULT self.assertEqual(constraints.EFFECTIVE_CONSTRAINTS, constraints.DEFAULT_CONSTRAINTS) diff --git a/tox.ini b/tox.ini index b6d2db7885..9071675e3a 100644 --- a/tox.ini +++ b/tox.ini @@ -38,6 +38,7 @@ commands = test/unit/cli/test_ringbuilder.py \ test/unit/common/middleware/test_gatekeeper.py \ test/unit/common/ring \ + test/unit/common/test_constraints.py \ test/unit/common/test_daemon.py \ test/unit/common/test_exceptions.py \ test/unit/common/test_header_key_dict.py \