Merge "Reject object names with Unicode surrogates"

This commit is contained in:
Jenkins 2014-11-27 19:40:11 +00:00 committed by Gerrit Code Review
commit f48037030e
2 changed files with 10 additions and 1 deletion

View File

@ -306,7 +306,12 @@ def check_utf8(string):
if isinstance(string, unicode):
string.encode('utf-8')
else:
if string.decode('UTF-8').encode('UTF-8') != string:
decoded = string.decode('UTF-8')
if decoded.encode('UTF-8') != string:
return False
# A UTF-8 string with surrogates in it is invalid.
if any(0xD800 <= ord(codepoint) <= 0xDFFF
for codepoint in decoded):
return False
return '\x00' not in string
# If string is unicode, decode() will raise UnicodeEncodeError

View File

@ -420,6 +420,10 @@ class TestConstraints(unittest.TestCase):
self.assertFalse(constraints.check_utf8('\xed\xa0\xbc\xed\xbc\xb8'))
self.assertFalse(constraints.check_utf8('\xed\xa0\xbd\xed\xb9\x88'))
def test_check_utf8_lone_surrogates(self):
    # Each byte string is the three-byte UTF-8-style encoding of a single
    # unpaired surrogate code point: U+D83C (a high surrogate) and
    # U+DE48 (a low surrogate). check_utf8 must reject both.
    lone_surrogates = ('\xed\xa0\xbc', '\xed\xb9\x88')
    for raw in lone_surrogates:
        self.assertFalse(constraints.check_utf8(raw))
def test_validate_bad_meta(self):
req = Request.blank(
'/v/a/c/o',