try..except on UTF8Type serialize for pre-encoded strings

Wrap ustr.encode() in a try..except block so that serializing a byte string that is already UTF-8 encoded no longer fails.
This commit is contained in:
nisanharamati
2014-02-07 16:30:21 -08:00
parent b186ecd834
commit 61a1c71aa9

View File

@@ -528,15 +528,11 @@ class UTF8Type(_CassandraType):
@staticmethod
def serialize(ustr):
    """Return ``ustr`` as a UTF-8 encoded byte string.

    Text (``unicode`` on Python 2, ``str`` on Python 3) is encoded to
    UTF-8. Byte strings are treated as already encoded -- common when the
    data comes through other database drivers (e.g. odbc, psycopg2) -- and
    are returned unchanged once they have been validated as UTF-8.

    Raises:
        UnicodeDecodeError: if ``ustr`` is a byte string that is not
            valid UTF-8.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, so this check picks
    # out byte strings on both major versions without naming ``unicode``.
    if isinstance(ustr, bytes):
        # Calling .encode() on a Python 2 byte string implicitly decodes it
        # as ASCII first, which fails for any non-ASCII payload. Decode as
        # UTF-8 instead, purely to validate; the result is discarded.
        ustr.decode('utf-8')
        return ustr
    return ustr.encode('utf-8')
class VarcharType(UTF8Type):