try..except on UTF8Type serialize for pre-encoded strings
Wrap ustr.encode('utf-8') in a try..except block so that serializing a string that is already UTF-8 encoded no longer fails.
This commit is contained in:
@@ -528,15 +528,11 @@ class UTF8Type(_CassandraType):
|
||||
|
||||
@staticmethod
def serialize(ustr):
    """Return *ustr* as a UTF-8 encoded byte string.

    Text (unicode) input is encoded to UTF-8.  Byte-string input is
    treated as already encoded -- common when the data came through
    another database driver (e.g. odbc, psycopg2) -- and is returned
    unchanged once it has been confirmed to be valid UTF-8.

    Raises:
        UnicodeDecodeError: if *ustr* is a byte string that is not
            valid UTF-8.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, so this check
    # covers pre-encoded input on both major versions without using the
    # Python 2-only ``unicode`` builtin.
    if isinstance(ustr, bytes):
        # Decode purely to validate; the result is discarded on purpose.
        # Calling .encode() on a byte string would instead go through an
        # implicit ASCII decode on Python 2 and fail on any non-ASCII byte.
        ustr.decode('utf-8')
        return ustr
    return ustr.encode('utf-8')
|
||||
|
||||
|
||||
class VarcharType(UTF8Type):
|
||||
|
Reference in New Issue
Block a user