From 61a1c71aa9c1930512dc8ce4051253c349df24e2 Mon Sep 17 00:00:00 2001 From: nisanharamati Date: Fri, 7 Feb 2014 16:30:21 -0800 Subject: [PATCH] try..except on UTF8Type serialize for pre-encoded strings Wrap ustr.encode() in a try..except block to handle the failure to encode strings that are already encoded. --- cassandra/cqltypes.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 216f9164..39294822 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -528,15 +528,11 @@ class UTF8Type(_CassandraType): @staticmethod def serialize(ustr): - # ustr.encode('utf8') fails when the string is already encoded - # this is common if your data comes through other database drivers (e.g. odbc, psycopg2, etc.) - if isinstance(ustr, unicode): # check type explicitly. Unicode will encode successfuly. - return ustr.encode('utf8') - # otherwise, our input string is either already encoded or not unicode to begin with. - # since all cassandra strings are utf-8, we can validate that the ustr is already encoded utf-8 by decoding it - else: - ustr.decode('utf-8') # will raise UnicodeDecodeError if not utf8 encoded byte string. - return ustr # definitely valid :) + try: + return ustr.encode('utf-8') + except UnicodeDecodeError: + # already utf-8 + return ustr class VarcharType(UTF8Type):