Use kafka.util.kafka_bytestring to encode utf-8 when necessary

This commit is contained in:
Dana Powers
2014-12-11 16:35:15 -08:00
parent 209a8f28e6
commit ed893c3bcf
2 changed files with 18 additions and 15 deletions

View File

@@ -15,6 +15,7 @@ from kafka.common import (
OffsetOutOfRangeError, RequestTimedOutError, KafkaMessage, ConsumerTimeout, OffsetOutOfRangeError, RequestTimedOutError, KafkaMessage, ConsumerTimeout,
FailedPayloadsError, KafkaUnavailableError, KafkaConfigurationError FailedPayloadsError, KafkaUnavailableError, KafkaConfigurationError
) )
from kafka.util import kafka_bytestring
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -225,18 +226,14 @@ class KafkaConsumer(object):
# Topic name str -- all partitions # Topic name str -- all partitions
if isinstance(arg, (six.string_types, six.binary_type)): if isinstance(arg, (six.string_types, six.binary_type)):
topic = arg topic = kafka_bytestring(arg)
if isinstance(topic, six.string_types):
topic = topic.encode('utf-8')
for partition in self._client.get_partition_ids_for_topic(arg): for partition in self._client.get_partition_ids_for_topic(arg):
self._consume_topic_partition(topic, partition) self._consume_topic_partition(topic, partition)
# (topic, partition [, offset]) tuple # (topic, partition [, offset]) tuple
elif isinstance(arg, tuple): elif isinstance(arg, tuple):
topic = arg[0] topic = kafka_bytestring(arg[0])
if isinstance(topic, six.string_types):
topic = topic.encode('utf-8')
partition = arg[1] partition = arg[1]
if len(arg) == 3: if len(arg) == 3:
offset = arg[2] offset = arg[2]
@@ -249,9 +246,7 @@ class KafkaConsumer(object):
# key can be string (a topic) # key can be string (a topic)
if isinstance(key, (six.string_types, six.binary_type)): if isinstance(key, (six.string_types, six.binary_type)):
topic = key topic = kafka_bytestring(key)
if isinstance(topic, six.string_types):
topic = topic.encode('utf-8')
# topic: partition # topic: partition
if isinstance(value, int): if isinstance(value, int):
@@ -267,9 +262,7 @@ class KafkaConsumer(object):
# (topic, partition): offset # (topic, partition): offset
elif isinstance(key, tuple): elif isinstance(key, tuple):
topic = key[0] topic = kafka_bytestring(key[0])
if isinstance(topic, six.string_types):
topic = topic.encode('utf-8')
partition = key[1] partition = key[1]
self._consume_topic_partition(topic, partition) self._consume_topic_partition(topic, partition)
self._offsets.fetch[key] = value self._offsets.fetch[key] = value
@@ -562,9 +555,7 @@ class KafkaConsumer(object):
# #
def _consume_topic_partition(self, topic, partition): def _consume_topic_partition(self, topic, partition):
if not isinstance(topic, six.binary_type): topic = kafka_bytestring(topic)
raise KafkaConfigurationError('Unknown topic type (%s) '
'-- expected bytes' % type(topic))
if not isinstance(partition, int): if not isinstance(partition, int):
raise KafkaConfigurationError('Unknown partition type (%s) ' raise KafkaConfigurationError('Unknown partition type (%s) '
'-- expected int' % type(partition)) '-- expected int' % type(partition))

View File

@@ -86,6 +86,18 @@ def group_by_topic_and_partition(tuples):
return out return out
def kafka_bytestring(s):
    """
    Coerce a string-like value to bytes.

    A bytes instance is handed back untouched; a text string is encoded
    as utf-8. Any other type raises TypeError, with the offending value
    as the exception argument.
    """
    if not isinstance(s, six.binary_type):
        # Not already bytes: only text strings can be encoded.
        if not isinstance(s, six.string_types):
            raise TypeError(s)
        s = s.encode('utf-8')
    return s
class ReentrantTimer(object): class ReentrantTimer(object):
""" """
A timer that can be restarted, unlike threading.Timer A timer that can be restarted, unlike threading.Timer