Add kafka.serializer interfaces (#912)

This commit is contained in:
Dana Powers
2016-12-19 11:27:23 -08:00
committed by GitHub
parent f6291e655d
commit 655953fdac
5 changed files with 67 additions and 24 deletions

View File

@@ -26,6 +26,7 @@ from kafka.protocol import (
create_message, create_gzip_message, create_snappy_message)
from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner
from kafka.structs import TopicPartition, OffsetAndMetadata
from kafka.serializer import Serializer, Deserializer
# To be deprecated when KafkaProducer interface is released
from kafka.client import SimpleClient

View File

@@ -15,6 +15,7 @@ from kafka.metrics.stats import Avg, Count, Max, Rate
from kafka.protocol.fetch import FetchRequest
from kafka.protocol.message import PartialMessage
from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
from kafka.serializer import Deserializer
from kafka.structs import TopicPartition
log = logging.getLogger(__name__)
@@ -507,7 +508,12 @@ class Fetcher(six.Iterator):
if absolute_base_offset >= 0:
inner_offset += absolute_base_offset
key, value = self._deserialize(inner_msg)
key = self._deserialize(
self.config['key_deserializer'],
tp.topic, inner_msg.key)
value = self._deserialize(
self.config['value_deserializer'],
tp.topic, inner_msg.value)
yield ConsumerRecord(tp.topic, tp.partition, inner_offset,
inner_timestamp, msg.timestamp_type,
key, value, inner_msg.crc,
@@ -515,7 +521,12 @@ class Fetcher(six.Iterator):
len(inner_msg.value) if inner_msg.value is not None else -1)
else:
key, value = self._deserialize(msg)
key = self._deserialize(
self.config['key_deserializer'],
tp.topic, msg.key)
value = self._deserialize(
self.config['value_deserializer'],
tp.topic, msg.value)
yield ConsumerRecord(tp.topic, tp.partition, offset,
msg.timestamp, msg.timestamp_type,
key, value, msg.crc,
@@ -541,16 +552,12 @@ class Fetcher(six.Iterator):
self._iterator = None
raise
def _deserialize(self, msg):
if self.config['key_deserializer']:
key = self.config['key_deserializer'](msg.key) # pylint: disable-msg=not-callable
else:
key = msg.key
if self.config['value_deserializer']:
value = self.config['value_deserializer'](msg.value) # pylint: disable-msg=not-callable
else:
value = msg.value
return key, value
def _deserialize(self, f, topic, bytes_):
if not f:
return bytes_
if isinstance(f, Deserializer):
return f.deserialize(topic, bytes_)
return f(bytes_)
def _send_offset_request(self, partition, timestamp):
"""Fetch a single offset before the given timestamp for the partition.

View File

@@ -13,6 +13,7 @@ from ..client_async import KafkaClient, selectors
from ..metrics import MetricConfig, Metrics
from ..partitioner.default import DefaultPartitioner
from ..protocol.message import Message, MessageSet
from ..serializer import Serializer
from ..structs import TopicPartition
from .future import FutureRecordMetadata, FutureProduceResult
from .record_accumulator import AtomicInteger, RecordAccumulator
@@ -485,7 +486,12 @@ class KafkaProducer(object):
# available
self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)
key_bytes, value_bytes = self._serialize(topic, key, value)
key_bytes = self._serialize(
self.config['key_serializer'],
topic, key)
value_bytes = self._serialize(
self.config['value_serializer'],
topic, value)
partition = self._partition(topic, partition, key, value,
key_bytes, value_bytes)
@@ -606,17 +612,12 @@ class KafkaProducer(object):
else:
log.debug("_wait_on_metadata woke after %s secs.", elapsed)
def _serialize(self, topic, key, value):
# pylint: disable-msg=not-callable
if self.config['key_serializer']:
serialized_key = self.config['key_serializer'](key)
else:
serialized_key = key
if self.config['value_serializer']:
serialized_value = self.config['value_serializer'](value)
else:
serialized_value = value
return serialized_key, serialized_value
def _serialize(self, f, topic, data):
if not f:
return data
if isinstance(f, Serializer):
return f.serialize(topic, data)
return f(data)
def _partition(self, topic, partition, key, value,
serialized_key, serialized_value):

View File

@@ -0,0 +1,3 @@
from __future__ import absolute_import
from .abstract import Serializer, Deserializer

View File

@@ -0,0 +1,31 @@
from __future__ import absolute_import
import abc
class Serializer(object):
__meta__ = abc.ABCMeta
def __init__(self, **config):
pass
@abc.abstractmethod
def serialize(self, topic, value):
pass
def close(self):
pass
class Deserializer(object):
__meta__ = abc.ABCMeta
def __init__(self, **config):
pass
@abc.abstractmethod
def deserialize(self, topic, bytes_):
pass
def close(self):
pass