Add kafka.serializer interfaces (#912)

This commit is contained in:
Dana Powers
2016-12-19 11:27:23 -08:00
committed by GitHub
parent f6291e655d
commit 655953fdac
5 changed files with 67 additions and 24 deletions

View File

@@ -26,6 +26,7 @@ from kafka.protocol import (
create_message, create_gzip_message, create_snappy_message) create_message, create_gzip_message, create_snappy_message)
from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner
from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.structs import TopicPartition, OffsetAndMetadata
from kafka.serializer import Serializer, Deserializer
# To be deprecated when KafkaProducer interface is released # To be deprecated when KafkaProducer interface is released
from kafka.client import SimpleClient from kafka.client import SimpleClient

View File

@@ -15,6 +15,7 @@ from kafka.metrics.stats import Avg, Count, Max, Rate
from kafka.protocol.fetch import FetchRequest from kafka.protocol.fetch import FetchRequest
from kafka.protocol.message import PartialMessage from kafka.protocol.message import PartialMessage
from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
from kafka.serializer import Deserializer
from kafka.structs import TopicPartition from kafka.structs import TopicPartition
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -507,7 +508,12 @@ class Fetcher(six.Iterator):
if absolute_base_offset >= 0: if absolute_base_offset >= 0:
inner_offset += absolute_base_offset inner_offset += absolute_base_offset
key, value = self._deserialize(inner_msg) key = self._deserialize(
self.config['key_deserializer'],
tp.topic, inner_msg.key)
value = self._deserialize(
self.config['value_deserializer'],
tp.topic, inner_msg.value)
yield ConsumerRecord(tp.topic, tp.partition, inner_offset, yield ConsumerRecord(tp.topic, tp.partition, inner_offset,
inner_timestamp, msg.timestamp_type, inner_timestamp, msg.timestamp_type,
key, value, inner_msg.crc, key, value, inner_msg.crc,
@@ -515,7 +521,12 @@ class Fetcher(six.Iterator):
len(inner_msg.value) if inner_msg.value is not None else -1) len(inner_msg.value) if inner_msg.value is not None else -1)
else: else:
key, value = self._deserialize(msg) key = self._deserialize(
self.config['key_deserializer'],
tp.topic, msg.key)
value = self._deserialize(
self.config['value_deserializer'],
tp.topic, msg.value)
yield ConsumerRecord(tp.topic, tp.partition, offset, yield ConsumerRecord(tp.topic, tp.partition, offset,
msg.timestamp, msg.timestamp_type, msg.timestamp, msg.timestamp_type,
key, value, msg.crc, key, value, msg.crc,
@@ -541,16 +552,12 @@ class Fetcher(six.Iterator):
self._iterator = None self._iterator = None
raise raise
def _deserialize(self, msg): def _deserialize(self, f, topic, bytes_):
if self.config['key_deserializer']: if not f:
key = self.config['key_deserializer'](msg.key) # pylint: disable-msg=not-callable return bytes_
else: if isinstance(f, Deserializer):
key = msg.key return f.deserialize(topic, bytes_)
if self.config['value_deserializer']: return f(bytes_)
value = self.config['value_deserializer'](msg.value) # pylint: disable-msg=not-callable
else:
value = msg.value
return key, value
def _send_offset_request(self, partition, timestamp): def _send_offset_request(self, partition, timestamp):
"""Fetch a single offset before the given timestamp for the partition. """Fetch a single offset before the given timestamp for the partition.

View File

@@ -13,6 +13,7 @@ from ..client_async import KafkaClient, selectors
from ..metrics import MetricConfig, Metrics from ..metrics import MetricConfig, Metrics
from ..partitioner.default import DefaultPartitioner from ..partitioner.default import DefaultPartitioner
from ..protocol.message import Message, MessageSet from ..protocol.message import Message, MessageSet
from ..serializer import Serializer
from ..structs import TopicPartition from ..structs import TopicPartition
from .future import FutureRecordMetadata, FutureProduceResult from .future import FutureRecordMetadata, FutureProduceResult
from .record_accumulator import AtomicInteger, RecordAccumulator from .record_accumulator import AtomicInteger, RecordAccumulator
@@ -485,7 +486,12 @@ class KafkaProducer(object):
# available # available
self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0) self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)
key_bytes, value_bytes = self._serialize(topic, key, value) key_bytes = self._serialize(
self.config['key_serializer'],
topic, key)
value_bytes = self._serialize(
self.config['value_serializer'],
topic, value)
partition = self._partition(topic, partition, key, value, partition = self._partition(topic, partition, key, value,
key_bytes, value_bytes) key_bytes, value_bytes)
@@ -606,17 +612,12 @@ class KafkaProducer(object):
else: else:
log.debug("_wait_on_metadata woke after %s secs.", elapsed) log.debug("_wait_on_metadata woke after %s secs.", elapsed)
def _serialize(self, topic, key, value): def _serialize(self, f, topic, data):
# pylint: disable-msg=not-callable if not f:
if self.config['key_serializer']: return data
serialized_key = self.config['key_serializer'](key) if isinstance(f, Serializer):
else: return f.serialize(topic, data)
serialized_key = key return f(data)
if self.config['value_serializer']:
serialized_value = self.config['value_serializer'](value)
else:
serialized_value = value
return serialized_key, serialized_value
def _partition(self, topic, partition, key, value, def _partition(self, topic, partition, key, value,
serialized_key, serialized_value): serialized_key, serialized_value):

View File

@@ -0,0 +1,3 @@
from __future__ import absolute_import
from .abstract import Serializer, Deserializer

View File

@@ -0,0 +1,31 @@
from __future__ import absolute_import
import abc
class Serializer(abc.ABCMeta('SerializerABC', (object,), {})):
    """Abstract interface for serializing an object to bytes for a topic.

    Subclasses must implement ``serialize``.  The base class is created
    through ``abc.ABCMeta(...)`` directly so the abstractmethod check is
    enforced on both Python 2 and Python 3 -- the original
    ``__meta__ = abc.ABCMeta`` attribute is not a recognized metaclass
    hook on either interpreter and was silently ignored, which made this
    "abstract" class instantiable.
    """

    def __init__(self, **config):
        # Accept arbitrary keyword config so subclasses can take
        # configuration without changing the constructor signature.
        pass

    @abc.abstractmethod
    def serialize(self, topic, value):
        """Return the bytes representation of ``value`` for ``topic``."""
        pass

    def close(self):
        """Release any resources held by the serializer (no-op by default)."""
        pass
class Deserializer(abc.ABCMeta('DeserializerABC', (object,), {})):
    """Abstract interface for deserializing topic bytes into an object.

    Subclasses must implement ``deserialize``.  The base class is created
    through ``abc.ABCMeta(...)`` directly so the abstractmethod check is
    enforced on both Python 2 and Python 3 -- the original
    ``__meta__ = abc.ABCMeta`` attribute is not a recognized metaclass
    hook on either interpreter and was silently ignored, which made this
    "abstract" class instantiable.
    """

    def __init__(self, **config):
        # Accept arbitrary keyword config so subclasses can take
        # configuration without changing the constructor signature.
        pass

    @abc.abstractmethod
    def deserialize(self, topic, bytes_):
        """Return the object decoded from ``bytes_`` read off ``topic``."""
        pass

    def close(self):
        """Release any resources held by the deserializer (no-op by default)."""
        pass