Add kafka.serializer interfaces (#912)
kafka/__init__.py
@@ -26,6 +26,7 @@ from kafka.protocol import (
     create_message, create_gzip_message, create_snappy_message)
 from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner
 from kafka.structs import TopicPartition, OffsetAndMetadata
+from kafka.serializer import Serializer, Deserializer
 
 # To be deprecated when KafkaProducer interface is released
 from kafka.client import SimpleClient
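With this re-export the new interfaces become importable from the package root. A quick sanity check (assuming kafka-python is installed):

    from kafka import Serializer, Deserializer
    # equivalently: from kafka.serializer import Serializer, Deserializer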
kafka/consumer/fetcher.py
@@ -15,6 +15,7 @@ from kafka.metrics.stats import Avg, Count, Max, Rate
 from kafka.protocol.fetch import FetchRequest
 from kafka.protocol.message import PartialMessage
 from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
+from kafka.serializer import Deserializer
 from kafka.structs import TopicPartition
 
 log = logging.getLogger(__name__)
@@ -507,7 +508,12 @@ class Fetcher(six.Iterator):
                         if absolute_base_offset >= 0:
                             inner_offset += absolute_base_offset
 
-                        key, value = self._deserialize(inner_msg)
+                        key = self._deserialize(
+                            self.config['key_deserializer'],
+                            tp.topic, inner_msg.key)
+                        value = self._deserialize(
+                            self.config['value_deserializer'],
+                            tp.topic, inner_msg.value)
                         yield ConsumerRecord(tp.topic, tp.partition, inner_offset,
                                              inner_timestamp, msg.timestamp_type,
                                              key, value, inner_msg.crc,
@@ -515,7 +521,12 @@ class Fetcher(six.Iterator):
                                              len(inner_msg.value) if inner_msg.value is not None else -1)
 
                 else:
-                    key, value = self._deserialize(msg)
+                    key = self._deserialize(
+                        self.config['key_deserializer'],
+                        tp.topic, msg.key)
+                    value = self._deserialize(
+                        self.config['value_deserializer'],
+                        tp.topic, msg.value)
                     yield ConsumerRecord(tp.topic, tp.partition, offset,
                                          msg.timestamp, msg.timestamp_type,
                                          key, value, msg.crc,
@@ -541,16 +552,12 @@ class Fetcher(six.Iterator):
             self._iterator = None
             raise
 
-    def _deserialize(self, msg):
-        if self.config['key_deserializer']:
-            key = self.config['key_deserializer'](msg.key) # pylint: disable-msg=not-callable
-        else:
-            key = msg.key
-        if self.config['value_deserializer']:
-            value = self.config['value_deserializer'](msg.value) # pylint: disable-msg=not-callable
-        else:
-            value = msg.value
-        return key, value
+    def _deserialize(self, f, topic, bytes_):
+        if not f:
+            return bytes_
+        if isinstance(f, Deserializer):
+            return f.deserialize(topic, bytes_)
+        return f(bytes_)
 
     def _send_offset_request(self, partition, timestamp):
         """Fetch a single offset before the given timestamp for the partition.
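_deserialize now dispatches on the configured deserializer: if none is set the raw bytes pass through unchanged, a Deserializer instance is called as f.deserialize(topic, bytes_), and any other callable is invoked as f(bytes_) without the topic. A usage sketch (broker address and topic name are illustrative):

    from kafka import KafkaConsumer
    from kafka.serializer import Deserializer

    class Utf8Deserializer(Deserializer):
        def deserialize(self, topic, bytes_):
            # topic is available for per-topic decoding; unused in this sketch
            return bytes_ if bytes_ is None else bytes_.decode('utf-8')

    # Plain callable: invoked as f(bytes_), never sees the topic
    consumer = KafkaConsumer('my-topic', bootstrap_servers='localhost:9092',
                             value_deserializer=lambda b: b.decode('utf-8'))

    # Deserializer instance: invoked as f.deserialize(topic, bytes_)
    consumer = KafkaConsumer('my-topic', bootstrap_servers='localhost:9092',
                             value_deserializer=Utf8Deserializer())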
kafka/producer/kafka.py
@@ -13,6 +13,7 @@ from ..client_async import KafkaClient, selectors
 from ..metrics import MetricConfig, Metrics
 from ..partitioner.default import DefaultPartitioner
 from ..protocol.message import Message, MessageSet
+from ..serializer import Serializer
 from ..structs import TopicPartition
 from .future import FutureRecordMetadata, FutureProduceResult
 from .record_accumulator import AtomicInteger, RecordAccumulator
@@ -485,7 +486,12 @@ class KafkaProducer(object):
         # available
         self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)
 
-        key_bytes, value_bytes = self._serialize(topic, key, value)
+        key_bytes = self._serialize(
+            self.config['key_serializer'],
+            topic, key)
+        value_bytes = self._serialize(
+            self.config['value_serializer'],
+            topic, value)
         partition = self._partition(topic, partition, key, value,
                                     key_bytes, value_bytes)
 
@@ -606,17 +612,12 @@ class KafkaProducer(object):
         else:
             log.debug("_wait_on_metadata woke after %s secs.", elapsed)
 
-    def _serialize(self, topic, key, value):
-        # pylint: disable-msg=not-callable
-        if self.config['key_serializer']:
-            serialized_key = self.config['key_serializer'](key)
-        else:
-            serialized_key = key
-        if self.config['value_serializer']:
-            serialized_value = self.config['value_serializer'](value)
-        else:
-            serialized_value = value
-        return serialized_key, serialized_value
+    def _serialize(self, f, topic, data):
+        if not f:
+            return data
+        if isinstance(f, Serializer):
+            return f.serialize(topic, data)
+        return f(data)
 
     def _partition(self, topic, partition, key, value,
                    serialized_key, serialized_value):
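_serialize mirrors the consumer-side dispatch: no serializer returns the data unchanged, a Serializer instance receives the topic via serialize(topic, data), and a plain callable is invoked as f(data). A sketch of the instance form (JSON encoding chosen for illustration; broker address and topic name are assumptions):

    import json

    from kafka import KafkaProducer
    from kafka.serializer import Serializer

    class JsonSerializer(Serializer):
        def serialize(self, topic, value):
            # a Serializer instance gets the topic; a plain callable would not
            return json.dumps(value).encode('utf-8')

    producer = KafkaProducer(bootstrap_servers='localhost:9092',
                             value_serializer=JsonSerializer())
    producer.send('my-topic', {'answer': 42})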
kafka/serializer/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
+from __future__ import absolute_import
+
+from .abstract import Serializer, Deserializer
kafka/serializer/abstract.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+from __future__ import absolute_import
+
+import abc
+
+
+class Serializer(object):
+    __meta__ = abc.ABCMeta
+
+    def __init__(self, **config):
+        pass
+
+    @abc.abstractmethod
+    def serialize(self, topic, value):
+        pass
+
+    def close(self):
+        pass
+
+
+class Deserializer(object):
+    __meta__ = abc.ABCMeta
+
+    def __init__(self, **config):
+        pass
+
+    @abc.abstractmethod
+    def deserialize(self, topic, bytes_):
+        pass
+
+    def close(self):
+        pass
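Note that `__meta__` is not the real `__metaclass__`/`metaclass` hook, so `@abc.abstractmethod` is not enforced at instantiation time here; subclasses are simply expected to override serialize/deserialize, and may use the `**config` constructor and the no-op close() hook. A matched pair built on these bases, with an illustrative `encoding` option (not part of this commit):

    from kafka.serializer import Serializer, Deserializer

    class StringSerializer(Serializer):
        def __init__(self, **config):
            # **config mirrors the base __init__ signature; 'encoding' is illustrative
            self.encoding = config.get('encoding', 'utf-8')

        def serialize(self, topic, value):
            return None if value is None else value.encode(self.encoding)

    class StringDeserializer(Deserializer):
        def __init__(self, **config):
            self.encoding = config.get('encoding', 'utf-8')

        def deserialize(self, topic, bytes_):
            return None if bytes_ is None else bytes_.decode(self.encoding)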