Merge branch 'teach_producers_about_compression' into producer_compression
Conflicts:
	servers/0.8.0/kafka-src
	test/test_unit.py
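For context, a minimal sketch of how the new codec option is meant to be
used from application code; the broker address, topic name, and exact
client constructor here are illustrative assumptions, not part of this diff:

    from kafka.client import KafkaClient
    from kafka.producer import SimpleProducer
    from kafka.protocol import CODEC_GZIP

    # Assumes a broker reachable on localhost:9092 and a topic "my-topic".
    kafka = KafkaClient("localhost", 9092)

    # Payloads passed in a single send_messages() call are wrapped into
    # one gzip-compressed message set before being sent upstream.
    producer = SimpleProducer(kafka, codec=CODEC_GZIP)
    producer.send_messages("my-topic", "some message", "another message")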
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -11,7 +11,10 @@ from multiprocessing import Queue, Process

 from kafka.common import ProduceRequest, TopicAndPartition
 from kafka.partitioner import HashedPartitioner
-from kafka.protocol import create_message
+from kafka.protocol import (
+    CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, ALL_CODECS,
+    create_message, create_gzip_message, create_snappy_message,
+)

 log = logging.getLogger("kafka")

@@ -21,7 +24,7 @@ BATCH_SEND_MSG_COUNT = 20
 STOP_ASYNC_PRODUCER = -1


-def _send_upstream(queue, client, batch_time, batch_size,
+def _send_upstream(queue, client, codec, batch_time, batch_size,
                    req_acks, ack_timeout):
     """
     Listen on the queue for a specified number of messages or till
@@ -62,7 +65,14 @@ def _send_upstream(queue, client, batch_time, batch_size,

         # Send collected requests upstream
         reqs = []
-        for topic_partition, messages in msgset.items():
+        for topic_partition, msg in msgset.items():
+            if codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic_partition.topic,
                                  topic_partition.partition,
                                  messages)
@@ -102,6 +112,7 @@ class Producer(object):
     def __init__(self, client, async=False,
                  req_acks=ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -119,11 +130,17 @@ class Producer(object):
         self.req_acks = req_acks
         self.ack_timeout = ack_timeout

+        if codec is None:
+            codec = CODEC_NONE
+        assert codec in ALL_CODECS
+        self.codec = codec
+
         if self.async:
             self.queue = Queue()  # Messages are sent through this queue
             self.proc = Process(target=_send_upstream,
                                 args=(self.queue,
                                       self.client.copy(),
+                                      self.codec,
                                       batch_send_every_t,
                                       batch_send_every_n,
                                       self.req_acks,
@@ -139,11 +156,16 @@ class Producer(object):
         """
         if self.async:
             for m in msg:
-                self.queue.put((TopicAndPartition(topic, partition),
-                                create_message(m)))
+                self.queue.put((TopicAndPartition(topic, partition), m))
             resp = []
         else:
-            messages = [create_message(m) for m in msg]
+            if self.codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif self.codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic, partition, messages)
             try:
                 resp = self.client.send_produce_request([req], acks=self.req_acks,
@@ -168,7 +190,7 @@ class Producer(object):

 class SimpleProducer(Producer):
     """
-    A simple, round-robbin producer. Each message goes to exactly one partition
+    A simple, round-robin producer. Each message goes to exactly one partition

     Params:
     client - The Kafka client instance to use
@@ -189,6 +211,7 @@ class SimpleProducer(Producer):
     def __init__(self, client, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
@@ -196,7 +219,7 @@ class SimpleProducer(Producer):
         self.partition_cycles = {}
         self.random_start = random_start
         super(SimpleProducer, self).__init__(client, async, req_acks,
-                                             ack_timeout, batch_send,
+                                             ack_timeout, codec, batch_send,
                                              batch_send_every_n,
                                              batch_send_every_t)

@@ -241,6 +264,7 @@ class KeyedProducer(Producer):
     def __init__(self, client, partitioner=None, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -250,7 +274,7 @@ class KeyedProducer(Producer):
         self.partitioners = {}

         super(KeyedProducer, self).__init__(client, async, req_acks,
-                                            ack_timeout, batch_send,
+                                            ack_timeout, codec, batch_send,
                                             batch_send_every_n,
                                             batch_send_every_t)

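The codec dispatch above now appears twice, once in _send_upstream and once
in Producer.send_messages. A standalone sketch of that shared logic; the
helper name is hypothetical, not something this diff adds:

    from kafka.protocol import (
        CODEC_GZIP, CODEC_SNAPPY,
        create_message, create_gzip_message, create_snappy_message,
    )

    def _messages_for_codec(codec, payloads):
        # Hypothetical helper mirroring the duplicated branches above:
        # gzip/snappy pack the whole payload list into one compressed
        # message-set message, while the default path emits one plain
        # Message per payload.
        if codec == CODEC_GZIP:
            return [create_gzip_message(payloads)]
        elif codec == CODEC_SNAPPY:
            return [create_snappy_message(payloads)]
        else:
            return [create_message(p) for p in payloads]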
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -18,6 +18,12 @@ from kafka.util import (

 log = logging.getLogger("kafka")

+ATTRIBUTE_CODEC_MASK = 0x03
+CODEC_NONE = 0x00
+CODEC_GZIP = 0x01
+CODEC_SNAPPY = 0x02
+ALL_CODECS = (CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY)
+

 class KafkaProtocol(object):
     """
@@ -32,11 +38,6 @@ class KafkaProtocol(object):
     OFFSET_COMMIT_KEY = 8
     OFFSET_FETCH_KEY = 9

-    ATTRIBUTE_CODEC_MASK = 0x03
-    CODEC_NONE = 0x00
-    CODEC_GZIP = 0x01
-    CODEC_SNAPPY = 0x02
-
     ###################
     #   Private API   #
     ###################
@@ -150,17 +151,17 @@ class KafkaProtocol(object):
             (key, cur) = read_int_string(data, cur)
             (value, cur) = read_int_string(data, cur)

-            codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK
+            codec = att & ATTRIBUTE_CODEC_MASK

-            if codec == KafkaProtocol.CODEC_NONE:
+            if codec == CODEC_NONE:
                 yield (offset, Message(magic, att, key, value))

-            elif codec == KafkaProtocol.CODEC_GZIP:
+            elif codec == CODEC_GZIP:
                 gz = gzip_decode(value)
                 for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
                     yield (offset, msg)

-            elif codec == KafkaProtocol.CODEC_SNAPPY:
+            elif codec == CODEC_SNAPPY:
                 snp = snappy_decode(value)
                 for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
                     yield (offset, msg)
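A quick worked example of the attributes-byte arithmetic these hunks rely
on, using the module-level constants introduced above: the codec lives in
the low two bits of the attributes byte, so masking recovers it directly.

    ATTRIBUTE_CODEC_MASK = 0x03       # low two bits of the attributes byte
    CODEC_GZIP = 0x01

    att = 0x01                        # attributes byte of a fetched message
    codec = att & ATTRIBUTE_CODEC_MASK
    assert codec == CODEC_GZIP        # 0b01 & 0b11 == 0b01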
@@ -543,7 +544,7 @@ def create_gzip_message(payloads, key=None):
         [create_message(payload) for payload in payloads])

     gzipped = gzip_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP

     return Message(0, 0x00 | codec, key, gzipped)

@@ -564,6 +565,6 @@ def create_snappy_message(payloads, key=None):
         [create_message(payload) for payload in payloads])

     snapped = snappy_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY

     return Message(0, 0x00 | codec, key, snapped)
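Note that ATTRIBUTE_CODEC_MASK & CODEC_GZIP simply evaluates to CODEC_GZIP
(0x03 & 0x01 == 0x01), so the masking here is defensive. A sketch of what
create_gzip_message produces, mirroring the round-trip the updated tests
below perform:

    from kafka.codec import gzip_decode
    from kafka.protocol import (
        create_gzip_message, ATTRIBUTE_CODEC_MASK, CODEC_GZIP
    )

    msg = create_gzip_message(["v1", "v2"])
    assert msg.magic == 0
    assert msg.attributes == ATTRIBUTE_CODEC_MASK & CODEC_GZIP  # == 0x01
    # The value field holds a gzipped message set; decompress to inspect it.
    raw_message_set = gzip_decode(msg.value)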
--- a/test/test_unit.py
+++ b/test/test_unit.py
@@ -16,7 +16,8 @@ from kafka.codec import (
     snappy_encode, snappy_decode
 )
 from kafka.protocol import (
-    create_gzip_message, create_message, create_snappy_message, KafkaProtocol
+    create_gzip_message, create_message, create_snappy_message, KafkaProtocol,
+    ATTRIBUTE_CODEC_MASK, CODEC_GZIP, CODEC_SNAPPY
 )

 class TestProtocol(unittest2.TestCase):
@@ -33,8 +34,7 @@ class TestProtocol(unittest2.TestCase):
         payloads = ["v1", "v2"]
         msg = create_gzip_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_GZIP)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
         self.assertEqual(msg.key, None)
         # Need to decode to check since gzipped payload is non-deterministic
         decoded = gzip_decode(msg.value)
@@ -63,8 +63,7 @@ class TestProtocol(unittest2.TestCase):
         payloads = ["v1", "v2"]
         msg = create_snappy_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_SNAPPY)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
         self.assertEqual(msg.key, None)
         decoded = snappy_decode(msg.value)
         expect = "".join([
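One practical caveat, not spelled out in the diff itself: CODEC_SNAPPY
depends on the optional python-snappy package being installed; kafka.codec
raises an error when snappy support is unavailable, so CODEC_GZIP is the
safer choice where the dependency set is not controlled.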