Merge pull request #329 from vshlapakov/feature-batch-msg-keys

Correct message keys for async batching mode
This commit is contained in:
Dana Powers
2015-03-29 15:36:57 -07:00
4 changed files with 76 additions and 16 deletions

View File

@@ -58,7 +58,7 @@ def _send_upstream(queue, client, codec, batch_time, batch_size,
# Adjust the timeout to match the remaining period # Adjust the timeout to match the remaining period
count -= 1 count -= 1
timeout = send_at - time.time() timeout = send_at - time.time()
msgset[topic_partition].append(msg) msgset[topic_partition].append((msg, key))
# Send collected requests upstream # Send collected requests upstream
reqs = [] reqs = []
@@ -192,7 +192,7 @@ class Producer(object):
self.queue.put((TopicAndPartition(topic, partition), m, key)) self.queue.put((TopicAndPartition(topic, partition), m, key))
resp = [] resp = []
else: else:
messages = create_message_set(msg, self.codec, key) messages = create_message_set([(m, key) for m in msg], self.codec, key)
req = ProduceRequest(topic, partition, messages) req = ProduceRequest(topic, partition, messages)
try: try:
resp = self.client.send_produce_request([req], acks=self.req_acks, resp = self.client.send_produce_request([req], acks=self.req_acks,

View File

@@ -559,7 +559,7 @@ def create_gzip_message(payloads, key=None):
""" """
message_set = KafkaProtocol._encode_message_set( message_set = KafkaProtocol._encode_message_set(
[create_message(payload, key) for payload in payloads]) [create_message(payload, pl_key) for payload, pl_key in payloads])
gzipped = gzip_encode(message_set) gzipped = gzip_encode(message_set)
codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
@@ -580,7 +580,7 @@ def create_snappy_message(payloads, key=None):
""" """
message_set = KafkaProtocol._encode_message_set( message_set = KafkaProtocol._encode_message_set(
[create_message(payload, key) for payload in payloads]) [create_message(payload, pl_key) for payload, pl_key in payloads])
snapped = snappy_encode(message_set) snapped = snappy_encode(message_set)
codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
@@ -595,7 +595,7 @@ def create_message_set(messages, codec=CODEC_NONE, key=None):
return a list containing a single codec-encoded message. return a list containing a single codec-encoded message.
""" """
if codec == CODEC_NONE: if codec == CODEC_NONE:
return [create_message(m, key) for m in messages] return [create_message(m, k) for m, k in messages]
elif codec == CODEC_GZIP: elif codec == CODEC_GZIP:
return [create_gzip_message(messages, key)] return [create_gzip_message(messages, key)]
elif codec == CODEC_SNAPPY: elif codec == CODEC_SNAPPY:

View File

@@ -71,9 +71,9 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
start_offset = self.current_offset(self.topic, 0) start_offset = self.current_offset(self.topic, 0)
message1 = create_gzip_message([ message1 = create_gzip_message([
("Gzipped 1 %d" % i).encode('utf-8') for i in range(100)]) (("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)])
message2 = create_gzip_message([ message2 = create_gzip_message([
("Gzipped 2 %d" % i).encode('utf-8') for i in range(100)]) (("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)])
self.assert_produce_request( self.assert_produce_request(
[ message1, message2 ], [ message1, message2 ],
@@ -87,8 +87,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
start_offset = self.current_offset(self.topic, 0) start_offset = self.current_offset(self.topic, 0)
self.assert_produce_request([ self.assert_produce_request([
create_snappy_message(["Snappy 1 %d" % i for i in range(100)]), create_snappy_message([("Snappy 1 %d" % i, None) for i in range(100)]),
create_snappy_message(["Snappy 2 %d" % i for i in range(100)]), create_snappy_message([("Snappy 2 %d" % i, None) for i in range(100)]),
], ],
start_offset, start_offset,
200, 200,
@@ -102,13 +102,13 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
messages = [ messages = [
create_message(b"Just a plain message"), create_message(b"Just a plain message"),
create_gzip_message([ create_gzip_message([
("Gzipped %d" % i).encode('utf-8') for i in range(100)]), (("Gzipped %d" % i).encode('utf-8'), None) for i in range(100)]),
] ]
# All snappy integration tests fail with nosnappyjava # All snappy integration tests fail with nosnappyjava
if False and has_snappy(): if False and has_snappy():
msg_count += 100 msg_count += 100
messages.append(create_snappy_message(["Snappy %d" % i for i in range(100)])) messages.append(create_snappy_message([("Snappy %d" % i, None) for i in range(100)]))
self.assert_produce_request(messages, start_offset, msg_count) self.assert_produce_request(messages, start_offset, msg_count)
@@ -118,7 +118,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
self.assert_produce_request([ self.assert_produce_request([
create_gzip_message([ create_gzip_message([
("Gzipped batch 1, message %d" % i).encode('utf-8') (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
for i in range(50000)]) for i in range(50000)])
], ],
start_offset, start_offset,
@@ -127,7 +127,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
self.assert_produce_request([ self.assert_produce_request([
create_gzip_message([ create_gzip_message([
("Gzipped batch 1, message %d" % i).encode('utf-8') (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
for i in range(50000)]) for i in range(50000)])
], ],
start_offset+50000, start_offset+50000,

View File

@@ -32,7 +32,7 @@ class TestProtocol(unittest.TestCase):
self.assertEqual(msg.value, payload) self.assertEqual(msg.value, payload)
def test_create_gzip(self): def test_create_gzip(self):
payloads = [b"v1", b"v2"] payloads = [(b"v1", None), (b"v2", None)]
msg = create_gzip_message(payloads) msg = create_gzip_message(payloads)
self.assertEqual(msg.magic, 0) self.assertEqual(msg.magic, 0)
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP) self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
@@ -59,9 +59,39 @@ class TestProtocol(unittest.TestCase):
self.assertEqual(decoded, expect) self.assertEqual(decoded, expect)
def test_create_gzip_keyed(self):
    # Each payload is a (value, key) pair; the per-message keys must end up
    # inside the compressed message set, not on the wrapper message.
    keyed_payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
    wrapper = create_gzip_message(keyed_payloads)

    # The wrapper message itself carries the gzip codec flag and no key.
    self.assertEqual(wrapper.magic, 0)
    self.assertEqual(wrapper.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
    self.assertEqual(wrapper.key, None)

    # Need to decode to check since gzipped payload is non-deterministic
    inner = gzip_decode(wrapper.value)

    # Expected CRC of each inner message, in the same order as keyed_payloads.
    expected_crcs = (1474775406, -16383415)

    chunks = []
    for (value, key), crc in zip(keyed_payloads, expected_crcs):
        chunks.extend([
            struct.pack(">q", 0),           # MsgSet Offset
            struct.pack(">i", 18),          # Msg Size
            struct.pack(">i", crc),         # CRC
            struct.pack(">bb", 0, 0),       # Magic, flags
            struct.pack(">i", len(key)),    # Length of key
            key,                            # Key
            struct.pack(">i", len(value)),  # Length of value
            value,                          # Value
        ])

    self.assertEqual(inner, b"".join(chunks))
@unittest.skipUnless(has_snappy(), "Snappy not available") @unittest.skipUnless(has_snappy(), "Snappy not available")
def test_create_snappy(self): def test_create_snappy(self):
payloads = [b"v1", b"v2"] payloads = [(b"v1", None), (b"v2", None)]
msg = create_snappy_message(payloads) msg = create_snappy_message(payloads)
self.assertEqual(msg.magic, 0) self.assertEqual(msg.magic, 0)
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY) self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
@@ -87,6 +117,36 @@ class TestProtocol(unittest.TestCase):
self.assertEqual(decoded, expect) self.assertEqual(decoded, expect)
@unittest.skipUnless(has_snappy(), "Snappy not available")
def test_create_snappy_keyed(self):
    # Each payload is a (value, key) pair; the per-message keys must end up
    # inside the compressed message set, not on the wrapper message.
    keyed_payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
    wrapper = create_snappy_message(keyed_payloads)

    # The wrapper message itself carries the snappy codec flag and no key.
    self.assertEqual(wrapper.magic, 0)
    self.assertEqual(wrapper.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
    self.assertEqual(wrapper.key, None)

    # Compare against the decompressed message set bytes.
    inner = snappy_decode(wrapper.value)

    # Expected CRC of each inner message, in the same order as keyed_payloads.
    expected_crcs = (1474775406, -16383415)

    chunks = []
    for (value, key), crc in zip(keyed_payloads, expected_crcs):
        chunks.extend([
            struct.pack(">q", 0),           # MsgSet Offset
            struct.pack(">i", 18),          # Msg Size
            struct.pack(">i", crc),         # CRC
            struct.pack(">bb", 0, 0),       # Magic, flags
            struct.pack(">i", len(key)),    # Length of key
            key,                            # Key
            struct.pack(">i", len(value)),  # Length of value
            value,                          # Value
        ])

    self.assertEqual(inner, b"".join(chunks))
def test_encode_message_header(self): def test_encode_message_header(self):
expect = b"".join([ expect = b"".join([
struct.pack(">h", 10), # API Key struct.pack(">h", 10), # API Key
@@ -701,7 +761,7 @@ class TestProtocol(unittest.TestCase):
yield yield
def test_create_message_set(self): def test_create_message_set(self):
messages = [1, 2, 3] messages = [(1, "k1"), (2, "k2"), (3, "k3")]
# Default codec is CODEC_NONE. Expect list of regular messages. # Default codec is CODEC_NONE. Expect list of regular messages.
expect = [sentinel.message] * len(messages) expect = [sentinel.message] * len(messages)