Merge pull request #329 from vshlapakov/feature-batch-msg-keys
Correct message keys for async batching mode
This commit is contained in:
@@ -58,7 +58,7 @@ def _send_upstream(queue, client, codec, batch_time, batch_size,
|
|||||||
# Adjust the timeout to match the remaining period
|
# Adjust the timeout to match the remaining period
|
||||||
count -= 1
|
count -= 1
|
||||||
timeout = send_at - time.time()
|
timeout = send_at - time.time()
|
||||||
msgset[topic_partition].append(msg)
|
msgset[topic_partition].append((msg, key))
|
||||||
|
|
||||||
# Send collected requests upstream
|
# Send collected requests upstream
|
||||||
reqs = []
|
reqs = []
|
||||||
@@ -192,7 +192,7 @@ class Producer(object):
|
|||||||
self.queue.put((TopicAndPartition(topic, partition), m, key))
|
self.queue.put((TopicAndPartition(topic, partition), m, key))
|
||||||
resp = []
|
resp = []
|
||||||
else:
|
else:
|
||||||
messages = create_message_set(msg, self.codec, key)
|
messages = create_message_set([(m, key) for m in msg], self.codec, key)
|
||||||
req = ProduceRequest(topic, partition, messages)
|
req = ProduceRequest(topic, partition, messages)
|
||||||
try:
|
try:
|
||||||
resp = self.client.send_produce_request([req], acks=self.req_acks,
|
resp = self.client.send_produce_request([req], acks=self.req_acks,
|
||||||
|
@@ -559,7 +559,7 @@ def create_gzip_message(payloads, key=None):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
message_set = KafkaProtocol._encode_message_set(
|
message_set = KafkaProtocol._encode_message_set(
|
||||||
[create_message(payload, key) for payload in payloads])
|
[create_message(payload, pl_key) for payload, pl_key in payloads])
|
||||||
|
|
||||||
gzipped = gzip_encode(message_set)
|
gzipped = gzip_encode(message_set)
|
||||||
codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
|
codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
|
||||||
@@ -580,7 +580,7 @@ def create_snappy_message(payloads, key=None):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
message_set = KafkaProtocol._encode_message_set(
|
message_set = KafkaProtocol._encode_message_set(
|
||||||
[create_message(payload, key) for payload in payloads])
|
[create_message(payload, pl_key) for payload, pl_key in payloads])
|
||||||
|
|
||||||
snapped = snappy_encode(message_set)
|
snapped = snappy_encode(message_set)
|
||||||
codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
|
codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
|
||||||
@@ -595,7 +595,7 @@ def create_message_set(messages, codec=CODEC_NONE, key=None):
|
|||||||
return a list containing a single codec-encoded message.
|
return a list containing a single codec-encoded message.
|
||||||
"""
|
"""
|
||||||
if codec == CODEC_NONE:
|
if codec == CODEC_NONE:
|
||||||
return [create_message(m, key) for m in messages]
|
return [create_message(m, k) for m, k in messages]
|
||||||
elif codec == CODEC_GZIP:
|
elif codec == CODEC_GZIP:
|
||||||
return [create_gzip_message(messages, key)]
|
return [create_gzip_message(messages, key)]
|
||||||
elif codec == CODEC_SNAPPY:
|
elif codec == CODEC_SNAPPY:
|
||||||
|
@@ -71,9 +71,9 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
start_offset = self.current_offset(self.topic, 0)
|
start_offset = self.current_offset(self.topic, 0)
|
||||||
|
|
||||||
message1 = create_gzip_message([
|
message1 = create_gzip_message([
|
||||||
("Gzipped 1 %d" % i).encode('utf-8') for i in range(100)])
|
(("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)])
|
||||||
message2 = create_gzip_message([
|
message2 = create_gzip_message([
|
||||||
("Gzipped 2 %d" % i).encode('utf-8') for i in range(100)])
|
(("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)])
|
||||||
|
|
||||||
self.assert_produce_request(
|
self.assert_produce_request(
|
||||||
[ message1, message2 ],
|
[ message1, message2 ],
|
||||||
@@ -87,8 +87,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
start_offset = self.current_offset(self.topic, 0)
|
start_offset = self.current_offset(self.topic, 0)
|
||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_snappy_message(["Snappy 1 %d" % i for i in range(100)]),
|
create_snappy_message([("Snappy 1 %d" % i, None) for i in range(100)]),
|
||||||
create_snappy_message(["Snappy 2 %d" % i for i in range(100)]),
|
create_snappy_message([("Snappy 2 %d" % i, None) for i in range(100)]),
|
||||||
],
|
],
|
||||||
start_offset,
|
start_offset,
|
||||||
200,
|
200,
|
||||||
@@ -102,13 +102,13 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
messages = [
|
messages = [
|
||||||
create_message(b"Just a plain message"),
|
create_message(b"Just a plain message"),
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped %d" % i).encode('utf-8') for i in range(100)]),
|
(("Gzipped %d" % i).encode('utf-8'), None) for i in range(100)]),
|
||||||
]
|
]
|
||||||
|
|
||||||
# All snappy integration tests fail with nosnappyjava
|
# All snappy integration tests fail with nosnappyjava
|
||||||
if False and has_snappy():
|
if False and has_snappy():
|
||||||
msg_count += 100
|
msg_count += 100
|
||||||
messages.append(create_snappy_message(["Snappy %d" % i for i in range(100)]))
|
messages.append(create_snappy_message([("Snappy %d" % i, None) for i in range(100)]))
|
||||||
|
|
||||||
self.assert_produce_request(messages, start_offset, msg_count)
|
self.assert_produce_request(messages, start_offset, msg_count)
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped batch 1, message %d" % i).encode('utf-8')
|
(("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
|
||||||
for i in range(50000)])
|
for i in range(50000)])
|
||||||
],
|
],
|
||||||
start_offset,
|
start_offset,
|
||||||
@@ -127,7 +127,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped batch 1, message %d" % i).encode('utf-8')
|
(("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
|
||||||
for i in range(50000)])
|
for i in range(50000)])
|
||||||
],
|
],
|
||||||
start_offset+50000,
|
start_offset+50000,
|
||||||
|
@@ -32,7 +32,7 @@ class TestProtocol(unittest.TestCase):
|
|||||||
self.assertEqual(msg.value, payload)
|
self.assertEqual(msg.value, payload)
|
||||||
|
|
||||||
def test_create_gzip(self):
|
def test_create_gzip(self):
|
||||||
payloads = [b"v1", b"v2"]
|
payloads = [(b"v1", None), (b"v2", None)]
|
||||||
msg = create_gzip_message(payloads)
|
msg = create_gzip_message(payloads)
|
||||||
self.assertEqual(msg.magic, 0)
|
self.assertEqual(msg.magic, 0)
|
||||||
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
||||||
@@ -59,9 +59,39 @@ class TestProtocol(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(decoded, expect)
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
|
def test_create_gzip_keyed(self):
|
||||||
|
payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
|
||||||
|
msg = create_gzip_message(payloads)
|
||||||
|
self.assertEqual(msg.magic, 0)
|
||||||
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
||||||
|
self.assertEqual(msg.key, None)
|
||||||
|
# Need to decode to check since gzipped payload is non-deterministic
|
||||||
|
decoded = gzip_decode(msg.value)
|
||||||
|
expect = b"".join([
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", 1474775406), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k1", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v1", # Value
|
||||||
|
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", -16383415), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k2", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v2", # Value
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
def test_create_snappy(self):
|
def test_create_snappy(self):
|
||||||
payloads = [b"v1", b"v2"]
|
payloads = [(b"v1", None), (b"v2", None)]
|
||||||
msg = create_snappy_message(payloads)
|
msg = create_snappy_message(payloads)
|
||||||
self.assertEqual(msg.magic, 0)
|
self.assertEqual(msg.magic, 0)
|
||||||
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
||||||
@@ -87,6 +117,36 @@ class TestProtocol(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(decoded, expect)
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
|
def test_create_snappy_keyed(self):
|
||||||
|
payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
|
||||||
|
msg = create_snappy_message(payloads)
|
||||||
|
self.assertEqual(msg.magic, 0)
|
||||||
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
||||||
|
self.assertEqual(msg.key, None)
|
||||||
|
decoded = snappy_decode(msg.value)
|
||||||
|
expect = b"".join([
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", 1474775406), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k1", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v1", # Value
|
||||||
|
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", -16383415), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k2", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v2", # Value
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
def test_encode_message_header(self):
|
def test_encode_message_header(self):
|
||||||
expect = b"".join([
|
expect = b"".join([
|
||||||
struct.pack(">h", 10), # API Key
|
struct.pack(">h", 10), # API Key
|
||||||
@@ -701,7 +761,7 @@ class TestProtocol(unittest.TestCase):
|
|||||||
yield
|
yield
|
||||||
|
|
||||||
def test_create_message_set(self):
|
def test_create_message_set(self):
|
||||||
messages = [1, 2, 3]
|
messages = [(1, "k1"), (2, "k2"), (3, "k3")]
|
||||||
|
|
||||||
# Default codec is CODEC_NONE. Expect list of regular messages.
|
# Default codec is CODEC_NONE. Expect list of regular messages.
|
||||||
expect = [sentinel.message] * len(messages)
|
expect = [sentinel.message] * len(messages)
|
||||||
|
Reference in New Issue
Block a user