Auto-adjusting consumer fetch size
Related to #42. Adds a new ConsumerFetchSizeTooSmall exception that is raised when `_decode_message_set_iter` gets a BufferUnderflowError but has not yet yielded a message. In this event, SimpleConsumer will increase the fetch size by 1.5x and continue the fetching loop while _not_ increasing the offset (it essentially retries the request with a larger fetch size). Once the consumer fetch size has been increased, it stays increased while SimpleConsumer fetches from that partition.
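For illustration, here is a minimal, self-contained sketch of the mechanism described above. This is not the code from this commit: `decode_message_set`, `fetch_all`, and the length-prefixed toy message format are hypothetical stand-ins for `_decode_message_set_iter` and the fetch loop in SimpleConsumer.

    import struct

    class ConsumerFetchSizeTooSmall(Exception):
        pass

    def decode_message_set(data):
        """Yield length-prefixed messages from a byte buffer. Raise
        ConsumerFetchSizeTooSmall if the buffer ends before one complete
        message was decoded; a partial message after at least one full
        one just ends the batch."""
        cur = 0
        read_message = False
        while cur < len(data):
            if cur + 4 > len(data):
                break  # partial length prefix: end of this batch
            (size,) = struct.unpack('>i', data[cur:cur + 4])
            if cur + 4 + size > len(data):
                if not read_message:
                    raise ConsumerFetchSizeTooSmall()
                break  # partial trailing message: caller refetches later
            yield data[cur + 4:cur + 4 + size]
            read_message = True
            cur += 4 + size

    def fetch_all(log, offset, fetch_size):
        """Consume 'log' (bytes standing in for a partition), growing
        fetch_size by 1.5x, without advancing the offset, whenever the
        current window cannot hold a single complete message."""
        messages = []
        while offset < len(log):
            try:
                batch = list(decode_message_set(log[offset:offset + fetch_size]))
            except ConsumerFetchSizeTooSmall:
                fetch_size = int(fetch_size * 1.5)  # retry the same offset
                continue
            for m in batch:
                messages.append(m)
                offset += 4 + len(m)
        return messages

    # A 5-byte message followed by a 64-byte one; start with an 8-byte window.
    log = b"".join(struct.pack('>i', len(m)) + m for m in [b"small", b"x" * 64])
    assert fetch_all(log, 0, 8) == [b"small", b"x" * 64]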
--- a/kafka/consumer.py
+++ b/kafka/consumer.py
@@ -12,7 +12,7 @@ from kafka.common import (
 )
 
 from kafka.util import (
-    ReentrantTimer
+    ReentrantTimer, ConsumerFetchSizeTooSmall
 )
 
 log = logging.getLogger("kafka")
@@ -357,17 +357,20 @@ class SimpleConsumer(Consumer):
         if self.fetch_started[partition]:
             offset += 1
 
+        fetch_size = self.fetch_min_bytes
+
         while True:
-            req = FetchRequest(self.topic, partition, offset, self.fetch_min_bytes)
+            req = FetchRequest(self.topic, partition, offset, fetch_size)
 
             (resp,) = self.client.send_fetch_request([req],
                                                      max_wait_time=self.fetch_max_wait_time,
-                                                     min_bytes=self.fetch_min_bytes)
+                                                     min_bytes=fetch_size)
 
             assert resp.topic == self.topic
             assert resp.partition == partition
 
             next_offset = None
             try:
                 for message in resp.messages:
                     next_offset = message.offset
@@ -380,6 +383,13 @@ class SimpleConsumer(Consumer):
                     self.fetch_started[partition] = True
                     self.offsets[partition] = message.offset
                     yield message
+            except ConsumerFetchSizeTooSmall, e:
+                log.warn("Fetch size is too small, increasing by 1.5x and retrying")
+                fetch_size *= 1.5
+                continue
             except ConsumerNoMoreData, e:
                 log.debug("Iteration was ended by %r", e)
 
             if next_offset is None:
                 break
             else:
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -13,7 +13,7 @@ from kafka.common import (
 from kafka.util import (
     read_short_string, read_int_string, relative_unpack,
     write_short_string, write_int_string, group_by_topic_and_partition,
-    BufferUnderflowError, ChecksumError
+    BufferUnderflowError, ChecksumError, ConsumerFetchSizeTooSmall
 )
 
 log = logging.getLogger("kafka")
@@ -110,16 +110,20 @@ class KafkaProtocol(object):
         recurse easily.
         """
         cur = 0
+        read_message = False
         while cur < len(data):
             try:
                 ((offset, ), cur) = relative_unpack('>q', data, cur)
                 (msg, cur) = read_int_string(data, cur)
-                for (offset, message) in KafkaProtocol._decode_message(msg,
-                                                                       offset):
+                for (offset, message) in KafkaProtocol._decode_message(msg, offset):
+                    read_message = True
                     yield OffsetAndMessage(offset, message)
             except BufferUnderflowError:
-                # If we get a partial read of a message, stop
-                raise StopIteration()
+                if read_message is False:
+                    # If we get a partial read of a message, but haven't yielded
+                    # anything, there's a problem
+                    raise ConsumerFetchSizeTooSmall()
+                else:
+                    raise StopIteration()
 
     @classmethod
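The `read_message` flag above is the crux of the protocol change: a fetch response can legitimately end with a truncated message (the broker returns at most the requested number of bytes), so a BufferUnderflowError after at least one yielded message simply marks the end of the batch. An underflow before any message, however, means re-reading the same byte range can never make progress, and the fetch size itself must grow.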
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -73,6 +73,8 @@ class BufferUnderflowError(Exception):
 class ChecksumError(Exception):
     pass
 
+class ConsumerFetchSizeTooSmall(Exception):
+    pass
 
 class ReentrantTimer(object):
     """
--- a/test/test_integration.py
+++ b/test/test_integration.py
@@ -8,7 +8,6 @@ import random
 from kafka import *  # noqa
 from kafka.common import *  # noqa
 from kafka.codec import has_gzip, has_snappy
-
 from .fixtures import ZookeeperFixture, KafkaFixture
 
 
@@ -757,20 +756,15 @@ class TestConsumer(unittest.TestCase):
         self.assertEquals(resp.error, 0)
         self.assertEquals(resp.offset, 10)
 
         # Consumer should still get all of them
         consumer = SimpleConsumer(self.client, "group1", "test_large_messages")
-        it = consumer.__iter__()
-        for i in range(10):
-            self.assertEquals(messages1[i], it.next().message)
-
-        consumer = SimpleConsumer(self.client, "group2", "test_large_messages", fetch_size_bytes=5120)
-        it = consumer.__iter__()
-        for i in range(10):
-            self.assertEquals(messages1[i], it.next().message)
-        for i in range(10):
-            self.assertEquals(messages2[i], it.next().message)
+        all_messages = messages1 + messages2
+        for i, message in enumerate(consumer):
+            self.assertEquals(all_messages[i], message.message)
+        self.assertEquals(i, 19)
 
 def random_string(l):
-    s = "".join(random.choice(string.printable) for i in xrange(l))
+    s = "".join(random.choice(string.letters) for i in xrange(l))
    return s
 
 if __name__ == "__main__":
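The test simplification follows from the feature itself: with auto-adjustment, the single consumer with the default fetch size can now read the large messages too, so the second consumer configured with fetch_size_bytes=5120 is no longer needed and the test just iterates all 20 messages in one pass.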