Auto-adjusting consumer fetch size
Related to #42 Adds new ConsumerFetchSizeTooSmall exception that is thrown when `_decode_message_set_iter` gets a BufferUnderflowError but has not yet yielded a message In this event, SimpleConsumer will increase the fetch size by 1.5 and continue the fetching loop while _not_ increasing the offset (basically just retries the request with a larger fetch size) Once the consumer fetch size has been increased, it will remain increased while SimpleConsumer fetches from that partition
This commit is contained in:
@@ -12,7 +12,7 @@ from kafka.common import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from kafka.util import (
|
from kafka.util import (
|
||||||
ReentrantTimer
|
ReentrantTimer, ConsumerFetchSizeTooSmall
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger("kafka")
|
log = logging.getLogger("kafka")
|
||||||
@@ -357,17 +357,20 @@ class SimpleConsumer(Consumer):
|
|||||||
if self.fetch_started[partition]:
|
if self.fetch_started[partition]:
|
||||||
offset += 1
|
offset += 1
|
||||||
|
|
||||||
|
fetch_size = self.fetch_min_bytes
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
req = FetchRequest(self.topic, partition, offset, self.fetch_min_bytes)
|
req = FetchRequest(self.topic, partition, offset, fetch_size)
|
||||||
|
|
||||||
(resp,) = self.client.send_fetch_request([req],
|
(resp,) = self.client.send_fetch_request([req],
|
||||||
max_wait_time=self.fetch_max_wait_time,
|
max_wait_time=self.fetch_max_wait_time,
|
||||||
min_bytes=self.fetch_min_bytes)
|
min_bytes=fetch_size)
|
||||||
|
|
||||||
assert resp.topic == self.topic
|
assert resp.topic == self.topic
|
||||||
assert resp.partition == partition
|
assert resp.partition == partition
|
||||||
|
|
||||||
next_offset = None
|
next_offset = None
|
||||||
|
try:
|
||||||
for message in resp.messages:
|
for message in resp.messages:
|
||||||
next_offset = message.offset
|
next_offset = message.offset
|
||||||
|
|
||||||
@@ -380,6 +383,13 @@ class SimpleConsumer(Consumer):
|
|||||||
self.fetch_started[partition] = True
|
self.fetch_started[partition] = True
|
||||||
self.offsets[partition] = message.offset
|
self.offsets[partition] = message.offset
|
||||||
yield message
|
yield message
|
||||||
|
except ConsumerFetchSizeTooSmall, e:
|
||||||
|
log.warn("Fetch size is too small, increasing by 1.5x and retrying")
|
||||||
|
fetch_size *= 1.5
|
||||||
|
continue
|
||||||
|
except ConsumerNoMoreData, e:
|
||||||
|
log.debug("Iteration was ended by %r", e)
|
||||||
|
|
||||||
if next_offset is None:
|
if next_offset is None:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
@@ -13,7 +13,7 @@ from kafka.common import (
|
|||||||
from kafka.util import (
|
from kafka.util import (
|
||||||
read_short_string, read_int_string, relative_unpack,
|
read_short_string, read_int_string, relative_unpack,
|
||||||
write_short_string, write_int_string, group_by_topic_and_partition,
|
write_short_string, write_int_string, group_by_topic_and_partition,
|
||||||
BufferUnderflowError, ChecksumError
|
BufferUnderflowError, ChecksumError, ConsumerFetchSizeTooSmall
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger("kafka")
|
log = logging.getLogger("kafka")
|
||||||
@@ -110,16 +110,20 @@ class KafkaProtocol(object):
|
|||||||
recurse easily.
|
recurse easily.
|
||||||
"""
|
"""
|
||||||
cur = 0
|
cur = 0
|
||||||
|
read_message = False
|
||||||
while cur < len(data):
|
while cur < len(data):
|
||||||
try:
|
try:
|
||||||
((offset, ), cur) = relative_unpack('>q', data, cur)
|
((offset, ), cur) = relative_unpack('>q', data, cur)
|
||||||
(msg, cur) = read_int_string(data, cur)
|
(msg, cur) = read_int_string(data, cur)
|
||||||
for (offset, message) in KafkaProtocol._decode_message(msg,
|
for (offset, message) in KafkaProtocol._decode_message(msg, offset):
|
||||||
offset):
|
read_message = True
|
||||||
yield OffsetAndMessage(offset, message)
|
yield OffsetAndMessage(offset, message)
|
||||||
|
|
||||||
except BufferUnderflowError:
|
except BufferUnderflowError:
|
||||||
# If we get a partial read of a message, stop
|
if read_message is False:
|
||||||
|
# If we get a partial read of a message, but haven't yielded anyhting
|
||||||
|
# there's a problem
|
||||||
|
raise ConsumerFetchSizeTooSmall()
|
||||||
|
else:
|
||||||
raise StopIteration()
|
raise StopIteration()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@@ -73,6 +73,8 @@ class BufferUnderflowError(Exception):
|
|||||||
class ChecksumError(Exception):
|
class ChecksumError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class ConsumerFetchSizeTooSmall(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
class ReentrantTimer(object):
|
class ReentrantTimer(object):
|
||||||
"""
|
"""
|
||||||
|
@@ -8,7 +8,6 @@ import random
|
|||||||
from kafka import * # noqa
|
from kafka import * # noqa
|
||||||
from kafka.common import * # noqa
|
from kafka.common import * # noqa
|
||||||
from kafka.codec import has_gzip, has_snappy
|
from kafka.codec import has_gzip, has_snappy
|
||||||
|
|
||||||
from .fixtures import ZookeeperFixture, KafkaFixture
|
from .fixtures import ZookeeperFixture, KafkaFixture
|
||||||
|
|
||||||
|
|
||||||
@@ -757,20 +756,15 @@ class TestConsumer(unittest.TestCase):
|
|||||||
self.assertEquals(resp.error, 0)
|
self.assertEquals(resp.error, 0)
|
||||||
self.assertEquals(resp.offset, 10)
|
self.assertEquals(resp.offset, 10)
|
||||||
|
|
||||||
|
# Consumer should still get all of them
|
||||||
consumer = SimpleConsumer(self.client, "group1", "test_large_messages")
|
consumer = SimpleConsumer(self.client, "group1", "test_large_messages")
|
||||||
it = consumer.__iter__()
|
all_messages = messages1 + messages2
|
||||||
for i in range(10):
|
for i, message in enumerate(consumer):
|
||||||
self.assertEquals(messages1[i], it.next().message)
|
self.assertEquals(all_messages[i], message.message)
|
||||||
|
self.assertEquals(i, 19)
|
||||||
consumer = SimpleConsumer(self.client, "group2", "test_large_messages", fetch_size_bytes=5120)
|
|
||||||
it = consumer.__iter__()
|
|
||||||
for i in range(10):
|
|
||||||
self.assertEquals(messages1[i], it.next().message)
|
|
||||||
for i in range(10):
|
|
||||||
self.assertEquals(messages2[i], it.next().message)
|
|
||||||
|
|
||||||
def random_string(l):
|
def random_string(l):
|
||||||
s = "".join(random.choice(string.printable) for i in xrange(l))
|
s = "".join(random.choice(string.letters) for i in xrange(l))
|
||||||
return s
|
return s
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Reference in New Issue
Block a user