Add skip_double_compressed_messages option to KafkaConsumer
This commit is contained in:
@@ -39,6 +39,7 @@ class Fetcher(six.Iterator):
|
||||
'fetch_max_wait_ms': 500,
|
||||
'max_partition_fetch_bytes': 1048576,
|
||||
'check_crcs': True,
|
||||
'skip_double_compressed_messages': False,
|
||||
'iterator_refetch_records': 1, # undocumented -- interface may change
|
||||
'api_version': (0, 8, 0),
|
||||
}
|
||||
@@ -71,6 +72,13 @@ class Fetcher(six.Iterator):
|
||||
consumed. This ensures no on-the-wire or on-disk corruption to
|
||||
the messages occurred. This check adds some overhead, so it may
|
||||
be disabled in cases seeking extreme performance. Default: True
|
||||
skip_double_compressed_messages (bool): A bug in KafkaProducer <= 1.2.4
|
||||
caused some messages to be corrupted via double-compression.
|
||||
By default, the fetcher will return these messages as a compressed
|
||||
blob of bytes with a single offset, i.e. how the message was
|
||||
actually published to the cluster. If you prefer to have the
|
||||
fetcher automatically detect corrupt messages and skip them,
|
||||
set this option to True. Default: False.
|
||||
"""
|
||||
self.config = copy.copy(self.DEFAULT_CONFIG)
|
||||
for key in self.config:
|
||||
@@ -368,6 +376,10 @@ class Fetcher(six.Iterator):
|
||||
' double-compressed. This should not'
|
||||
' happen -- check your producers!',
|
||||
tp, offset)
|
||||
if self.config['skip_double_compressed_messages']:
|
||||
log.warning('Skipping double-compressed message at'
|
||||
' %s %d', tp, offset)
|
||||
continue
|
||||
|
||||
if msg.magic > 0:
|
||||
last_offset, _, _ = inner_mset[-1]
|
||||
|
@@ -123,6 +123,13 @@ class KafkaConsumer(six.Iterator):
|
||||
consumer_timeout_ms (int): number of milliseconds to block during
|
||||
message iteration before raising StopIteration (i.e., ending the
|
||||
iterator). Default -1 (block forever).
|
||||
skip_double_compressed_messages (bool): A bug in KafkaProducer <= 1.2.4
|
||||
caused some messages to be corrupted via double-compression.
|
||||
By default, the fetcher will return these messages as a compressed
|
||||
blob of bytes with a single offset, i.e. how the message was
|
||||
actually published to the cluster. If you prefer to have the
|
||||
fetcher automatically detect corrupt messages and skip them,
|
||||
set this option to True. Default: False.
|
||||
security_protocol (str): Protocol used to communicate with brokers.
|
||||
Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT.
|
||||
ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping
|
||||
@@ -189,6 +196,7 @@ class KafkaConsumer(six.Iterator):
|
||||
'send_buffer_bytes': None,
|
||||
'receive_buffer_bytes': None,
|
||||
'consumer_timeout_ms': -1,
|
||||
'skip_double_compressed_messages': False,
|
||||
'security_protocol': 'PLAINTEXT',
|
||||
'ssl_context': None,
|
||||
'ssl_check_hostname': True,
|
||||
|
Reference in New Issue
Block a user