Add support for LZ4 compressed messages using python-lz4 module
This commit is contained in:
@@ -79,6 +79,14 @@ for more details.
|
|||||||
>>> for i in range(1000):
|
>>> for i in range(1000):
|
||||||
... producer.send('foobar', b'msg %d' % i)
|
... producer.send('foobar', b'msg %d' % i)
|
||||||
|
|
||||||
|
Compression
|
||||||
|
***********
|
||||||
|
|
||||||
|
kafka-python supports gzip compression/decompression natively. To produce or
|
||||||
|
consume snappy and lz4 compressed messages, you must install `lz4` (`lz4-cffi`
|
||||||
|
if using pypy) and/or `python-snappy` (also requires snappy library).
|
||||||
|
See `Installation <http://kafka-python.readthedocs.org/en/master/install.html#optional-snappy-install>`_
|
||||||
|
for more information.
|
||||||
|
|
||||||
Protocol
|
Protocol
|
||||||
********
|
********
|
||||||
|
@@ -37,6 +37,17 @@ Using `setup.py` directly:
|
|||||||
cd kafka-python
|
cd kafka-python
|
||||||
python setup.py install
|
python setup.py install
|
||||||
|
|
||||||
|
Optional LZ4 install
|
||||||
|
********************
|
||||||
|
|
||||||
|
To enable LZ4 compression/decompression, install `lz4`:
|
||||||
|
|
||||||
|
>>> pip install lz4
|
||||||
|
|
||||||
|
Or `lz4-cffi` if using pypy:
|
||||||
|
|
||||||
|
>>> pip install lz4-cffi
|
||||||
|
|
||||||
|
|
||||||
Optional Snappy install
|
Optional Snappy install
|
||||||
***********************
|
***********************
|
||||||
|
@@ -13,6 +13,15 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
_HAS_SNAPPY = False
|
_HAS_SNAPPY = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import lz4
|
||||||
|
from lz4 import compress as lz4_encode
|
||||||
|
from lz4 import decompress as lz4_decode
|
||||||
|
except ImportError:
|
||||||
|
lz4 = None
|
||||||
|
lz4_encode = None
|
||||||
|
lz4_decode = None
|
||||||
|
|
||||||
|
|
||||||
def has_gzip():
|
def has_gzip():
|
||||||
return True
|
return True
|
||||||
@@ -22,6 +31,10 @@ def has_snappy():
|
|||||||
return _HAS_SNAPPY
|
return _HAS_SNAPPY
|
||||||
|
|
||||||
|
|
||||||
|
def has_lz4():
|
||||||
|
return lz4 is not None
|
||||||
|
|
||||||
|
|
||||||
def gzip_encode(payload, compresslevel=None):
|
def gzip_encode(payload, compresslevel=None):
|
||||||
if not compresslevel:
|
if not compresslevel:
|
||||||
compresslevel = 9
|
compresslevel = 9
|
||||||
|
@@ -5,8 +5,8 @@ import io
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from ..codec import (has_gzip, has_snappy,
|
from ..codec import (has_gzip, has_snappy, has_lz4,
|
||||||
gzip_encode, snappy_encode)
|
gzip_encode, snappy_encode, lz4_encode)
|
||||||
from ..protocol.types import Int32, Int64
|
from ..protocol.types import Int32, Int64
|
||||||
from ..protocol.message import MessageSet, Message
|
from ..protocol.message import MessageSet, Message
|
||||||
|
|
||||||
@@ -27,6 +27,7 @@ class MessageSetBuffer(object):
|
|||||||
_COMPRESSORS = {
|
_COMPRESSORS = {
|
||||||
'gzip': (has_gzip, gzip_encode, Message.CODEC_GZIP),
|
'gzip': (has_gzip, gzip_encode, Message.CODEC_GZIP),
|
||||||
'snappy': (has_snappy, snappy_encode, Message.CODEC_SNAPPY),
|
'snappy': (has_snappy, snappy_encode, Message.CODEC_SNAPPY),
|
||||||
|
'lz4': (has_lz4, lz4_encode, Message.CODEC_LZ4),
|
||||||
}
|
}
|
||||||
def __init__(self, buf, batch_size, compression_type=None):
|
def __init__(self, buf, batch_size, compression_type=None):
|
||||||
assert batch_size > 0, 'batch_size must be > 0'
|
assert batch_size > 0, 'batch_size must be > 0'
|
||||||
|
@@ -111,7 +111,7 @@ class KafkaProducer(object):
|
|||||||
remains alive. This is the strongest available guarantee.
|
remains alive. This is the strongest available guarantee.
|
||||||
If unset, defaults to acks=1.
|
If unset, defaults to acks=1.
|
||||||
compression_type (str): The compression type for all data generated by
|
compression_type (str): The compression type for all data generated by
|
||||||
the producer. Valid values are 'gzip', 'snappy', or None.
|
the producer. Valid values are 'gzip', 'snappy', 'lz4', or None.
|
||||||
Compression is of full batches of data, so the efficacy of batching
|
Compression is of full batches of data, so the efficacy of batching
|
||||||
will also impact the compression ratio (more batching means better
|
will also impact the compression ratio (more batching means better
|
||||||
compression). Default: None.
|
compression). Default: None.
|
||||||
|
@@ -114,7 +114,7 @@ class RecordAccumulator(object):
|
|||||||
In the current implementation, this setting is an approximation.
|
In the current implementation, this setting is an approximation.
|
||||||
Default: 33554432 (32MB)
|
Default: 33554432 (32MB)
|
||||||
compression_type (str): The compression type for all data generated by
|
compression_type (str): The compression type for all data generated by
|
||||||
the producer. Valid values are 'gzip', 'snappy', or None.
|
the producer. Valid values are 'gzip', 'snappy', 'lz4', or None.
|
||||||
Compression is of full batches of data, so the efficacy of batching
|
Compression is of full batches of data, so the efficacy of batching
|
||||||
will also impact the compression ratio (more batching means better
|
will also impact the compression ratio (more batching means better
|
||||||
compression). Default: None.
|
compression). Default: None.
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
import io
|
import io
|
||||||
|
|
||||||
from ..codec import gzip_decode, snappy_decode
|
from ..codec import (has_gzip, has_snappy, has_lz4,
|
||||||
|
gzip_decode, snappy_decode, lz4_decode)
|
||||||
from . import pickle
|
from . import pickle
|
||||||
from .struct import Struct
|
from .struct import Struct
|
||||||
from .types import (
|
from .types import (
|
||||||
@@ -20,6 +21,7 @@ class Message(Struct):
|
|||||||
CODEC_MASK = 0x03
|
CODEC_MASK = 0x03
|
||||||
CODEC_GZIP = 0x01
|
CODEC_GZIP = 0x01
|
||||||
CODEC_SNAPPY = 0x02
|
CODEC_SNAPPY = 0x02
|
||||||
|
CODEC_LZ4 = 0x03
|
||||||
HEADER_SIZE = 14 # crc(4), magic(1), attributes(1), key+value size(4*2)
|
HEADER_SIZE = 14 # crc(4), magic(1), attributes(1), key+value size(4*2)
|
||||||
|
|
||||||
def __init__(self, value, key=None, magic=0, attributes=0, crc=0):
|
def __init__(self, value, key=None, magic=0, attributes=0, crc=0):
|
||||||
@@ -61,11 +63,18 @@ class Message(Struct):
|
|||||||
|
|
||||||
def decompress(self):
|
def decompress(self):
|
||||||
codec = self.attributes & self.CODEC_MASK
|
codec = self.attributes & self.CODEC_MASK
|
||||||
assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY)
|
assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4)
|
||||||
if codec == self.CODEC_GZIP:
|
if codec == self.CODEC_GZIP:
|
||||||
|
assert has_gzip(), 'Gzip decompression unsupported'
|
||||||
raw_bytes = gzip_decode(self.value)
|
raw_bytes = gzip_decode(self.value)
|
||||||
else:
|
elif codec == self.CODEC_SNAPPY:
|
||||||
|
assert has_snappy(), 'Snappy decompression unsupported'
|
||||||
raw_bytes = snappy_decode(self.value)
|
raw_bytes = snappy_decode(self.value)
|
||||||
|
elif codec == self.CODEC_LZ4:
|
||||||
|
assert has_lz4(), 'LZ4 decompression unsupported'
|
||||||
|
raw_bytes = lz4_decode(self.value)
|
||||||
|
else:
|
||||||
|
raise Exception('This should be impossible')
|
||||||
|
|
||||||
return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
|
return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user