Fix bug causing KafkaProducer to double-compress message batches on retry

This commit is contained in:
Dana Powers
2016-07-13 09:15:57 -07:00
parent 594f7079da
commit ca9d2fabc3

View File

@@ -89,22 +89,29 @@ class MessageSetBuffer(object):
return self._buffer.tell() >= self._batch_size return self._buffer.tell() >= self._batch_size
def close(self): def close(self):
if self._compressor: # This method may be called multiple times on the same batch
# TODO: avoid copies with bytearray / memoryview # i.e., on retries
self._buffer.seek(4) # we need to make sure we only close it out once
msg = Message(self._compressor(self._buffer.read()), # otherwise compressed messages may be double-compressed
attributes=self._compression_attributes, # see Issue 718
magic=self._message_version) if not self._closed:
encoded = msg.encode() if self._compressor:
self._buffer.seek(4) # TODO: avoid copies with bytearray / memoryview
self._buffer.write(Int64.encode(0)) # offset 0 for wrapper msg self._buffer.seek(4)
self._buffer.write(Int32.encode(len(encoded))) msg = Message(self._compressor(self._buffer.read()),
self._buffer.write(encoded) attributes=self._compression_attributes,
magic=self._message_version)
encoded = msg.encode()
self._buffer.seek(4)
self._buffer.write(Int64.encode(0)) # offset 0 for wrapper msg
self._buffer.write(Int32.encode(len(encoded)))
self._buffer.write(encoded)
# Update the message set size, and return ready for full read()
size = self._buffer.tell() - 4
self._buffer.seek(0)
self._buffer.write(Int32.encode(size))
# Update the message set size, and return ready for full read()
size = self._buffer.tell() - 4
self._buffer.seek(0)
self._buffer.write(Int32.encode(size))
self._buffer.seek(0) self._buffer.seek(0)
self._closed = True self._closed = True