Merge pull request #233 from dpkp/str_join_speedup

Improve string concatenation performance on PyPy and Python 3
This commit is contained in:
Mark Roberts
2014-09-11 01:08:25 -07:00
3 changed files with 91 additions and 71 deletions

View File

@@ -71,11 +71,13 @@ class KafkaProtocol(object):
Offset => int64 Offset => int64
MessageSize => int32 MessageSize => int32
""" """
message_set = b"" message_set = []
for message in messages: for message in messages:
encoded_message = KafkaProtocol._encode_message(message) encoded_message = KafkaProtocol._encode_message(message)
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message) message_set.append(struct.pack('>qi%ds' % len(encoded_message), 0,
return message_set len(encoded_message),
encoded_message))
return b''.join(message_set)
@classmethod @classmethod
def _encode_message(cls, message): def _encode_message(cls, message):
@@ -95,9 +97,11 @@ class KafkaProtocol(object):
Value => bytes Value => bytes
""" """
if message.magic == 0: if message.magic == 0:
msg = struct.pack('>BB', message.magic, message.attributes) msg = b''.join([
msg += write_int_string(message.key) struct.pack('>BB', message.magic, message.attributes),
msg += write_int_string(message.value) write_int_string(message.key),
write_int_string(message.value)
])
crc = crc32(msg) crc = crc32(msg)
msg = struct.pack('>I%ds' % len(msg), crc, msg) msg = struct.pack('>I%ds' % len(msg), crc, msg)
else: else:
@@ -197,21 +201,24 @@ class KafkaProtocol(object):
payloads = [] if payloads is None else payloads payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads) grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, message = []
KafkaProtocol.PRODUCE_KEY) message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.PRODUCE_KEY))
message += struct.pack('>hii', acks, timeout, len(grouped_payloads)) message.append(struct.pack('>hii', acks, timeout,
len(grouped_payloads)))
for topic, topic_payloads in grouped_payloads.items(): for topic, topic_payloads in grouped_payloads.items():
message += struct.pack('>h%dsi' % len(topic), message.append(struct.pack('>h%dsi' % len(topic), len(topic), topic,
len(topic), topic, len(topic_payloads)) len(topic_payloads)))
for partition, payload in topic_payloads.items(): for partition, payload in topic_payloads.items():
msg_set = KafkaProtocol._encode_message_set(payload.messages) msg_set = KafkaProtocol._encode_message_set(payload.messages)
message += struct.pack('>ii%ds' % len(msg_set), partition, message.append(struct.pack('>ii%ds' % len(msg_set), partition,
len(msg_set), msg_set) len(msg_set), msg_set))
return struct.pack('>i%ds' % len(message), len(message), message) msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)
@classmethod @classmethod
def decode_produce_response(cls, data): def decode_produce_response(cls, data):
@@ -254,21 +261,23 @@ class KafkaProtocol(object):
payloads = [] if payloads is None else payloads payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads) grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, message = []
KafkaProtocol.FETCH_KEY) message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.FETCH_KEY))
# -1 is the replica id # -1 is the replica id
message += struct.pack('>iiii', -1, max_wait_time, min_bytes, message.append(struct.pack('>iiii', -1, max_wait_time, min_bytes,
len(grouped_payloads)) len(grouped_payloads)))
for topic, topic_payloads in grouped_payloads.items(): for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic) message.append(write_short_string(topic))
message += struct.pack('>i', len(topic_payloads)) message.append(struct.pack('>i', len(topic_payloads)))
for partition, payload in topic_payloads.items(): for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.offset, message.append(struct.pack('>iqi', partition, payload.offset,
payload.max_bytes) payload.max_bytes))
return struct.pack('>i%ds' % len(message), len(message), message) msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)
@classmethod @classmethod
def decode_fetch_response(cls, data): def decode_fetch_response(cls, data):
@@ -301,21 +310,23 @@ class KafkaProtocol(object):
payloads = [] if payloads is None else payloads payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads) grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, message = []
KafkaProtocol.OFFSET_KEY) message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_KEY))
# -1 is the replica id # -1 is the replica id
message += struct.pack('>ii', -1, len(grouped_payloads)) message.append(struct.pack('>ii', -1, len(grouped_payloads)))
for topic, topic_payloads in grouped_payloads.items(): for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic) message.append(write_short_string(topic))
message += struct.pack('>i', len(topic_payloads)) message.append(struct.pack('>i', len(topic_payloads)))
for partition, payload in topic_payloads.items(): for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.time, message.append(struct.pack('>iqi', partition, payload.time,
payload.max_offsets) payload.max_offsets))
return struct.pack('>i%ds' % len(message), len(message), message) msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)
@classmethod @classmethod
def decode_offset_response(cls, data): def decode_offset_response(cls, data):
@@ -360,15 +371,17 @@ class KafkaProtocol(object):
else: else:
topics = payloads topics = payloads
message = cls._encode_message_header(client_id, correlation_id, message = []
KafkaProtocol.METADATA_KEY) message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.METADATA_KEY))
message += struct.pack('>i', len(topics)) message.append(struct.pack('>i', len(topics)))
for topic in topics: for topic in topics:
message += struct.pack('>h%ds' % len(topic), len(topic), topic) message.append(struct.pack('>h%ds' % len(topic), len(topic), topic))
return write_int_string(message) msg = b''.join(message)
return write_int_string(msg)
@classmethod @classmethod
def decode_metadata_response(cls, data): def decode_metadata_response(cls, data):
@@ -435,20 +448,22 @@ class KafkaProtocol(object):
""" """
grouped_payloads = group_by_topic_and_partition(payloads) grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, message = []
KafkaProtocol.OFFSET_COMMIT_KEY) message.append(cls._encode_message_header(client_id, correlation_id,
message += write_short_string(group) KafkaProtocol.OFFSET_COMMIT_KEY))
message += struct.pack('>i', len(grouped_payloads)) message.append(write_short_string(group))
message.append(struct.pack('>i', len(grouped_payloads)))
for topic, topic_payloads in grouped_payloads.items(): for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic) message.append(write_short_string(topic))
message += struct.pack('>i', len(topic_payloads)) message.append(struct.pack('>i', len(topic_payloads)))
for partition, payload in topic_payloads.items(): for partition, payload in topic_payloads.items():
message += struct.pack('>iq', partition, payload.offset) message.append(struct.pack('>iq', partition, payload.offset))
message += write_short_string(payload.metadata) message.append(write_short_string(payload.metadata))
return struct.pack('>i%ds' % len(message), len(message), message) msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)
@classmethod @classmethod
def decode_offset_commit_response(cls, data): def decode_offset_commit_response(cls, data):
@@ -484,20 +499,23 @@ class KafkaProtocol(object):
payloads: list of OffsetFetchRequest payloads: list of OffsetFetchRequest
""" """
grouped_payloads = group_by_topic_and_partition(payloads) grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_FETCH_KEY)
message += write_short_string(group) message = []
message += struct.pack('>i', len(grouped_payloads)) message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_FETCH_KEY))
message.append(write_short_string(group))
message.append(struct.pack('>i', len(grouped_payloads)))
for topic, topic_payloads in grouped_payloads.items(): for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic) message.append(write_short_string(topic))
message += struct.pack('>i', len(topic_payloads)) message.append(struct.pack('>i', len(topic_payloads)))
for partition, payload in topic_payloads.items(): for partition, payload in topic_payloads.items():
message += struct.pack('>i', partition) message.append(struct.pack('>i', partition))
return struct.pack('>i%ds' % len(message), len(message), message) msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)
@classmethod @classmethod
def decode_offset_fetch_response(cls, data): def decode_offset_fetch_response(cls, data):

View File

@@ -2,6 +2,8 @@ import os
import time import time
import uuid import uuid
from six.moves import range
from kafka import ( from kafka import (
SimpleProducer, KeyedProducer, SimpleProducer, KeyedProducer,
create_message, create_gzip_message, create_snappy_message, create_message, create_gzip_message, create_snappy_message,

View File

@@ -453,31 +453,31 @@ class TestProtocol(unittest.TestCase):
self.assertEqual(encoded, expected) self.assertEqual(encoded, expected)
def _create_encoded_metadata_response(self, brokers, topics): def _create_encoded_metadata_response(self, brokers, topics):
encoded = struct.pack('>ii', 3, len(brokers)) encoded = []
encoded.append(struct.pack('>ii', 3, len(brokers)))
for broker in brokers: for broker in brokers:
encoded += struct.pack('>ih%dsi' % len(broker.host), broker.nodeId, encoded.append(struct.pack('>ih%dsi' % len(broker.host),
len(broker.host), broker.host, broker.port) broker.nodeId, len(broker.host),
broker.host, broker.port))
encoded += struct.pack('>i', len(topics)) encoded.append(struct.pack('>i', len(topics)))
for topic in topics: for topic in topics:
encoded += struct.pack('>hh%dsi' % len(topic.topic), encoded.append(struct.pack('>hh%dsi' % len(topic.topic),
topic.error, len(topic.topic), topic.error, len(topic.topic),
topic.topic, len(topic.partitions)) topic.topic, len(topic.partitions)))
for metadata in topic.partitions: for metadata in topic.partitions:
encoded += struct.pack('>hiii', encoded.append(struct.pack('>hiii', metadata.error,
metadata.error, metadata.partition, metadata.leader,
metadata.partition, len(metadata.replicas)))
metadata.leader,
len(metadata.replicas))
if len(metadata.replicas) > 0: if len(metadata.replicas) > 0:
encoded += struct.pack('>%di' % len(metadata.replicas), encoded.append(struct.pack('>%di' % len(metadata.replicas),
*metadata.replicas) *metadata.replicas))
encoded += struct.pack('>i', len(metadata.isr)) encoded.append(struct.pack('>i', len(metadata.isr)))
if len(metadata.isr) > 0: if len(metadata.isr) > 0:
encoded += struct.pack('>%di' % len(metadata.isr), encoded.append(struct.pack('>%di' % len(metadata.isr),
*metadata.isr) *metadata.isr))
return encoded return b''.join(encoded)
def test_decode_metadata_response(self): def test_decode_metadata_response(self):
node_brokers = [ node_brokers = [