PEP8-ify most of the files
consumer.py and conn.py will be done later after pending merges
This commit is contained in:
@@ -18,10 +18,12 @@ from kafka.util import (
|
||||
|
||||
log = logging.getLogger("kafka")
|
||||
|
||||
|
||||
class KafkaProtocol(object):
|
||||
"""
|
||||
Class to encapsulate all of the protocol encoding/decoding. This class does not
|
||||
have any state associated with it, it is purely for organization.
|
||||
Class to encapsulate all of the protocol encoding/decoding.
|
||||
This class does not have any state associated with it, it is purely
|
||||
for organization.
|
||||
"""
|
||||
PRODUCE_KEY = 0
|
||||
FETCH_KEY = 1
|
||||
@@ -44,18 +46,18 @@ class KafkaProtocol(object):
|
||||
"""
|
||||
Encode the common request envelope
|
||||
"""
|
||||
return struct.pack('>hhih%ds' % len(client_id),
|
||||
return struct.pack('>hhih%ds' % len(client_id),
|
||||
request_key, # ApiKey
|
||||
0, # ApiVersion
|
||||
correlation_id, # CorrelationId
|
||||
len(client_id), #
|
||||
len(client_id),
|
||||
client_id) # ClientId
|
||||
|
||||
@classmethod
|
||||
def _encode_message_set(cls, messages):
|
||||
"""
|
||||
Encode a MessageSet. Unlike other arrays in the protocol, MessageSets are
|
||||
not length-prefixed
|
||||
Encode a MessageSet. Unlike other arrays in the protocol,
|
||||
MessageSets are not length-prefixed
|
||||
|
||||
Format
|
||||
======
|
||||
@@ -66,7 +68,8 @@ class KafkaProtocol(object):
|
||||
message_set = ""
|
||||
for message in messages:
|
||||
encoded_message = KafkaProtocol._encode_message(message)
|
||||
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
|
||||
message_set += struct.pack('>qi%ds' % len(encoded_message), 0,
|
||||
len(encoded_message), encoded_message)
|
||||
return message_set
|
||||
|
||||
@classmethod
|
||||
@@ -74,10 +77,10 @@ class KafkaProtocol(object):
|
||||
"""
|
||||
Encode a single message.
|
||||
|
||||
The magic number of a message is a format version number. The only supported
|
||||
magic number right now is zero
|
||||
The magic number of a message is a format version number.
|
||||
The only supported magic number right now is zero
|
||||
|
||||
Format
|
||||
Format
|
||||
======
|
||||
Message => Crc MagicByte Attributes Key Value
|
||||
Crc => int32
|
||||
@@ -96,24 +99,27 @@ class KafkaProtocol(object):
|
||||
raise Exception("Unexpected magic number: %d" % message.magic)
|
||||
return msg
|
||||
|
||||
|
||||
@classmethod
|
||||
def _decode_message_set_iter(cls, data):
|
||||
"""
|
||||
Iteratively decode a MessageSet
|
||||
|
||||
Reads repeated elements of (offset, message), calling decode_message to decode a
|
||||
single message. Since compressed messages contain futher MessageSets, these two methods
|
||||
have been decoupled so that they may recurse easily.
|
||||
Reads repeated elements of (offset, message), calling decode_message
|
||||
to decode a single message. Since compressed messages contain futher
|
||||
MessageSets, these two methods have been decoupled so that they may
|
||||
recurse easily.
|
||||
"""
|
||||
cur = 0
|
||||
while cur < len(data):
|
||||
try:
|
||||
((offset, ), cur) = relative_unpack('>q', data, cur)
|
||||
(msg, cur) = read_int_string(data, cur)
|
||||
for (offset, message) in KafkaProtocol._decode_message(msg, offset):
|
||||
for (offset, message) in KafkaProtocol._decode_message(msg,
|
||||
offset):
|
||||
yield OffsetAndMessage(offset, message)
|
||||
except BufferUnderflowError: # If we get a partial read of a message, stop
|
||||
|
||||
except BufferUnderflowError:
|
||||
# If we get a partial read of a message, stop
|
||||
raise StopIteration()
|
||||
|
||||
@classmethod
|
||||
@@ -121,9 +127,10 @@ class KafkaProtocol(object):
|
||||
"""
|
||||
Decode a single Message
|
||||
|
||||
The only caller of this method is decode_message_set_iter. They are decoupled to
|
||||
support nested messages (compressed MessageSets). The offset is actually read from
|
||||
decode_message_set_iter (it is part of the MessageSet payload).
|
||||
The only caller of this method is decode_message_set_iter.
|
||||
They are decoupled to support nested messages (compressed MessageSets).
|
||||
The offset is actually read from decode_message_set_iter (it is part
|
||||
of the MessageSet payload).
|
||||
"""
|
||||
((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
|
||||
if crc != zlib.crc32(data[4:]):
|
||||
@@ -131,23 +138,29 @@ class KafkaProtocol(object):
|
||||
|
||||
(key, cur) = read_int_string(data, cur)
|
||||
(value, cur) = read_int_string(data, cur)
|
||||
if att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_NONE:
|
||||
|
||||
codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK
|
||||
|
||||
if codec == KafkaProtocol.CODEC_NONE:
|
||||
yield (offset, Message(magic, att, key, value))
|
||||
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_GZIP:
|
||||
|
||||
elif codec == KafkaProtocol.CODEC_GZIP:
|
||||
gz = gzip_decode(value)
|
||||
for (offset, message) in KafkaProtocol._decode_message_set_iter(gz):
|
||||
yield (offset, message)
|
||||
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_SNAPPY:
|
||||
for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
|
||||
yield (offset, msg)
|
||||
|
||||
elif codec == KafkaProtocol.CODEC_SNAPPY:
|
||||
snp = snappy_decode(value)
|
||||
for (offset, message) in KafkaProtocol._decode_message_set_iter(snp):
|
||||
yield (offset, message)
|
||||
for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
|
||||
yield (offset, msg)
|
||||
|
||||
##################
|
||||
# Public API #
|
||||
##################
|
||||
|
||||
@classmethod
|
||||
def encode_produce_request(cls, client_id, correlation_id, payloads=[], acks=1, timeout=1000):
|
||||
def encode_produce_request(cls, client_id, correlation_id,
|
||||
payloads=None, acks=1, timeout=1000):
|
||||
"""
|
||||
Encode some ProduceRequest structs
|
||||
|
||||
@@ -161,39 +174,53 @@ class KafkaProtocol(object):
|
||||
1: written to disk by the leader
|
||||
2+: waits for this many number of replicas to sync
|
||||
-1: waits for all replicas to be in sync
|
||||
timeout: Maximum time the server will wait for acks from replicas. This is _not_ a socket timeout
|
||||
timeout: Maximum time the server will wait for acks from replicas.
|
||||
This is _not_ a socket timeout
|
||||
"""
|
||||
payloads = [] if payloads is None else payloads
|
||||
grouped_payloads = group_by_topic_and_partition(payloads)
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.PRODUCE_KEY)
|
||||
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.PRODUCE_KEY)
|
||||
|
||||
message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
|
||||
|
||||
for topic, topic_payloads in grouped_payloads.items():
|
||||
message += struct.pack('>h%dsi' % len(topic), len(topic), topic, len(topic_payloads))
|
||||
message += struct.pack('>h%dsi' % len(topic),
|
||||
len(topic), topic, len(topic_payloads))
|
||||
|
||||
for partition, payload in topic_payloads.items():
|
||||
message_set = KafkaProtocol._encode_message_set(payload.messages)
|
||||
message += struct.pack('>ii%ds' % len(message_set), partition, len(message_set), message_set)
|
||||
msg_set = KafkaProtocol._encode_message_set(payload.messages)
|
||||
message += struct.pack('>ii%ds' % len(msg_set), partition,
|
||||
len(msg_set), msg_set)
|
||||
|
||||
return struct.pack('>i%ds' % len(message), len(message), message)
|
||||
|
||||
@classmethod
|
||||
def decode_produce_response(cls, data):
|
||||
"""
|
||||
Decode bytes to a ProduceResponse
|
||||
Decode bytes to a ProduceResponse
|
||||
|
||||
Params
|
||||
======
|
||||
data: bytes to decode
|
||||
"""
|
||||
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
|
||||
|
||||
for i in range(num_topics):
|
||||
((strlen,), cur) = relative_unpack('>h', data, cur)
|
||||
topic = data[cur:cur+strlen]
|
||||
topic = data[cur:cur + strlen]
|
||||
cur += strlen
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
for i in range(num_partitions):
|
||||
((partition, error, offset), cur) = relative_unpack('>ihq', data, cur)
|
||||
((partition, error, offset), cur) = relative_unpack('>ihq',
|
||||
data, cur)
|
||||
|
||||
yield ProduceResponse(topic, partition, error, offset)
|
||||
|
||||
@classmethod
|
||||
def encode_fetch_request(cls, client_id, correlation_id, payloads=[], max_wait_time=100, min_bytes=4096):
|
||||
def encode_fetch_request(cls, client_id, correlation_id, payloads=None,
|
||||
max_wait_time=100, min_bytes=4096):
|
||||
"""
|
||||
Encodes some FetchRequest structs
|
||||
|
||||
@@ -203,17 +230,27 @@ class KafkaProtocol(object):
|
||||
correlation_id: string
|
||||
payloads: list of FetchRequest
|
||||
max_wait_time: int, how long to block waiting on min_bytes of data
|
||||
min_bytes: int, the minimum number of bytes to accumulate before returning the response
|
||||
min_bytes: int, the minimum number of bytes to accumulate before
|
||||
returning the response
|
||||
"""
|
||||
|
||||
|
||||
payloads = [] if payloads is None else payloads
|
||||
grouped_payloads = group_by_topic_and_partition(payloads)
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.FETCH_KEY)
|
||||
message += struct.pack('>iiii', -1, max_wait_time, min_bytes, len(grouped_payloads)) # -1 is the replica id
|
||||
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.FETCH_KEY)
|
||||
|
||||
# -1 is the replica id
|
||||
message += struct.pack('>iiii', -1, max_wait_time, min_bytes,
|
||||
len(grouped_payloads))
|
||||
|
||||
for topic, topic_payloads in grouped_payloads.items():
|
||||
message += write_short_string(topic)
|
||||
message += struct.pack('>i', len(topic_payloads))
|
||||
for partition, payload in topic_payloads.items():
|
||||
message += struct.pack('>iqi', partition, payload.offset, payload.max_bytes)
|
||||
message += struct.pack('>iqi', partition, payload.offset,
|
||||
payload.max_bytes)
|
||||
|
||||
return struct.pack('>i%ds' % len(message), len(message), message)
|
||||
|
||||
@classmethod
|
||||
@@ -226,25 +263,41 @@ class KafkaProtocol(object):
|
||||
data: bytes to decode
|
||||
"""
|
||||
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
|
||||
|
||||
for i in range(num_topics):
|
||||
(topic, cur) = read_short_string(data, cur)
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in range(num_partitions):
|
||||
((partition, error, highwater_mark_offset), cur) = relative_unpack('>ihq', data, cur)
|
||||
((partition, error, highwater_mark_offset), cur) = \
|
||||
relative_unpack('>ihq', data, cur)
|
||||
|
||||
(message_set, cur) = read_int_string(data, cur)
|
||||
yield FetchResponse(topic, partition, error, highwater_mark_offset,
|
||||
|
||||
yield FetchResponse(
|
||||
topic, partition, error,
|
||||
highwater_mark_offset,
|
||||
KafkaProtocol._decode_message_set_iter(message_set))
|
||||
|
||||
@classmethod
|
||||
def encode_offset_request(cls, client_id, correlation_id, payloads=[]):
|
||||
def encode_offset_request(cls, client_id, correlation_id, payloads=None):
|
||||
payloads = [] if payloads is None else payloads
|
||||
grouped_payloads = group_by_topic_and_partition(payloads)
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_KEY)
|
||||
message += struct.pack('>ii', -1, len(grouped_payloads)) # -1 is the replica id
|
||||
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.OFFSET_KEY)
|
||||
|
||||
# -1 is the replica id
|
||||
message += struct.pack('>ii', -1, len(grouped_payloads))
|
||||
|
||||
for topic, topic_payloads in grouped_payloads.items():
|
||||
message += write_short_string(topic)
|
||||
message += struct.pack('>i', len(topic_payloads))
|
||||
|
||||
for partition, payload in topic_payloads.items():
|
||||
message += struct.pack('>iqi', partition, payload.time, payload.max_offsets)
|
||||
message += struct.pack('>iqi', partition, payload.time,
|
||||
payload.max_offsets)
|
||||
|
||||
return struct.pack('>i%ds' % len(message), len(message), message)
|
||||
|
||||
@classmethod
|
||||
@@ -257,19 +310,24 @@ class KafkaProtocol(object):
|
||||
data: bytes to decode
|
||||
"""
|
||||
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
|
||||
|
||||
for i in range(num_topics):
|
||||
(topic, cur) = read_short_string(data, cur)
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in range(num_partitions):
|
||||
((partition, error, num_offsets,), cur) = relative_unpack('>ihi', data, cur)
|
||||
((partition, error, num_offsets,), cur) = \
|
||||
relative_unpack('>ihi', data, cur)
|
||||
|
||||
offsets = []
|
||||
for j in range(num_offsets):
|
||||
((offset,), cur) = relative_unpack('>q', data, cur)
|
||||
offsets.append(offset)
|
||||
|
||||
yield OffsetResponse(topic, partition, error, tuple(offsets))
|
||||
|
||||
@classmethod
|
||||
def encode_metadata_request(cls, client_id, correlation_id, topics=[]):
|
||||
def encode_metadata_request(cls, client_id, correlation_id, topics=None):
|
||||
"""
|
||||
Encode a MetadataRequest
|
||||
|
||||
@@ -279,10 +337,15 @@ class KafkaProtocol(object):
|
||||
correlation_id: string
|
||||
topics: list of strings
|
||||
"""
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.METADATA_KEY)
|
||||
topics = [] if topics is None else topics
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.METADATA_KEY)
|
||||
|
||||
message += struct.pack('>i', len(topics))
|
||||
|
||||
for topic in topics:
|
||||
message += struct.pack('>h%ds' % len(topic), len(topic), topic)
|
||||
|
||||
return write_int_string(message)
|
||||
|
||||
@classmethod
|
||||
@@ -307,22 +370,34 @@ class KafkaProtocol(object):
|
||||
# Topic info
|
||||
((num_topics,), cur) = relative_unpack('>i', data, cur)
|
||||
topicMetadata = {}
|
||||
|
||||
for i in range(num_topics):
|
||||
((topicError,), cur) = relative_unpack('>h', data, cur)
|
||||
(topicName, cur) = read_short_string(data, cur)
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
partitionMetadata = {}
|
||||
|
||||
for j in range(num_partitions):
|
||||
((partitionErrorCode, partition, leader, numReplicas), cur) = relative_unpack('>hiii', data, cur)
|
||||
(replicas, cur) = relative_unpack('>%di' % numReplicas, data, cur)
|
||||
((partitionErrorCode, partition, leader, numReplicas), cur) = \
|
||||
relative_unpack('>hiii', data, cur)
|
||||
|
||||
(replicas, cur) = relative_unpack('>%di' % numReplicas,
|
||||
data, cur)
|
||||
|
||||
((numIsr,), cur) = relative_unpack('>i', data, cur)
|
||||
(isr, cur) = relative_unpack('>%di' % numIsr, data, cur)
|
||||
partitionMetadata[partition] = PartitionMetadata(topicName, partition, leader, replicas, isr)
|
||||
|
||||
partitionMetadata[partition] = \
|
||||
PartitionMetadata(topicName, partition, leader,
|
||||
replicas, isr)
|
||||
|
||||
topicMetadata[topicName] = partitionMetadata
|
||||
|
||||
return (brokers, topicMetadata)
|
||||
|
||||
@classmethod
|
||||
def encode_offset_commit_request(cls, client_id, correlation_id, group, payloads):
|
||||
def encode_offset_commit_request(cls, client_id, correlation_id,
|
||||
group, payloads):
|
||||
"""
|
||||
Encode some OffsetCommitRequest structs
|
||||
|
||||
@@ -333,16 +408,21 @@ class KafkaProtocol(object):
|
||||
group: string, the consumer group you are committing offsets for
|
||||
payloads: list of OffsetCommitRequest
|
||||
"""
|
||||
grouped_payloads= group_by_topic_and_partition(payloads)
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_COMMIT_KEY)
|
||||
grouped_payloads = group_by_topic_and_partition(payloads)
|
||||
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.OFFSET_COMMIT_KEY)
|
||||
message += write_short_string(group)
|
||||
message += struct.pack('>i', len(grouped_payloads))
|
||||
|
||||
for topic, topic_payloads in grouped_payloads.items():
|
||||
message += write_short_string(topic)
|
||||
message += struct.pack('>i', len(topic_payloads))
|
||||
|
||||
for partition, payload in topic_payloads.items():
|
||||
message += struct.pack('>iq', partition, payload.offset)
|
||||
message += write_short_string(payload.metadata)
|
||||
|
||||
return struct.pack('>i%ds' % len(message), len(message), message)
|
||||
|
||||
@classmethod
|
||||
@@ -357,15 +437,18 @@ class KafkaProtocol(object):
|
||||
((correlation_id,), cur) = relative_unpack('>i', data, 0)
|
||||
(client_id, cur) = read_short_string(data, cur)
|
||||
((num_topics,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in xrange(num_topics):
|
||||
(topic, cur) = read_short_string(data, cur)
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in xrange(num_partitions):
|
||||
((partition, error), cur) = relative_unpack('>ih', data, cur)
|
||||
yield OffsetCommitResponse(topic, partition, error)
|
||||
|
||||
@classmethod
|
||||
def encode_offset_fetch_request(cls, client_id, correlation_id, group, payloads):
|
||||
def encode_offset_fetch_request(cls, client_id, correlation_id,
|
||||
group, payloads):
|
||||
"""
|
||||
Encode some OffsetFetchRequest structs
|
||||
|
||||
@@ -377,14 +460,19 @@ class KafkaProtocol(object):
|
||||
payloads: list of OffsetFetchRequest
|
||||
"""
|
||||
grouped_payloads = group_by_topic_and_partition(payloads)
|
||||
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_FETCH_KEY)
|
||||
message = cls._encode_message_header(client_id, correlation_id,
|
||||
KafkaProtocol.OFFSET_FETCH_KEY)
|
||||
|
||||
message += write_short_string(group)
|
||||
message += struct.pack('>i', len(grouped_payloads))
|
||||
|
||||
for topic, topic_payloads in grouped_payloads.items():
|
||||
message += write_short_string(topic)
|
||||
message += struct.pack('>i', len(topic_payloads))
|
||||
|
||||
for partition, payload in topic_payloads.items():
|
||||
message += struct.pack('>i', partition)
|
||||
|
||||
return struct.pack('>i%ds' % len(message), len(message), message)
|
||||
|
||||
@classmethod
|
||||
@@ -400,14 +488,19 @@ class KafkaProtocol(object):
|
||||
((correlation_id,), cur) = relative_unpack('>i', data, 0)
|
||||
(client_id, cur) = read_short_string(data, cur)
|
||||
((num_topics,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in range(num_topics):
|
||||
(topic, cur) = read_short_string(data, cur)
|
||||
((num_partitions,), cur) = relative_unpack('>i', data, cur)
|
||||
|
||||
for i in range(num_partitions):
|
||||
((partition, offset), cur) = relative_unpack('>iq', data, cur)
|
||||
(metadata, cur) = read_short_string(data, cur)
|
||||
((error,), cur) = relative_unpack('>h', data, cur)
|
||||
yield OffsetFetchResponse(topic, partition, offset, metadata, error)
|
||||
|
||||
yield OffsetFetchResponse(topic, partition, offset,
|
||||
metadata, error)
|
||||
|
||||
|
||||
def create_message(payload, key=None):
|
||||
"""
|
||||
@@ -420,6 +513,7 @@ def create_message(payload, key=None):
|
||||
"""
|
||||
return Message(0, 0, key, payload)
|
||||
|
||||
|
||||
def create_gzip_message(payloads, key=None):
|
||||
"""
|
||||
Construct a Gzipped Message containing multiple Messages
|
||||
@@ -433,9 +527,13 @@ def create_gzip_message(payloads, key=None):
|
||||
key: bytes, a key used for partition routing (optional)
|
||||
"""
|
||||
message_set = KafkaProtocol._encode_message_set(
|
||||
[create_message(payload) for payload in payloads])
|
||||
gzipped = gzip_encode(message_set)
|
||||
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP), key, gzipped)
|
||||
[create_message(payload) for payload in payloads])
|
||||
|
||||
gzipped = gzip_encode(message_set)
|
||||
codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
|
||||
|
||||
return Message(0, 0x00 | codec, key, gzipped)
|
||||
|
||||
|
||||
def create_snappy_message(payloads, key=None):
|
||||
"""
|
||||
@@ -450,6 +548,9 @@ def create_snappy_message(payloads, key=None):
|
||||
key: bytes, a key used for partition routing (optional)
|
||||
"""
|
||||
message_set = KafkaProtocol._encode_message_set(
|
||||
[create_message(payload) for payload in payloads])
|
||||
snapped = snappy_encode(message_set)
|
||||
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY), key, snapped)
|
||||
[create_message(payload) for payload in payloads])
|
||||
|
||||
snapped = snappy_encode(message_set)
|
||||
codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY
|
||||
|
||||
return Message(0, 0x00 | codec, key, snapped)
|
||||
|
||||
Reference in New Issue
Block a user