Change KafkaProtocol to encode/decode Structs, not bytes
- add Payload to kafka.common Request/Response namedtuples
- OffsetFetch and OffsetCommit still need to be converted
@@ -21,24 +21,24 @@ ConsumerMetadataResponse = namedtuple("ConsumerMetadataResponse",
     ["error", "nodeId", "host", "port"])
 
 # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProduceAPI
-ProduceRequest = namedtuple("ProduceRequest",
+ProduceRequestPayload = namedtuple("ProduceRequestPayload",
     ["topic", "partition", "messages"])
 
-ProduceResponse = namedtuple("ProduceResponse",
+ProduceResponsePayload = namedtuple("ProduceResponsePayload",
     ["topic", "partition", "error", "offset"])
 
 # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-FetchAPI
-FetchRequest = namedtuple("FetchRequest",
+FetchRequestPayload = namedtuple("FetchRequestPayload",
     ["topic", "partition", "offset", "max_bytes"])
 
-FetchResponse = namedtuple("FetchResponse",
+FetchResponsePayload = namedtuple("FetchResponsePayload",
     ["topic", "partition", "error", "highwaterMark", "messages"])
 
 # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
-OffsetRequest = namedtuple("OffsetRequest",
+OffsetRequestPayload = namedtuple("OffsetRequestPayload",
     ["topic", "partition", "time", "max_offsets"])
 
-OffsetResponse = namedtuple("OffsetResponse",
+OffsetResponsePayload = namedtuple("OffsetResponsePayload",
     ["topic", "partition", "error", "offsets"])
 
 # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
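The renamed Payload tuples are plain namedtuples, so a minimal standalone sketch of constructing one runs without the library (topic and message values here are made up for illustration):

from collections import namedtuple

ProduceRequestPayload = namedtuple("ProduceRequestPayload",
    ["topic", "partition", "messages"])

payload = ProduceRequestPayload(b'my-topic', 0, [b'msg1', b'msg2'])
assert payload.topic == b'my-topic' and payload.partition == 0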
@@ -7,16 +7,21 @@ import six
 
 from six.moves import xrange
 
+import kafka.common
+import kafka.protocol.commit
+import kafka.protocol.fetch
+import kafka.protocol.message
+import kafka.protocol.metadata
+import kafka.protocol.offset
+import kafka.protocol.produce
+
 from kafka.codec import (
     gzip_encode, gzip_decode, snappy_encode, snappy_decode
 )
 from kafka.common import (
-    Message, OffsetAndMessage, TopicAndPartition,
-    BrokerMetadata, TopicMetadata, PartitionMetadata,
-    MetadataResponse, ProduceResponse, FetchResponse,
-    OffsetResponse, OffsetCommitResponse, OffsetFetchResponse,
-    ProtocolError, BufferUnderflowError, ChecksumError,
-    ConsumerFetchSizeTooSmall, UnsupportedCodecError,
+    OffsetCommitResponse, OffsetFetchResponse,
+    ProtocolError, ChecksumError,
+    UnsupportedCodecError,
     ConsumerMetadataResponse
 )
 from kafka.util import (
@@ -114,41 +119,6 @@ class KafkaProtocol(object):
             raise ProtocolError("Unexpected magic number: %d" % message.magic)
         return msg
 
-    @classmethod
-    def _decode_message_set_iter(cls, data):
-        """
-        Iteratively decode a MessageSet
-
-        Reads repeated elements of (offset, message), calling decode_message
-        to decode a single message. Since compressed messages contain further
-        MessageSets, these two methods have been decoupled so that they may
-        recurse easily.
-        """
-        cur = 0
-        read_message = False
-        while cur < len(data):
-            try:
-                ((offset, ), cur) = relative_unpack('>q', data, cur)
-                (msg, cur) = read_int_string(data, cur)
-                for (offset, message) in KafkaProtocol._decode_message(msg, offset):
-                    read_message = True
-                    yield OffsetAndMessage(offset, message)
-            except BufferUnderflowError:
-                # NOTE: Not sure this is correct error handling:
-                # Is it possible to get a BUE if the message set is somewhere
-                # in the middle of the fetch response? If so, we probably have
-                # an issue that's not fetch size too small.
-                # Aren't we ignoring errors if we fail to unpack data by
-                # raising StopIteration()?
-                # If _decode_message() raises a ChecksumError, couldn't that
-                # also be due to the fetch size being too small?
-                if read_message is False:
-                    # If we get a partial read of a message, but haven't
-                    # yielded anything there's a problem
-                    raise ConsumerFetchSizeTooSmall()
-                else:
-                    raise StopIteration()
-
     @classmethod
     def _decode_message(cls, data, offset):
         """
@@ -169,7 +139,7 @@ class KafkaProtocol(object):
         codec = att & ATTRIBUTE_CODEC_MASK
 
         if codec == CODEC_NONE:
-            yield (offset, Message(magic, att, key, value))
+            yield (offset, kafka.common.Message(magic, att, key, value))
 
         elif codec == CODEC_GZIP:
             gz = gzip_decode(value)
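The CODEC_GZIP branch in this context decompresses a wrapped inner message set via kafka.codec; a standalone sketch of the same round-trip using only the stdlib:

import gzip

message_set = b'inner message set bytes'
value = gzip.compress(message_set)   # what a compressed wrapper message carries
assert gzip.decompress(value) == message_set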
@@ -186,253 +156,143 @@ class KafkaProtocol(object):
     ##################
 
     @classmethod
-    def encode_produce_request(cls, client_id, correlation_id,
-                               payloads=None, acks=1, timeout=1000):
+    def encode_produce_request(cls, payloads=(), acks=1, timeout=1000):
         """
-        Encode some ProduceRequest structs
+        Encode a ProduceRequest struct
 
         Arguments:
-            client_id: string
-            correlation_id: int
-            payloads: list of ProduceRequest
+            payloads: list of ProduceRequestPayload
             acks: How "acky" you want the request to be
-                0: immediate response
                 1: written to disk by the leader
-                2+: waits for this many number of replicas to sync
-            timeout: Maximum time the server will wait for acks from replicas.
+                0: immediate response
+                -1: waits for all replicas to be in sync
+            timeout: Maximum time (in ms) the server will wait for replica acks.
                 This is _not_ a socket timeout
+
+        Returns: ProduceRequest
         """
-        payloads = [] if payloads is None else payloads
-        grouped_payloads = group_by_topic_and_partition(payloads)
+        if acks not in (1, 0, -1):
+            raise ValueError('ProduceRequest acks (%s) must be 1, 0, -1' % acks)
 
-        message = []
-        message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.PRODUCE_KEY))
-
-        message.append(struct.pack('>hii', acks, timeout,
-                                   len(grouped_payloads)))
-
-        for topic, topic_payloads in grouped_payloads.items():
-            message.append(struct.pack('>h%dsi' % len(topic), len(topic), topic,
-                                       len(topic_payloads)))
-
-            for partition, payload in topic_payloads.items():
-                msg_set = KafkaProtocol._encode_message_set(payload.messages)
-                message.append(struct.pack('>ii%ds' % len(msg_set), partition,
-                                           len(msg_set), msg_set))
-
-        msg = b''.join(message)
-        return struct.pack('>i%ds' % len(msg), len(msg), msg)
+        return kafka.protocol.produce.ProduceRequest(
+            required_acks=acks,
+            timeout=timeout,
+            topics=[(
+                topic,
+                [(
+                    partition,
+                    [(0, 0, kafka.protocol.message.Message(msg.value, key=msg.key,
+                                                           magic=msg.magic,
+                                                           attributes=msg.attributes))
+                     for msg in payload.messages])
+                 for partition, payload in topic_payloads.items()])
+             for topic, topic_payloads in group_by_topic_and_partition(payloads).items()])
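A hypothetical call to the new encoder (the module path and topic values are assumptions, not taken from this diff):

from kafka.protocol.legacy import KafkaProtocol, create_message  # path assumed
from kafka.common import ProduceRequestPayload

payloads = [ProduceRequestPayload(b'my-topic', 0, [create_message(b'hello')])]
request = KafkaProtocol.encode_produce_request(payloads, acks=1, timeout=1000)
# `request` is a kafka.protocol.produce.ProduceRequest struct, not raw bytes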
 
     @classmethod
-    def decode_produce_response(cls, data):
+    def decode_produce_response(cls, response):
         """
-        Decode bytes to a ProduceResponse
+        Decode ProduceResponse to ProduceResponsePayload
 
         Arguments:
-            data: bytes to decode
+            response: ProduceResponse
+
+        Return: list of ProduceResponsePayload
         """
-        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
-
-        for _ in range(num_topics):
-            ((strlen,), cur) = relative_unpack('>h', data, cur)
-            topic = data[cur:cur + strlen]
-            cur += strlen
-            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-            for _ in range(num_partitions):
-                ((partition, error, offset), cur) = relative_unpack('>ihq',
-                                                                    data, cur)
-
-                yield ProduceResponse(topic, partition, error, offset)
+        return [
+            kafka.common.ProduceResponsePayload(topic, partition, error, offset)
+            for topic, partitions in response.topics
+            for partition, error, offset in partitions
+        ]
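The decode side is now just a flattening of an already-parsed struct; the same pattern on fake data, runnable standalone:

from collections import namedtuple

ProduceResponsePayload = namedtuple("ProduceResponsePayload",
    ["topic", "partition", "error", "offset"])

# response.topics shape: [(topic, [(partition, error, offset), ...]), ...]
topics = [(b'my-topic', [(0, 0, 42), (1, 0, 7)])]
payloads = [ProduceResponsePayload(t, p, e, o)
            for t, partitions in topics
            for p, e, o in partitions]
assert payloads[0].offset == 42 and payloads[1].partition == 1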
 
     @classmethod
-    def encode_fetch_request(cls, client_id, correlation_id, payloads=None,
-                             max_wait_time=100, min_bytes=4096):
+    def encode_fetch_request(cls, payloads=(), max_wait_time=100, min_bytes=4096):
         """
-        Encodes some FetchRequest structs
+        Encodes a FetchRequest struct
 
         Arguments:
-            client_id: string
-            correlation_id: int
-            payloads: list of FetchRequest
-            max_wait_time: int, how long to block waiting on min_bytes of data
-            min_bytes: int, the minimum number of bytes to accumulate before
-                       returning the response
+            payloads: list of FetchRequestPayload
+            max_wait_time (int, optional): ms to block waiting for min_bytes
+                data. Defaults to 100.
+            min_bytes (int, optional): minimum bytes required to return before
+                max_wait_time. Defaults to 4096.
+
+        Return: FetchRequest
         """
-        payloads = [] if payloads is None else payloads
-        grouped_payloads = group_by_topic_and_partition(payloads)
-
-        message = []
-        message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.FETCH_KEY))
-
-        # -1 is the replica id
-        message.append(struct.pack('>iiii', -1, max_wait_time, min_bytes,
-                                   len(grouped_payloads)))
-
-        for topic, topic_payloads in grouped_payloads.items():
-            message.append(write_short_string(topic))
-            message.append(struct.pack('>i', len(topic_payloads)))
-            for partition, payload in topic_payloads.items():
-                message.append(struct.pack('>iqi', partition, payload.offset,
-                                           payload.max_bytes))
-
-        msg = b''.join(message)
-        return struct.pack('>i%ds' % len(msg), len(msg), msg)
+        return kafka.protocol.fetch.FetchRequest(
+            replica_id=-1,
+            max_wait_time=max_wait_time,
+            min_bytes=min_bytes,
+            topics=[(
+                topic,
+                [(
+                    partition,
+                    payload.offset,
+                    payload.max_bytes)
+                 for partition, payload in topic_payloads.items()])
+             for topic, topic_payloads in group_by_topic_and_partition(payloads).items()])
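A hypothetical fetch call under the same assumptions as the produce example above:

from kafka.protocol.legacy import KafkaProtocol  # path assumed
from kafka.common import FetchRequestPayload

payloads = [FetchRequestPayload(b'my-topic', 0, 123, 4096)]  # offset=123, max_bytes=4096
request = KafkaProtocol.encode_fetch_request(payloads, max_wait_time=100, min_bytes=4096)
# `request` is a kafka.protocol.fetch.FetchRequest struct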
 
     @classmethod
-    def decode_fetch_response(cls, data):
+    def decode_fetch_response(cls, response):
         """
-        Decode bytes to a FetchResponse
+        Decode FetchResponse struct to FetchResponsePayloads
 
         Arguments:
-            data: bytes to decode
+            response: FetchResponse
         """
-        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
-
-        for _ in range(num_topics):
-            (topic, cur) = read_short_string(data, cur)
-            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-
-            for j in range(num_partitions):
-                ((partition, error, highwater_mark_offset), cur) = \
-                    relative_unpack('>ihq', data, cur)
-
-                (message_set, cur) = read_int_string(data, cur)
-
-                yield FetchResponse(
-                    topic, partition, error,
-                    highwater_mark_offset,
-                    KafkaProtocol._decode_message_set_iter(message_set))
+        return [
+            kafka.common.FetchResponsePayload(
+                topic, partition, error, highwater_offset, [
+                    kafka.common.OffsetAndMessage(offset, message)
+                    for offset, _, message in messages])
+            for topic, partitions in response.topics
+            for partition, error, highwater_offset, messages in partitions
+        ]
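Per the comprehension above, the new FetchResponse struct surfaces each message set as (offset, message_size, message) tuples, and the payload conversion simply drops the size field. A standalone sketch with fake data:

from collections import namedtuple

OffsetAndMessage = namedtuple("OffsetAndMessage", ["offset", "message"])

messages = [(5, 30, b'decoded-message-bytes')]  # (offset, size, message)
oams = [OffsetAndMessage(offset, message) for offset, _, message in messages]
assert oams[0].offset == 5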
 
     @classmethod
-    def encode_offset_request(cls, client_id, correlation_id, payloads=None):
-        payloads = [] if payloads is None else payloads
-        grouped_payloads = group_by_topic_and_partition(payloads)
-
-        message = []
-        message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.OFFSET_KEY))
-
-        # -1 is the replica id
-        message.append(struct.pack('>ii', -1, len(grouped_payloads)))
-
-        for topic, topic_payloads in grouped_payloads.items():
-            message.append(write_short_string(topic))
-            message.append(struct.pack('>i', len(topic_payloads)))
-
-            for partition, payload in topic_payloads.items():
-                message.append(struct.pack('>iqi', partition, payload.time,
-                                           payload.max_offsets))
-
-        msg = b''.join(message)
-        return struct.pack('>i%ds' % len(msg), len(msg), msg)
+    def encode_offset_request(cls, payloads=()):
+        return kafka.protocol.offset.OffsetRequest(
+            replica_id=-1,
+            topics=[(
+                topic,
+                [(
+                    partition,
+                    payload.time,
+                    payload.max_offsets)
+                 for partition, payload in six.iteritems(topic_payloads)])
+             for topic, topic_payloads in six.iteritems(group_by_topic_and_partition(payloads))])
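Hypothetical usage; in the Kafka offset API, time=-1 requests the latest offset and time=-2 the earliest (module path assumed as before):

from kafka.protocol.legacy import KafkaProtocol  # path assumed
from kafka.common import OffsetRequestPayload

payloads = [OffsetRequestPayload(b'my-topic', 0, -1, 1)]  # time=-1, max_offsets=1
request = KafkaProtocol.encode_offset_request(payloads)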
 
     @classmethod
-    def decode_offset_response(cls, data):
+    def decode_offset_response(cls, response):
         """
-        Decode bytes to an OffsetResponse
+        Decode OffsetResponse into OffsetResponsePayloads
 
         Arguments:
-            data: bytes to decode
+            response: OffsetResponse
+
+        Returns: list of OffsetResponsePayloads
        """
-        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
-
-        for _ in range(num_topics):
-            (topic, cur) = read_short_string(data, cur)
-            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-
-            for _ in range(num_partitions):
-                ((partition, error, num_offsets,), cur) = \
-                    relative_unpack('>ihi', data, cur)
-
-                offsets = []
-                for k in range(num_offsets):
-                    ((offset,), cur) = relative_unpack('>q', data, cur)
-                    offsets.append(offset)
-
-                yield OffsetResponse(topic, partition, error, tuple(offsets))
+        return [
+            kafka.common.OffsetResponsePayload(topic, partition, error, tuple(offsets))
+            for topic, partitions in response.topics
+            for partition, error, offsets in partitions
+        ]
 
     @classmethod
-    def encode_metadata_request(cls, client_id, correlation_id, topics=None,
-                                payloads=None):
+    def encode_metadata_request(cls, topics=(), payloads=None):
         """
         Encode a MetadataRequest
 
         Arguments:
-            client_id: string
-            correlation_id: int
             topics: list of strings
         """
-        if payloads is None:
-            topics = [] if topics is None else topics
-        else:
+        if payloads is not None:
             topics = payloads
 
-        message = []
-        message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.METADATA_KEY))
-
-        message.append(struct.pack('>i', len(topics)))
-
-        for topic in topics:
-            message.append(struct.pack('>h%ds' % len(topic), len(topic), topic))
-
-        msg = b''.join(message)
-        return write_int_string(msg)
+        return kafka.protocol.metadata.MetadataRequest(topics)
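With the struct API, metadata encoding reduces to a constructor call; hypothetical usage (path and topic names assumed):

from kafka.protocol.legacy import KafkaProtocol  # path assumed

request = KafkaProtocol.encode_metadata_request(topics=[b'topic-a', b'topic-b'])
# `request` is a kafka.protocol.metadata.MetadataRequest struct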
 
     @classmethod
-    def decode_metadata_response(cls, data):
-        """
-        Decode bytes to a MetadataResponse
-
-        Arguments:
-            data: bytes to decode
-        """
-        ((correlation_id, numbrokers), cur) = relative_unpack('>ii', data, 0)
-
-        # Broker info
-        brokers = []
-        for _ in range(numbrokers):
-            ((nodeId, ), cur) = relative_unpack('>i', data, cur)
-            (host, cur) = read_short_string(data, cur)
-            ((port,), cur) = relative_unpack('>i', data, cur)
-            brokers.append(BrokerMetadata(nodeId, host, port))
-
-        # Topic info
-        ((num_topics,), cur) = relative_unpack('>i', data, cur)
-        topic_metadata = []
-
-        for _ in range(num_topics):
-            ((topic_error,), cur) = relative_unpack('>h', data, cur)
-            (topic_name, cur) = read_short_string(data, cur)
-            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-            partition_metadata = []
-
-            for _ in range(num_partitions):
-                ((partition_error_code, partition, leader, numReplicas), cur) = \
-                    relative_unpack('>hiii', data, cur)
-
-                (replicas, cur) = relative_unpack(
-                    '>%di' % numReplicas, data, cur)
-
-                ((num_isr,), cur) = relative_unpack('>i', data, cur)
-                (isr, cur) = relative_unpack('>%di' % num_isr, data, cur)
-
-                partition_metadata.append(
-                    PartitionMetadata(topic_name, partition, leader,
-                                      replicas, isr, partition_error_code)
-                )
-
-            topic_metadata.append(
-                TopicMetadata(topic_name, topic_error, partition_metadata)
-            )
-
-        return MetadataResponse(brokers, topic_metadata)
+    def decode_metadata_response(cls, response):
+        return response
 
     @classmethod
     def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads):
@@ -587,7 +447,7 @@ def create_message(payload, key=None):
         key: bytes, a key used for partition routing (optional)
 
     """
-    return Message(0, 0, key, payload)
+    return kafka.common.Message(0, 0, key, payload)
 
 
 def create_gzip_message(payloads, key=None, compresslevel=None):
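create_message wraps a value in an uncompressed v0 Message (magic=0, attributes=0); a self-contained sketch using the same namedtuple field layout as kafka.common.Message:

from collections import namedtuple

# Same field layout as kafka.common.Message
Message = namedtuple("Message", ["magic", "attributes", "key", "value"])

def create_message(payload, key=None):
    return Message(0, 0, key, payload)  # magic=0, attributes=0 (no codec bits)

msg = create_message(b'hello', key=b'k1')
assert msg.attributes == 0 and msg.value == b'hello'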
@@ -608,7 +468,7 @@ def create_gzip_message(payloads, key=None, compresslevel=None):
     gzipped = gzip_encode(message_set, compresslevel=compresslevel)
     codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
 
-    return Message(0, 0x00 | codec, key, gzipped)
+    return kafka.common.Message(0, 0x00 | codec, key, gzipped)
 
 
 def create_snappy_message(payloads, key=None):
@@ -629,7 +489,7 @@ def create_snappy_message(payloads, key=None):
     snapped = snappy_encode(message_set)
     codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
 
-    return Message(0, 0x00 | codec, key, snapped)
+    return kafka.common.Message(0, 0x00 | codec, key, snapped)
 
 
 def create_message_set(messages, codec=CODEC_NONE, key=None, compresslevel=None):
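The codec constants used above are small bit flags masked into the message attributes byte; a standalone sketch of the arithmetic (constant values are assumptions mirroring this module, not taken from the diff):

ATTRIBUTE_CODEC_MASK = 0x03  # assumed: low bits of the attributes byte
CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY = 0x00, 0x01, 0x02

attributes = 0x00 | (ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
assert attributes & ATTRIBUTE_CODEC_MASK == CODEC_GZIP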