Big code re-org

David Arthur
2013-03-30 00:28:00 -04:00
parent 3499e2f6ea
commit b6d98c07b4
11 changed files with 868 additions and 673 deletions

View File

@@ -1,6 +1,7 @@
 import logging
 from kafka.client import KafkaClient, FetchRequest, ProduceRequest
+from kafka.consumer import SimpleConsumer

 def produce_example(kafka):
     message = kafka.create_message("testing")
@@ -20,11 +21,8 @@ def produce_gz_example(kafka):
     kafka.send_message_set(request)

 def main():
-    kafka = KafkaClient("localhost", 9092)
-    produce_example(kafka)
-    produce_gz_example(kafka)
-    consume_example(kafka)
-    kafka.close()
+    client = KafkaClient("localhost", 9092)
+    consumer = SimpleConsumer(client, "test-group", "my-topic")

 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
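The hunk above drops the old produce/consume helpers from main() without showing the replacement. A minimal sketch (not part of this commit) of a rewritten example built only from the pieces this commit adds, assuming a broker on localhost:9092 and an existing topic named "my-topic":

import logging

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer

def main():
    client = KafkaClient("localhost", 9092)

    # iterate messages from every partition of "my-topic", tracking
    # offsets under the consumer group "test-group"
    consumer = SimpleConsumer(client, "test-group", "my-topic")
    for message in consumer:
        print message

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    main()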

View File

@@ -15,3 +15,18 @@ There are a few levels of abstraction:
 * Partitioned (run each message through a partitioning function)
 ** HashPartitioned
 ** FunctionPartition
+
+# Possible API
+
+client = KafkaClient("localhost", 9092)
+
+producer = KafkaProducer(client, "topic")
+producer.send_string("hello")
+
+consumer = KafkaConsumer(client, "group", "topic")
+consumer.seek(10, 2) # seek to beginning (lowest offset)
+consumer.commit()    # commit it
+for msg in consumer.iter_messages():
+    print msg
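The block added to the README is an aspirational sketch; KafkaProducer and KafkaConsumer do not exist yet. The classes this commit actually adds are SimpleProducer and SimpleConsumer, so a hedged equivalent against the new code (topic and group names are placeholders) would look roughly like:

from kafka import KafkaClient, SimpleProducer, SimpleConsumer

client = KafkaClient("localhost", 9092)

producer = SimpleProducer(client, "topic")
producer.send_message("hello")

consumer = SimpleConsumer(client, "group", "topic")
for msg in consumer:
    print msg
consumer.commit()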

View File

@@ -1,11 +1,18 @@
 __title__ = 'kafka'
-__version__ = '0.1-alpha'
+__version__ = '0.2-alpha'
 __author__ = 'David Arthur'
 __license__ = 'Apache License 2.0'
 __copyright__ = 'Copyright 2012, David Arthur under Apache License, v2.0'

-from .client import (
-    KafkaClient
-)
-from .codec import gzip_encode, gzip_decode
-from .codec import snappy_encode, snappy_decode
+from kafka.client import KafkaClient
+from kafka.conn import KafkaConnection
+from kafka.protocol import (
+    create_message, create_gzip_message, create_snappy_message
+)
+from kafka.producer import SimpleProducer
+from kafka.consumer import SimpleConsumer
+
+__all__ = [
+    'KafkaClient', 'KafkaConnection', 'SimpleProducer', 'SimpleConsumer',
+    'create_message', 'create_gzip_message', 'create_snappy_message'
+]
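For callers the re-org is mostly an import-path change. A small sketch of the intended import surface after this commit (nothing here is executed against a broker):

# the public entry points are re-exported at the package root
from kafka import KafkaClient, KafkaConnection
from kafka import SimpleProducer, SimpleConsumer
from kafka import create_message, create_gzip_message, create_snappy_message

# the request/response namedtuples now live in kafka.common
from kafka.common import ProduceRequest, FetchRequest, ErrorMapping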

View File

@@ -1,588 +1,19 @@
 import base64
-from collections import namedtuple, defaultdict
+from collections import defaultdict
 from functools import partial
 from itertools import count, cycle
 import logging
 from operator import attrgetter
-import socket
 import struct
 import time
 import zlib

-from .codec import gzip_encode, gzip_decode
-from .codec import snappy_encode, snappy_decode
-from .util import read_short_string, read_int_string
-from .util import relative_unpack
-from .util import write_short_string, write_int_string
-from .util import group_by_topic_and_partition
-from .util import BufferUnderflowError, ChecksumError
+from kafka.common import *
+from kafka.conn import KafkaConnection
+from kafka.protocol import KafkaProtocol

 log = logging.getLogger("kafka")
###############
# Structs #
###############
# Request payloads
ProduceRequest = namedtuple("ProduceRequest", ["topic", "partition", "messages"])
FetchRequest = namedtuple("FetchRequest", ["topic", "partition", "offset", "max_bytes"])
OffsetRequest = namedtuple("OffsetRequest", ["topic", "partition", "time", "max_offsets"])
OffsetCommitRequest = namedtuple("OffsetCommitRequest", ["topic", "partition", "offset", "metadata"])
OffsetFetchRequest = namedtuple("OffsetFetchRequest", ["topic", "partition"])
# Response payloads
ProduceResponse = namedtuple("ProduceResponse", ["topic", "partition", "error", "offset"])
FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error", "highwaterMark", "messages"])
OffsetResponse = namedtuple("OffsetResponse", ["topic", "partition", "error", "offsets"])
OffsetCommitResponse = namedtuple("OffsetCommitResponse", ["topic", "partition", "error"])
OffsetFetchResponse = namedtuple("OffsetFetchResponse", ["topic", "partition", "offset", "metadata", "error"])
BrokerMetadata = namedtuple("BrokerMetadata", ["nodeId", "host", "port"])
PartitionMetadata = namedtuple("PartitionMetadata", ["topic", "partition", "leader", "replicas", "isr"])
# Other useful structs
OffsetAndMessage = namedtuple("OffsetAndMessage", ["offset", "message"])
Message = namedtuple("Message", ["magic", "attributes", "key", "value"])
TopicAndPartition = namedtuple("TopicAndPartition", ["topic", "partition"])
class ErrorMapping(object):
# Many of these are not actually used by the client
UNKNOWN = -1
NO_ERROR = 0
OFFSET_OUT_OF_RANGE = 1
INVALID_MESSAGE = 2
UNKNOWN_TOPIC_OR_PARTITON = 3
INVALID_FETCH_SIZE = 4
LEADER_NOT_AVAILABLE = 5
NOT_LEADER_FOR_PARTITION = 6
REQUEST_TIMED_OUT = 7
BROKER_NOT_AVAILABLE = 8
REPLICA_NOT_AVAILABLE = 9
MESSAGE_SIZE_TO_LARGE = 10
STALE_CONTROLLER_EPOCH = 11
OFFSET_METADATA_TOO_LARGE = 12
class KafkaProtocol(object):
"""
Class to encapsulate all of the protocol encoding/decoding. This class does not
have any state associated with it, it is purely for organization.
"""
PRODUCE_KEY = 0
FETCH_KEY = 1
OFFSET_KEY = 2
METADATA_KEY = 3
OFFSET_COMMIT_KEY = 6
OFFSET_FETCH_KEY = 7
ATTRIBUTE_CODEC_MASK = 0x03
CODEC_NONE = 0x00
CODEC_GZIP = 0x01
CODEC_SNAPPY = 0x02
###################
# Private API #
###################
@classmethod
def _encode_message_header(cls, client_id, correlation_id, request_key):
"""
Encode the common request envelope
"""
return struct.pack('>hhih%ds' % len(client_id),
request_key, # ApiKey
0, # ApiVersion
correlation_id, # CorrelationId
len(client_id), #
client_id) # ClientId
@classmethod
def _encode_message_set(cls, messages):
"""
Encode a MessageSet. Unlike other arrays in the protocol, MessageSets are
not length-prefixed
Format
======
MessageSet => [Offset MessageSize Message]
Offset => int64
MessageSize => int32
"""
message_set = ""
for message in messages:
encoded_message = KafkaProtocol._encode_message(message)
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
return message_set
@classmethod
def _encode_message(cls, message):
"""
Encode a single message.
The magic number of a message is a format version number. The only supported
magic number right now is zero
Format
======
Message => Crc MagicByte Attributes Key Value
Crc => int32
MagicByte => int8
Attributes => int8
Key => bytes
Value => bytes
"""
if message.magic == 0:
msg = struct.pack('>BB', message.magic, message.attributes)
msg += write_int_string(message.key)
msg += write_int_string(message.value)
crc = zlib.crc32(msg)
msg = struct.pack('>i%ds' % len(msg), crc, msg)
else:
raise Exception("Unexpected magic number: %d" % message.magic)
return msg
@classmethod
def _decode_message_set_iter(cls, data):
"""
Iteratively decode a MessageSet
Reads repeated elements of (offset, message), calling decode_message to decode a
single message. Since compressed messages contain futher MessageSets, these two methods
have been decoupled so that they may recurse easily.
"""
cur = 0
while cur < len(data):
try:
((offset, ), cur) = relative_unpack('>q', data, cur)
(msg, cur) = read_int_string(data, cur)
for (offset, message) in KafkaProtocol._decode_message(msg, offset):
yield OffsetAndMessage(offset, message)
except BufferUnderflowError: # If we get a partial read of a message, stop
raise StopIteration()
@classmethod
def _decode_message(cls, data, offset):
"""
Decode a single Message
The only caller of this method is decode_message_set_iter. They are decoupled to
support nested messages (compressed MessageSets). The offset is actually read from
decode_message_set_iter (it is part of the MessageSet payload).
"""
((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
if crc != zlib.crc32(data[4:]):
raise ChecksumError("Message checksum failed")
(key, cur) = read_int_string(data, cur)
(value, cur) = read_int_string(data, cur)
if att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_NONE:
yield (offset, Message(magic, att, key, value))
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_GZIP:
gz = gzip_decode(value)
for (offset, message) in KafkaProtocol._decode_message_set_iter(gz):
yield (offset, message)
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_SNAPPY:
snp = snappy_decode(value)
for (offset, message) in KafkaProtocol._decode_message_set_iter(snp):
yield (offset, message)
##################
# Public API #
##################
@classmethod
def create_message(cls, payload, key=None):
"""
Construct a Message
Params
======
payload: bytes, the payload to send to Kafka
key: bytes, a key used for partition routing (optional)
"""
return Message(0, 0, key, payload)
@classmethod
def create_gzip_message(cls, payloads, key=None):
"""
Construct a Gzipped Message containing multiple Messages
The given payloads will be encoded, compressed, and sent as a single atomic
message to Kafka.
Params
======
payloads: list(bytes), a list of payload to send be sent to Kafka
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
[KafkaProtocol.create_message(payload) for payload in payloads])
gzipped = gzip_encode(message_set)
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP), key, gzipped)
@classmethod
def create_snappy_message(cls, payloads, key=None):
"""
Construct a Snappy Message containing multiple Messages
The given payloads will be encoded, compressed, and sent as a single atomic
message to Kafka.
Params
======
payloads: list(bytes), a list of payload to send be sent to Kafka
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
[KafkaProtocol.create_message(payload) for payload in payloads])
snapped = snappy_encode(message_set)
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY), key, snapped)
@classmethod
def encode_produce_request(cls, client_id, correlation_id, payloads=[], acks=1, timeout=1000):
"""
Encode some ProduceRequest structs
Params
======
client_id: string
correlation_id: string
payloads: list of ProduceRequest
acks: How "acky" you want the request to be
0: immediate response
1: written to disk by the leader
2+: waits for this many number of replicas to sync
-1: waits for all replicas to be in sync
timeout: Maximum time the server will wait for acks from replicas. This is _not_ a socket timeout
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.PRODUCE_KEY)
message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += struct.pack('>h%dsi' % len(topic), len(topic), topic, len(topic_payloads))
for partition, payload in topic_payloads.items():
message_set = KafkaProtocol._encode_message_set(payload.messages)
message += struct.pack('>ii%ds' % len(message_set), partition, len(message_set), message_set)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_produce_response(cls, data):
"""
Decode bytes to a ProduceResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
((strlen,), cur) = relative_unpack('>h', data, cur)
topic = data[cur:cur+strlen]
cur += strlen
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, offset), cur) = relative_unpack('>ihq', data, cur)
yield ProduceResponse(topic, partition, error, offset)
@classmethod
def encode_fetch_request(cls, client_id, correlation_id, payloads=[], max_wait_time=100, min_bytes=4096):
"""
Encodes some FetchRequest structs
Params
======
client_id: string
correlation_id: string
payloads: list of FetchRequest
max_wait_time: int, how long to block waiting on min_bytes of data
min_bytes: int, the minimum number of bytes to accumulate before returning the response
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.FETCH_KEY)
message += struct.pack('>iiii', -1, max_wait_time, min_bytes, len(grouped_payloads)) # -1 is the replica id
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.offset, payload.max_bytes)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_fetch_response_iter(cls, data):
"""
Decode bytes to a FetchResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, highwater_mark_offset), cur) = relative_unpack('>ihq', data, cur)
(message_set, cur) = read_int_string(data, cur)
yield FetchResponse(topic, partition, error, highwater_mark_offset,
KafkaProtocol._decode_message_set_iter(message_set))
@classmethod
def encode_offset_request(cls, client_id, correlation_id, payloads=[]):
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_KEY)
message += struct.pack('>ii', -1, len(grouped_payloads)) # -1 is the replica id
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.time, payload.max_offsets)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_response(cls, data):
"""
Decode bytes to an OffsetResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, num_offsets,), cur) = relative_unpack('>ihi', data, cur)
offsets = []
for j in range(num_offsets):
((offset,), cur) = relative_unpack('>q', data, cur)
offsets.append(offset)
yield OffsetResponse(topic, partition, error, tuple(offsets))
@classmethod
def encode_metadata_request(cls, client_id, correlation_id, topics=[]):
"""
Encode a MetadataRequest
Params
======
client_id: string
correlation_id: string
topics: list of strings
"""
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.METADATA_KEY)
message += struct.pack('>i', len(topics))
for topic in topics:
message += struct.pack('>h%ds' % len(topic), len(topic), topic)
return write_int_string(message)
@classmethod
def decode_metadata_response(cls, data):
"""
Decode bytes to a MetadataResponse
Params
======
data: bytes to decode
"""
((correlation_id, numBrokers), cur) = relative_unpack('>ii', data, 0)
# Broker info
brokers = {}
for i in range(numBrokers):
((nodeId, ), cur) = relative_unpack('>i', data, cur)
(host, cur) = read_short_string(data, cur)
((port,), cur) = relative_unpack('>i', data, cur)
brokers[nodeId] = BrokerMetadata(nodeId, host, port)
# Topic info
((num_topics,), cur) = relative_unpack('>i', data, cur)
topicMetadata = {}
for i in range(num_topics):
((topicError,), cur) = relative_unpack('>h', data, cur)
(topicName, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
partitionMetadata = {}
for j in range(num_partitions):
((partitionErrorCode, partition, leader, numReplicas), cur) = relative_unpack('>hiii', data, cur)
(replicas, cur) = relative_unpack('>%di' % numReplicas, data, cur)
((numIsr,), cur) = relative_unpack('>i', data, cur)
(isr, cur) = relative_unpack('>%di' % numIsr, data, cur)
partitionMetadata[partition] = PartitionMetadata(topicName, partition, leader, replicas, isr)
topicMetadata[topicName] = partitionMetadata
return (brokers, topicMetadata)
@classmethod
def encode_offset_commit_request(cls, client_id, correlation_id, group, payloads):
"""
Encode some OffsetCommitRequest structs
Params
======
client_id: string
correlation_id: string
group: string, the consumer group you are committing offsets for
payloads: list of OffsetCommitRequest
"""
grouped_payloads= group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_COMMIT_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iq', partition, payload.offset)
message += write_short_string(payload.metadata)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_commit_response(cls, data):
"""
Decode bytes to an OffsetCommitResponse
Params
======
data: bytes to decode
"""
data = data[2:] # TODO remove me when versionId is removed
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
for i in xrange(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in xrange(num_partitions):
((partition, error), cur) = relative_unpack('>ih', data, cur)
yield OffsetCommitResponse(topic, partition, error)
@classmethod
def encode_offset_fetch_request(cls, client_id, correlation_id, group, payloads):
"""
Encode some OffsetFetchRequest structs
Params
======
client_id: string
correlation_id: string
group: string, the consumer group you are fetching offsets for
payloads: list of OffsetFetchRequest
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_FETCH_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>i', partition)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_fetch_response(cls, data):
"""
Decode bytes to an OffsetFetchResponse
Params
======
data: bytes to decode
"""
data = data[2:] # TODO remove me when versionId is removed
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, offset), cur) = relative_unpack('>iq', data, cur)
(metadata, cur) = read_short_string(data, cur)
((error,), cur) = relative_unpack('>h', data, cur)
yield OffsetFetchResponse(topic, partition, offset, metadata, error)
class KafkaConnection(object):
"""
A socket connection to a single Kafka broker
This class is _not_ thread safe. Each call to `send` must be followed
by a call to `recv` in order to get the correct response. Eventually,
we can do something in here to facilitate multiplexed requests/responses
since the Kafka API includes a correlation id.
"""
def __init__(self, host, port, bufsize=4096):
self.host = host
self.port = port
self.bufsize = bufsize
self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._sock.connect((host, port))
self._sock.settimeout(10)
def __str__(self):
return "<KafkaConnection host=%s port=%d>" % (self.host, self.port)
###################
# Private API #
###################
def _consume_response(self):
"""
Fully consumer the response iterator
"""
data = ""
for chunk in self._consume_response_iter():
data += chunk
return data
def _consume_response_iter(self):
"""
This method handles the response header and error messages. It
then returns an iterator for the chunks of the response
"""
log.debug("Handling response from Kafka")
# Read the size off of the header
resp = self._sock.recv(4)
if resp == "":
raise Exception("Got no response from Kafka")
(size,) = struct.unpack('>i', resp)
messageSize = size - 4
log.debug("About to read %d bytes from Kafka", messageSize)
# Read the remainder of the response
total = 0
while total < messageSize:
resp = self._sock.recv(self.bufsize)
log.debug("Read %d bytes from Kafka", len(resp))
if resp == "":
raise BufferUnderflowError("Not enough data to read this response")
total += len(resp)
yield resp
##################
# Public API #
##################
# TODO multiplex socket communication to allow for multi-threaded clients
def send(self, requestId, payload):
"Send a request to Kafka"
sent = self._sock.sendall(payload)
if sent == 0:
raise RuntimeError("Kafka went away")
self.data = self._consume_response()
def recv(self, requestId):
"Get a response from Kafka"
return self.data
def close(self):
"Close this connection"
self._sock.close()
 class KafkaClient(object):
     CLIENT_ID = "kafka-python"
@@ -808,71 +239,3 @@ class KafkaClient(object):
             else:
                 out.append(offset_fetch_response)
         return out
class SimpleProducer(object):
"""
A simple, round-robbin producer. Each message goes to exactly one partition
"""
def __init__(self, client, topic):
self.client = client
self.topic = topic
self.client.load_metadata_for_topics(topic)
self.next_partition = cycle(self.client.topic_partitions[topic])
def send_message(self, msg):
req = ProduceRequest(self.topic, self.next_partition.next(),
messages=[KafkaProtocol.create_message(msg)])
resp = self.client.send_produce_request([req]).next()
class SimpleConsumer(object):
"""
A simple consumer implementation that consumes all partitions for a topic
"""
def __init__(self, client, group, topic):
self.client = client
self.topic = topic
self.group = group
self.client.load_metadata_for_topics(topic)
self.offsets = {}
def get_or_init_offset_callback(resp):
if resp.error == ErrorMapping.NO_ERROR:
return resp.offset
elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
return 0
else:
raise Exception("OffsetFetchRequest for topic=%s, partition=%d failed with errorcode=%s" % (
resp.topic, resp.partition, resp.error))
for partition in self.client.topic_partitions[topic]:
req = OffsetFetchRequest(topic, partition)
(offset,) = self.client.send_offset_fetch_request(group, [req],
callback=get_or_init_offset_callback, fail_on_error=False)
self.offsets[partition] = offset
def __iter__(self):
iters = {}
for partition, offset in self.offsets.items():
iters[partition] = self.__iter_partition__(partition, offset)
while True:
for it in iters.values():
yield it.next()
def __iter_partition__(self, partition, offset):
while True:
req = FetchRequest(self.topic, partition, offset, 1024)
(resp,) = self.client.send_fetch_request([req])
assert resp.topic == self.topic
assert resp.partition == partition
next_offset = None
for message in resp.messages:
next_offset = message.offset
yield message
if next_offset is None:
raise StopIteration("No more messages")
else:
offset = next_offset + 1
# Commit offset here?

kafka/common.py Normal file
View File

@@ -0,0 +1,43 @@
from collections import namedtuple
###############
# Structs #
###############
# Request payloads
ProduceRequest = namedtuple("ProduceRequest", ["topic", "partition", "messages"])
FetchRequest = namedtuple("FetchRequest", ["topic", "partition", "offset", "max_bytes"])
OffsetRequest = namedtuple("OffsetRequest", ["topic", "partition", "time", "max_offsets"])
OffsetCommitRequest = namedtuple("OffsetCommitRequest", ["topic", "partition", "offset", "metadata"])
OffsetFetchRequest = namedtuple("OffsetFetchRequest", ["topic", "partition"])
# Response payloads
ProduceResponse = namedtuple("ProduceResponse", ["topic", "partition", "error", "offset"])
FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error", "highwaterMark", "messages"])
OffsetResponse = namedtuple("OffsetResponse", ["topic", "partition", "error", "offsets"])
OffsetCommitResponse = namedtuple("OffsetCommitResponse", ["topic", "partition", "error"])
OffsetFetchResponse = namedtuple("OffsetFetchResponse", ["topic", "partition", "offset", "metadata", "error"])
BrokerMetadata = namedtuple("BrokerMetadata", ["nodeId", "host", "port"])
PartitionMetadata = namedtuple("PartitionMetadata", ["topic", "partition", "leader", "replicas", "isr"])
# Other useful structs
OffsetAndMessage = namedtuple("OffsetAndMessage", ["offset", "message"])
Message = namedtuple("Message", ["magic", "attributes", "key", "value"])
TopicAndPartition = namedtuple("TopicAndPartition", ["topic", "partition"])
class ErrorMapping(object):
# Many of these are not actually used by the client
UNKNOWN = -1
NO_ERROR = 0
OFFSET_OUT_OF_RANGE = 1
INVALID_MESSAGE = 2
UNKNOWN_TOPIC_OR_PARTITON = 3
INVALID_FETCH_SIZE = 4
LEADER_NOT_AVAILABLE = 5
NOT_LEADER_FOR_PARTITION = 6
REQUEST_TIMED_OUT = 7
BROKER_NOT_AVAILABLE = 8
REPLICA_NOT_AVAILABLE = 9
MESSAGE_SIZE_TO_LARGE = 10
STALE_CONTROLLER_EPOCH = 11
OFFSET_METADATA_TOO_LARGE = 12
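These namedtuples are plain value objects handed to and returned by KafkaClient. A brief sketch (not from this commit) of building a request and checking a response's error code against ErrorMapping, with placeholder topic and partition values:

from kafka.common import FetchRequest, ErrorMapping

# ask partition 0 of "my-topic" for up to 4096 bytes starting at offset 0
req = FetchRequest(topic="my-topic", partition=0, offset=0, max_bytes=4096)

# responses carry a numeric error code that maps onto ErrorMapping constants
def check_response(resp):
    if resp.error != ErrorMapping.NO_ERROR:
        raise Exception("error %d for %s/%d" % (resp.error, resp.topic, resp.partition))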

kafka/conn.py Normal file
View File

@@ -0,0 +1,85 @@
import logging
import socket
import struct
log = logging.getLogger("kafka")
class KafkaConnection(object):
"""
A socket connection to a single Kafka broker
This class is _not_ thread safe. Each call to `send` must be followed
by a call to `recv` in order to get the correct response. Eventually,
we can do something in here to facilitate multiplexed requests/responses
since the Kafka API includes a correlation id.
"""
def __init__(self, host, port, bufsize=4096):
self.host = host
self.port = port
self.bufsize = bufsize
self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._sock.connect((host, port))
self._sock.settimeout(10)
def __str__(self):
return "<KafkaConnection host=%s port=%d>" % (self.host, self.port)
###################
# Private API #
###################
def _consume_response(self):
"""
Fully consumer the response iterator
"""
data = ""
for chunk in self._consume_response_iter():
data += chunk
return data
def _consume_response_iter(self):
"""
This method handles the response header and error messages. It
then returns an iterator for the chunks of the response
"""
log.debug("Handling response from Kafka")
# Read the size off of the header
resp = self._sock.recv(4)
if resp == "":
raise Exception("Got no response from Kafka")
(size,) = struct.unpack('>i', resp)
messageSize = size - 4
log.debug("About to read %d bytes from Kafka", messageSize)
# Read the remainder of the response
total = 0
while total < messageSize:
resp = self._sock.recv(self.bufsize)
log.debug("Read %d bytes from Kafka", len(resp))
if resp == "":
raise BufferUnderflowError("Not enough data to read this response")
total += len(resp)
yield resp
##################
# Public API #
##################
# TODO multiplex socket communication to allow for multi-threaded clients
def send(self, requestId, payload):
"Send a request to Kafka"
sent = self._sock.sendall(payload)
if sent == 0:
raise RuntimeError("Kafka went away")
self.data = self._consume_response()
def recv(self, requestId):
"Get a response from Kafka"
return self.data
def close(self):
"Close this connection"
self._sock.close()
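KafkaConnection is a thin, blocking socket wrapper: send() writes an encoded request and buffers the broker's reply, recv() hands that reply back. Normally KafkaClient drives it, but a rough sketch of using it directly with KafkaProtocol (broker address and the correlation id are placeholders) might look like:

from kafka.conn import KafkaConnection
from kafka.protocol import KafkaProtocol

conn = KafkaConnection("localhost", 9092)

# request metadata for one topic; 1 is an arbitrary correlation id
request = KafkaProtocol.encode_metadata_request("kafka-python", 1, ["my-topic"])
conn.send(1, request)

# recv() returns the buffered response body (the size prefix is already stripped)
brokers, topics = KafkaProtocol.decode_metadata_response(conn.recv(1))
print brokers, topics
conn.close()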

kafka/consumer.py Normal file
View File

@@ -0,0 +1,159 @@
import logging
from threading import Lock
from kafka.common import (
ErrorMapping, FetchRequest,
OffsetRequest, OffsetFetchRequest, OffsetCommitRequest
)
log = logging.getLogger("kafka")
class SimpleConsumer(object):
"""
A simple consumer implementation that consumes all partitions for a topic
client: a connected KafkaClient
group: a name for this consumer, used for offset storage and must be unique
topic: the topic to consume
auto_commit: default True. Whether or not to auto commit the offsets
auto_commit_every_n: default 100. How many messages to consume before a commit
auto_commit_every_t: default 5000. How much time (in milliseconds) to wait before commit
Auto commit details:
If both auto_commit_every_n and auto_commit_every_t are set, they will reset one another
when one is triggered. These triggers simply call the commit method on this class. A
manual call to commit will also reset these triggers
"""
def __init__(self, client, group, topic, auto_commit=False, auto_commit_every_n=None, auto_commit_every_t=None):
self.client = client
self.topic = topic
self.group = group
self.client.load_metadata_for_topics(topic)
self.offsets = {}
# Set up the auto-commit timer
if auto_commit is True:
if auto_commit_every_t is not None:
self.commit_timer = ReentrantTimer(auto_commit_every_t, self.commit)
self.commit_timer.start()
self.commit_lock = Lock()
self.count_since_commit = 0
self.auto_commit = auto_commit
self.auto_commit_every_n = auto_commit_every_n
self.auto_commit_every_t = auto_commit_every_t
def get_or_init_offset_callback(resp):
if resp.error == ErrorMapping.NO_ERROR:
return resp.offset
elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
return 0
else:
raise Exception("OffsetFetchRequest for topic=%s, partition=%d failed with errorcode=%s" % (
resp.topic, resp.partition, resp.error))
for partition in self.client.topic_partitions[topic]:
req = OffsetFetchRequest(topic, partition)
(offset,) = self.client.send_offset_fetch_request(group, [req],
callback=get_or_init_offset_callback, fail_on_error=False)
self.offsets[partition] = offset
print self.offsets
def seek(self, offset, whence):
"""
Alter the current offset in the consumer, similar to fseek
offset: how much to modify the offset
whence: where to modify it from
0 is relative to the earliest available offset (head)
1 is relative to the current offset
2 is relative to the latest known offset (tail)
"""
if whence == 1:
# relative to current position
for partition, _offset in self.offsets.items():
self.offset[partition] = _offset + offset
elif whence in (0, 2):
# relative to beginning or end
reqs = []
for partition in offsets.keys():
if whence == 0:
reqs.append(OffsetRequest(self.topic, partition, -2, 1))
elif whence == 2:
reqs.append(OffsetRequest(self.topic, partition, -1, 1))
else:
pass
resps = self.client.send_offset_request([req])
for resp in resps:
self.offsets[resp.partition] = resp.offsets[0] + offset
else:
raise
def commit(self, partitions=[]):
"""
Commit offsets for this consumer
partitions: list of partitions to commit, default is to commit all of them
"""
# short circuit if nothing happened
if self.count_since_commit == 0:
return
with self.commit_lock:
reqs = []
if len(partitions) == 0: # commit all partitions
for partition, offset in self.offsets.items():
log.debug("Commit offset %d in SimpleConsumer: group=%s, topic=%s, partition=%s" % (
offset, self.group, self.topic, partition))
reqs.append(OffsetCommitRequest(self.topic, partition, offset, None))
else:
for partition in partitions:
offset = self.offsets[partition]
log.debug("Commit offset %d in SimpleConsumer: group=%s, topic=%s, partition=%s" % (
offset, self.group, self.topic, partition))
reqs.append(OffsetCommitRequest(self.topic, partition, offset, None))
resps = self.send_offset_commit_request(self.group, reqs)
for resp in resps:
assert resp.error == 0
self.count_since_commit = 0
def __iter__(self):
iters = {}
for partition, offset in self.offsets.items():
iters[partition] = self.__iter_partition__(partition, offset)
while True:
for it in iters.values():
yield it.next()
self.count_since_commit += 1
# deal with auto commits
if self.auto_commit is True:
if self.auto_commit_every_n is not None and self.count_since_commit > self.auto_commit_every_n:
if self.commit_timer is not None:
self.commit_timer.stop()
self.commit()
self.commit_timer.start()
else:
self.commit()
def __iter_partition__(self, partition, offset):
while True:
req = FetchRequest(self.topic, partition, offset, 1024)
(resp,) = self.client.send_fetch_request([req])
assert resp.topic == self.topic
assert resp.partition == partition
next_offset = None
for message in resp.messages:
next_offset = message.offset
print partition, message, message.offset
yield message
# update the internal state _after_ we yield the message
self.offsets[partition] = message.offset
print partition, next_offset
if next_offset is None:
break
else:
offset = next_offset + 1
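SimpleConsumer fetches from every partition of the topic and stores offsets per consumer group via OffsetCommitRequest. A hedged sketch of how it is meant to be driven with the auto-commit knobs described in its docstring (broker, group and topic names are placeholders):

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost", 9092)

# commit offsets for "my-group" every 100 messages or every 5 seconds
consumer = SimpleConsumer(client, "my-group", "my-topic",
                          auto_commit=True,
                          auto_commit_every_n=100,
                          auto_commit_every_t=5000)

# iteration round-robins across all partitions of the topic
for message in consumer:
    # each item is an OffsetAndMessage namedtuple
    print message.offset, message.message.value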

kafka/producer.py Normal file
View File

@@ -0,0 +1,22 @@
from itertools import cycle
import logging
from kafka.common import ProduceRequest
from kafka.protocol import create_message
log = logging.getLogger("kafka")
class SimpleProducer(object):
"""
A simple, round-robbin producer. Each message goes to exactly one partition
"""
def __init__(self, client, topic):
self.client = client
self.topic = topic
self.client.load_metadata_for_topics(topic)
self.next_partition = cycle(self.client.topic_partitions[topic])
def send_message(self, msg):
req = ProduceRequest(self.topic, self.next_partition.next(),
messages=[create_message(msg)])
resp = self.client.send_produce_request([req]).next()
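SimpleProducer loads topic metadata once and then cycles through the topic's partitions, one message per call. A minimal usage sketch, assuming a broker on localhost:9092 and an existing topic:

from kafka.client import KafkaClient
from kafka.producer import SimpleProducer

client = KafkaClient("localhost", 9092)
producer = SimpleProducer(client, "my-topic")

# each call sends one message to the next partition in the round-robin cycle
for i in range(3):
    producer.send_message("hello %d" % i)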

kafka/protocol.py Normal file
View File

@@ -0,0 +1,457 @@
import logging
import struct
import zlib
from kafka.codec import (
gzip_encode, gzip_decode, snappy_encode, snappy_decode
)
from kafka.common import (
BrokerMetadata, PartitionMetadata, Message, OffsetAndMessage,
ProduceResponse, FetchResponse, OffsetResponse,
OffsetCommitResponse, OffsetFetchResponse
)
from kafka.util import (
read_short_string, read_int_string, relative_unpack,
write_short_string, write_int_string, group_by_topic_and_partition,
BufferUnderflowError, ChecksumError
)
log = logging.getLogger("kafka")
class KafkaProtocol(object):
"""
Class to encapsulate all of the protocol encoding/decoding. This class does not
have any state associated with it, it is purely for organization.
"""
PRODUCE_KEY = 0
FETCH_KEY = 1
OFFSET_KEY = 2
METADATA_KEY = 3
OFFSET_COMMIT_KEY = 6
OFFSET_FETCH_KEY = 7
ATTRIBUTE_CODEC_MASK = 0x03
CODEC_NONE = 0x00
CODEC_GZIP = 0x01
CODEC_SNAPPY = 0x02
###################
# Private API #
###################
@classmethod
def _encode_message_header(cls, client_id, correlation_id, request_key):
"""
Encode the common request envelope
"""
return struct.pack('>hhih%ds' % len(client_id),
request_key, # ApiKey
0, # ApiVersion
correlation_id, # CorrelationId
len(client_id), #
client_id) # ClientId
@classmethod
def _encode_message_set(cls, messages):
"""
Encode a MessageSet. Unlike other arrays in the protocol, MessageSets are
not length-prefixed
Format
======
MessageSet => [Offset MessageSize Message]
Offset => int64
MessageSize => int32
"""
message_set = ""
for message in messages:
encoded_message = KafkaProtocol._encode_message(message)
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
return message_set
@classmethod
def _encode_message(cls, message):
"""
Encode a single message.
The magic number of a message is a format version number. The only supported
magic number right now is zero
Format
======
Message => Crc MagicByte Attributes Key Value
Crc => int32
MagicByte => int8
Attributes => int8
Key => bytes
Value => bytes
"""
if message.magic == 0:
msg = struct.pack('>BB', message.magic, message.attributes)
msg += write_int_string(message.key)
msg += write_int_string(message.value)
crc = zlib.crc32(msg)
msg = struct.pack('>i%ds' % len(msg), crc, msg)
else:
raise Exception("Unexpected magic number: %d" % message.magic)
return msg
@classmethod
def _decode_message_set_iter(cls, data):
"""
Iteratively decode a MessageSet
Reads repeated elements of (offset, message), calling decode_message to decode a
single message. Since compressed messages contain futher MessageSets, these two methods
have been decoupled so that they may recurse easily.
"""
cur = 0
while cur < len(data):
try:
((offset, ), cur) = relative_unpack('>q', data, cur)
(msg, cur) = read_int_string(data, cur)
for (offset, message) in KafkaProtocol._decode_message(msg, offset):
yield OffsetAndMessage(offset, message)
except BufferUnderflowError: # If we get a partial read of a message, stop
raise StopIteration()
@classmethod
def _decode_message(cls, data, offset):
"""
Decode a single Message
The only caller of this method is decode_message_set_iter. They are decoupled to
support nested messages (compressed MessageSets). The offset is actually read from
decode_message_set_iter (it is part of the MessageSet payload).
"""
((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
if crc != zlib.crc32(data[4:]):
raise ChecksumError("Message checksum failed")
(key, cur) = read_int_string(data, cur)
(value, cur) = read_int_string(data, cur)
if att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_NONE:
yield (offset, Message(magic, att, key, value))
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_GZIP:
gz = gzip_decode(value)
for (offset, message) in KafkaProtocol._decode_message_set_iter(gz):
yield (offset, message)
elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_SNAPPY:
snp = snappy_decode(value)
for (offset, message) in KafkaProtocol._decode_message_set_iter(snp):
yield (offset, message)
##################
# Public API #
##################
@classmethod
def encode_produce_request(cls, client_id, correlation_id, payloads=[], acks=1, timeout=1000):
"""
Encode some ProduceRequest structs
Params
======
client_id: string
correlation_id: string
payloads: list of ProduceRequest
acks: How "acky" you want the request to be
0: immediate response
1: written to disk by the leader
2+: waits for this many number of replicas to sync
-1: waits for all replicas to be in sync
timeout: Maximum time the server will wait for acks from replicas. This is _not_ a socket timeout
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.PRODUCE_KEY)
message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += struct.pack('>h%dsi' % len(topic), len(topic), topic, len(topic_payloads))
for partition, payload in topic_payloads.items():
message_set = KafkaProtocol._encode_message_set(payload.messages)
message += struct.pack('>ii%ds' % len(message_set), partition, len(message_set), message_set)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_produce_response(cls, data):
"""
Decode bytes to a ProduceResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
((strlen,), cur) = relative_unpack('>h', data, cur)
topic = data[cur:cur+strlen]
cur += strlen
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, offset), cur) = relative_unpack('>ihq', data, cur)
yield ProduceResponse(topic, partition, error, offset)
@classmethod
def encode_fetch_request(cls, client_id, correlation_id, payloads=[], max_wait_time=100, min_bytes=4096):
"""
Encodes some FetchRequest structs
Params
======
client_id: string
correlation_id: string
payloads: list of FetchRequest
max_wait_time: int, how long to block waiting on min_bytes of data
min_bytes: int, the minimum number of bytes to accumulate before returning the response
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.FETCH_KEY)
message += struct.pack('>iiii', -1, max_wait_time, min_bytes, len(grouped_payloads)) # -1 is the replica id
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.offset, payload.max_bytes)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_fetch_response_iter(cls, data):
"""
Decode bytes to a FetchResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, highwater_mark_offset), cur) = relative_unpack('>ihq', data, cur)
(message_set, cur) = read_int_string(data, cur)
yield FetchResponse(topic, partition, error, highwater_mark_offset,
KafkaProtocol._decode_message_set_iter(message_set))
@classmethod
def encode_offset_request(cls, client_id, correlation_id, payloads=[]):
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_KEY)
message += struct.pack('>ii', -1, len(grouped_payloads)) # -1 is the replica id
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.time, payload.max_offsets)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_response(cls, data):
"""
Decode bytes to an OffsetResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, error, num_offsets,), cur) = relative_unpack('>ihi', data, cur)
offsets = []
for j in range(num_offsets):
((offset,), cur) = relative_unpack('>q', data, cur)
offsets.append(offset)
yield OffsetResponse(topic, partition, error, tuple(offsets))
@classmethod
def encode_metadata_request(cls, client_id, correlation_id, topics=[]):
"""
Encode a MetadataRequest
Params
======
client_id: string
correlation_id: string
topics: list of strings
"""
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.METADATA_KEY)
message += struct.pack('>i', len(topics))
for topic in topics:
message += struct.pack('>h%ds' % len(topic), len(topic), topic)
return write_int_string(message)
@classmethod
def decode_metadata_response(cls, data):
"""
Decode bytes to a MetadataResponse
Params
======
data: bytes to decode
"""
((correlation_id, numBrokers), cur) = relative_unpack('>ii', data, 0)
# Broker info
brokers = {}
for i in range(numBrokers):
((nodeId, ), cur) = relative_unpack('>i', data, cur)
(host, cur) = read_short_string(data, cur)
((port,), cur) = relative_unpack('>i', data, cur)
brokers[nodeId] = BrokerMetadata(nodeId, host, port)
# Topic info
((num_topics,), cur) = relative_unpack('>i', data, cur)
topicMetadata = {}
for i in range(num_topics):
((topicError,), cur) = relative_unpack('>h', data, cur)
(topicName, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
partitionMetadata = {}
for j in range(num_partitions):
((partitionErrorCode, partition, leader, numReplicas), cur) = relative_unpack('>hiii', data, cur)
(replicas, cur) = relative_unpack('>%di' % numReplicas, data, cur)
((numIsr,), cur) = relative_unpack('>i', data, cur)
(isr, cur) = relative_unpack('>%di' % numIsr, data, cur)
partitionMetadata[partition] = PartitionMetadata(topicName, partition, leader, replicas, isr)
topicMetadata[topicName] = partitionMetadata
return (brokers, topicMetadata)
@classmethod
def encode_offset_commit_request(cls, client_id, correlation_id, group, payloads):
"""
Encode some OffsetCommitRequest structs
Params
======
client_id: string
correlation_id: string
group: string, the consumer group you are committing offsets for
payloads: list of OffsetCommitRequest
"""
grouped_payloads= group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_COMMIT_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>iq', partition, payload.offset)
message += write_short_string(payload.metadata)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_commit_response(cls, data):
"""
Decode bytes to an OffsetCommitResponse
Params
======
data: bytes to decode
"""
data = data[2:] # TODO remove me when versionId is removed
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
for i in xrange(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in xrange(num_partitions):
((partition, error), cur) = relative_unpack('>ih', data, cur)
yield OffsetCommitResponse(topic, partition, error)
@classmethod
def encode_offset_fetch_request(cls, client_id, correlation_id, group, payloads):
"""
Encode some OffsetFetchRequest structs
Params
======
client_id: string
correlation_id: string
group: string, the consumer group you are fetching offsets for
payloads: list of OffsetFetchRequest
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_FETCH_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
message += struct.pack('>i', partition)
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
def decode_offset_fetch_response(cls, data):
"""
Decode bytes to an OffsetFetchResponse
Params
======
data: bytes to decode
"""
data = data[2:] # TODO remove me when versionId is removed
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
((partition, offset), cur) = relative_unpack('>iq', data, cur)
(metadata, cur) = read_short_string(data, cur)
((error,), cur) = relative_unpack('>h', data, cur)
yield OffsetFetchResponse(topic, partition, offset, metadata, error)
def create_message(payload, key=None):
"""
Construct a Message
Params
======
payload: bytes, the payload to send to Kafka
key: bytes, a key used for partition routing (optional)
"""
return Message(0, 0, key, payload)
def create_gzip_message(payloads, key=None):
"""
Construct a Gzipped Message containing multiple Messages
The given payloads will be encoded, compressed, and sent as a single atomic
message to Kafka.
Params
======
payloads: list(bytes), a list of payload to send be sent to Kafka
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
[create_message(payload) for payload in payloads])
gzipped = gzip_encode(message_set)
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP), key, gzipped)
def create_snappy_message(payloads, key=None):
"""
Construct a Snappy Message containing multiple Messages
The given payloads will be encoded, compressed, and sent as a single atomic
message to Kafka.
Params
======
payloads: list(bytes), a list of payload to send be sent to Kafka
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
[create_message(payload) for payload in payloads])
snapped = snappy_encode(message_set)
return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY), key, snapped)
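create_message, create_gzip_message and create_snappy_message are now module-level helpers (the tests below switch from KafkaProtocol.create_message to the bare functions). A short sketch of batching them into a ProduceRequest the way the integration tests do; the topic name is a placeholder:

from kafka.common import ProduceRequest
from kafka.protocol import create_message, create_gzip_message

# one plain message plus a gzip-compressed batch, all bound for partition 0
messages = [
    create_message("plain message"),
    create_gzip_message(["compressed %d" % i for i in range(100)]),
]
req = ProduceRequest("my-topic", 0, messages=messages)

# a connected KafkaClient would then send it, e.g.:
# for resp in client.send_produce_request([req]):
#     assert resp.error == 0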

View File

@@ -1,6 +1,7 @@
 from collections import defaultdict
 from itertools import groupby
 import struct
+from threading import Timer

 def write_int_string(s):
     if s is None:
@@ -56,3 +57,27 @@ class BufferUnderflowError(Exception):

 class ChecksumError(Exception):
     pass
+
+class ReentrantTimer(object):
+    """
+    A timer that can be restarted, unlike threading.Timer (although this uses threading.Timer)
+
+    t: timer interval in milliseconds
+    fn: a callable to invoke
+    """
+    def __init__(self, t, fn):
+        self.timer = None
+        self.t = t
+        self.fn = fn
+
+    def start(self):
+        if self.timer is None:
+            self.timer = Timer(self.t / 1000., self.fn)
+            self.timer.start()
+        else:
+            self.timer.cancel()
+            self.timer = Timer(self.t / 1000., self.fn)
+            self.timer.start()
+
+    def stop(self):
+        self.timer.cancel()
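ReentrantTimer exists so SimpleConsumer can re-arm its time-based auto-commit; a bare threading.Timer only fires once. A tiny standalone sketch (interval and callback are arbitrary):

from kafka.util import ReentrantTimer

def on_tick():
    print "timer fired"

t = ReentrantTimer(5000, on_tick)  # interval is given in milliseconds
t.start()   # arm the timer
t.start()   # calling start() again cancels and re-arms it
t.stop()    # cancel the pending timer without firing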

View File

@@ -12,7 +12,8 @@ import time
 import unittest
 from urlparse import urlparse

-from kafka.client import *
+from kafka import *
+from kafka.common import *

 def get_open_port():
     sock = socket.socket()
@@ -146,7 +147,7 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_many_simple(self):
         produce = ProduceRequest("test_produce_many_simple", 0, messages=[
-            KafkaProtocol.create_message("Test message %d" % i) for i in range(100)
+            create_message("Test message %d" % i) for i in range(100)
         ])

         for resp in self.client.send_produce_request([produce]):
@@ -172,7 +173,7 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_10k_simple(self):
         produce = ProduceRequest("test_produce_10k_simple", 0, messages=[
-            KafkaProtocol.create_message("Test message %d" % i) for i in range(10000)
+            create_message("Test message %d" % i) for i in range(10000)
         ])

         for resp in self.client.send_produce_request([produce]):
@@ -183,8 +184,8 @@ class TestKafkaClient(unittest.TestCase):
         self.assertEquals(offset.offsets[0], 10000)

     def test_produce_many_gzip(self):
-        message1 = KafkaProtocol.create_gzip_message(["Gzipped 1 %d" % i for i in range(100)])
-        message2 = KafkaProtocol.create_gzip_message(["Gzipped 2 %d" % i for i in range(100)])
+        message1 = create_gzip_message(["Gzipped 1 %d" % i for i in range(100)])
+        message2 = create_gzip_message(["Gzipped 2 %d" % i for i in range(100)])

         produce = ProduceRequest("test_produce_many_gzip", 0, messages=[message1, message2])
@@ -196,8 +197,8 @@ class TestKafkaClient(unittest.TestCase):
         self.assertEquals(offset.offsets[0], 200)

     def test_produce_many_snappy(self):
-        message1 = KafkaProtocol.create_snappy_message(["Snappy 1 %d" % i for i in range(100)])
-        message2 = KafkaProtocol.create_snappy_message(["Snappy 2 %d" % i for i in range(100)])
+        message1 = create_snappy_message(["Snappy 1 %d" % i for i in range(100)])
+        message2 = create_snappy_message(["Snappy 2 %d" % i for i in range(100)])

         produce = ProduceRequest("test_produce_many_snappy", 0, messages=[message1, message2])
@@ -209,9 +210,9 @@ class TestKafkaClient(unittest.TestCase):
         self.assertEquals(offset.offsets[0], 200)

     def test_produce_mixed(self):
-        message1 = KafkaProtocol.create_message("Just a plain message")
-        message2 = KafkaProtocol.create_gzip_message(["Gzipped %d" % i for i in range(100)])
-        message3 = KafkaProtocol.create_snappy_message(["Snappy %d" % i for i in range(100)])
+        message1 = create_message("Just a plain message")
+        message2 = create_gzip_message(["Gzipped %d" % i for i in range(100)])
+        message3 = create_snappy_message(["Snappy %d" % i for i in range(100)])

         produce = ProduceRequest("test_produce_mixed", 0, messages=[message1, message2, message3])
@@ -225,7 +226,7 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_100k_gzipped(self):
         produce = ProduceRequest("test_produce_100k_gzipped", 0, messages=[
-            KafkaProtocol.create_gzip_message(["Gzipped %d" % i for i in range(100000)])
+            create_gzip_message(["Gzipped %d" % i for i in range(100000)])
         ])

         for resp in self.client.send_produce_request([produce]):
@@ -252,8 +253,8 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_consume(self):
         produce = ProduceRequest("test_produce_consume", 0, messages=[
-            KafkaProtocol.create_message("Just a test message"),
-            KafkaProtocol.create_message("Message with a key", "foo"),
+            create_message("Just a test message"),
+            create_message("Message with a key", "foo"),
         ])

         for resp in self.client.send_produce_request([produce]):
@@ -276,7 +277,7 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_consume_many(self):
         produce = ProduceRequest("test_produce_consume_many", 0, messages=[
-            KafkaProtocol.create_message("Test message %d" % i) for i in range(100)
+            create_message("Test message %d" % i) for i in range(100)
         ])

         for resp in self.client.send_produce_request([produce]):
@@ -308,10 +309,10 @@ class TestKafkaClient(unittest.TestCase):

     def test_produce_consume_two_partitions(self):
         produce1 = ProduceRequest("test_produce_consume_two_partitions", 0, messages=[
-            KafkaProtocol.create_message("Partition 0 %d" % i) for i in range(10)
+            create_message("Partition 0 %d" % i) for i in range(10)
         ])
         produce2 = ProduceRequest("test_produce_consume_two_partitions", 1, messages=[
-            KafkaProtocol.create_message("Partition 1 %d" % i) for i in range(10)
+            create_message("Partition 1 %d" % i) for i in range(10)
         ])

         for resp in self.client.send_produce_request([produce1, produce2]):
@@ -400,22 +401,25 @@ class TestConsumer(unittest.TestCase):
         cls.server2.close()

     def test_consumer(self):
+        # Produce 100 messages to partition 0
         produce1 = ProduceRequest("test_consumer", 0, messages=[
-            KafkaProtocol.create_message("Test message 0 %d" % i) for i in range(100)
-        ])
-        produce2 = ProduceRequest("test_consumer", 1, messages=[
-            KafkaProtocol.create_message("Test message 1 %d" % i) for i in range(100)
+            create_message("Test message 0 %d" % i) for i in range(100)
         ])

         for resp in self.client.send_produce_request([produce1]):
             self.assertEquals(resp.error, 0)
             self.assertEquals(resp.offset, 0)

+        # Produce 100 messages to partition 1
+        produce2 = ProduceRequest("test_consumer", 1, messages=[
+            create_message("Test message 1 %d" % i) for i in range(100)
+        ])
+
         for resp in self.client.send_produce_request([produce2]):
             self.assertEquals(resp.error, 0)
             self.assertEquals(resp.offset, 0)

+        # Start a consumer
         consumer = SimpleConsumer(self.client, "group1", "test_consumer")
         all_messages = []
         for message in consumer:
@@ -424,6 +428,23 @@ class TestConsumer(unittest.TestCase):
         self.assertEquals(len(all_messages), 200)
         self.assertEquals(len(all_messages), len(set(all_messages))) # make sure there are no dupes

+        # Produce more messages
+        produce3 = ProduceRequest("test_consumer", 1, messages=[
+            create_message("Test message 3 %d" % i) for i in range(10)
+        ])
+
+        for resp in self.client.send_produce_request([produce3]):
+            self.assertEquals(resp.error, 0)
+            self.assertEquals(resp.offset, 100)
+
+        # Start a new consumer, make sure we only get the newly produced messages
+        consumer = SimpleConsumer(self.client, "group1", "test_consumer")
+        all_messages = []
+        for message in consumer:
+            all_messages.append(message)
+
+        self.assertEquals(len(all_messages), 10)
+
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
+    logging.basicConfig(level=logging.DEBUG)
     unittest.main()