Merge tag '0.9.5' into debian/unstable

2016-03-02 13:50:29 +00:00
parent 88704428ed 99d4a3a8b1
commit a06de23a09
71 changed files with 4080 additions and 1546 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ python:
    - 2.7
    - 3.3
    - 3.4
+    - 3.5
    - pypy

 env:
@@ -12,18 +13,27 @@ env:
    - KAFKA_VERSION=0.8.0
    - KAFKA_VERSION=0.8.1
    - KAFKA_VERSION=0.8.1.1
-    - KAFKA_VERSION=0.8.2.0
+    - KAFKA_VERSION=0.8.2.2
+    - KAFKA_VERSION=0.9.0.0
+
+sudo: false
+
+addons:
+  apt:
+    packages:
+      - libsnappy-dev
+
+cache:
+  directories:
+    - $HOME/.cache/pip
+    - servers/

 before_install:
-    - sudo apt-get install libsnappy-dev
    - ./build_integration.sh

 install:
    - pip install tox coveralls
    - pip install .
-    # Deal with issue on Travis builders re: multiprocessing.Queue :(
-    # See https://github.com/travis-ci/travis-cookbooks/issues/155
-    - sudo rm -rf /dev/shm && sudo ln -s /run/shm /dev/shm

 deploy:
  provider: pypi
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,16 +1,49 @@
-# Contributors
-
-Top 10 contributors, listed by contribution. See https://github.com/mumrah/kafka-python/graphs/contributors for the full list
-
-* David Arthur, [@mumrah](https://github.com/mumrah)
+# Current Maintainer
 * Dana Powers, [@dpkp](https://github.com/dpkp)
-* Mahendra M, [@mahendra](https://github.com/mahendra)
-* Mark Roberts, [@wizzat](https://github.com/wizzat)
-* Omar, [@rdiomar](https://github.com/rdiomar) - RIP, Omar. 2014
-* Bruno Renié, [@brutasse](https://github.com/brutasse)
-* Marc Labbé, [@mrtheb](https://github.com/mrtheb)
-* Ivan Pouzyrevsky, [@sandello](https://github.com/sandello)
+
+# Original Author and First Commit
+* David Arthur, [@mumrah](https://github.com/mumrah)
+
+# Contributors - 2015 (alpha by username)
+* Alex Couture-Beil, [@alexcb](https://github.com/alexcb)
+* Ali-Akber Saifee, [@alisaifee](https://github.com/alisaifee)
+* Christophe-Marie Duquesne, [@chmduquesne](https://github.com/chmduquesne)
 * Thomas Dimson, [@cosbynator](https://github.com/cosbynator)
-* Zack Dever, [@zever](https://github.com/zever)
+* Kasper Jacobsen, [@Dinoshauer](https://github.com/Dinoshauer)
+* Ross Duggan, [@duggan](https://github.com/duggan)
+* Enrico Canzonieri, [@ecanzonieri](https://github.com/ecanzonieri)
+* haosdent, [@haosdent](https://github.com/haosdent)
+* Arturo Filastò, [@hellais](https://github.com/hellais)
+* Job Evers‐Meltzer, [@jobevers](https://github.com/jobevers)
+* Martin Olveyra, [@kalessin](https://github.com/kalessin)
+* Kubilay Kocak, [@koobs](https://github.com/koobs)
+* Matthew L Daniel <mdaniel@gmail.com>
+* Eric Hewitt, [@meandthewallaby](https://github.com/meandthewallaby)
+* Oliver Jowett, [@mutability](https://github.com/mutability)
+* Shaolei Zhou, [@reAsOn2010](https://github.com/reAsOn2010)
+* Oskari Saarenmaa, [@saaros](https://github.com/saaros)
+* John Anderson, [@sontek](https://github.com/sontek)
+* Eduard Iskandarov, [@toidi](https://github.com/toidi)
+* Todd Palino, [@toddpalino](https://github.com/toddpalino)
+* trbs, [@trbs](https://github.com/trbs)
+* Viktor Shlapakov, [@vshlapakov](https://github.com/vshlapakov)
+* Will Daly, [@wedaly](https://github.com/wedaly)
+* Warren Kiser, [@wkiser](https://github.com/wkiser)
+* William Ting, [@wting](https://github.com/wting)
+* Zack Dever, [@zackdever](https://github.com/zackdever)
+
+# More Contributors
+* Bruno Renié, [@brutasse](https://github.com/brutasse)
+* Thomas Dimson, [@cosbynator](https://github.com/cosbynator)
+* Jesse Myers, [@jessemyers](https://github.com/jessemyers)
+* Mahendra M, [@mahendra](https://github.com/mahendra)
+* Miguel Eduardo Gil Biraud, [@mgilbir](https://github.com/mgilbir)
+* Marc Labbé, [@mrtheb](https://github.com/mrtheb)
+* Patrick Lucas, [@patricklucas](https://github.com/patricklucas)
+* Omar Ghishan, [@rdiomar](https://github.com/rdiomar) - RIP, Omar. 2014
+* Ivan Pouzyrevsky, [@sandello](https://github.com/sandello)
+* Lou Marvin Caraig, [@se7entyse7en](https://github.com/se7entyse7en)
+* waliaashish85, [@waliaashish85](https://github.com/waliaashish85)
+* Mark Roberts, [@wizzat](https://github.com/wizzat)

 Thanks to all who have contributed!
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,110 @@
+# 0.9.5 (Dec 6, 2015)
+
+Consumers
+* Initial support for consumer coordinator [offsets only] (toddpalino PR 420)
+* Allow blocking until some messages are received in SimpleConsumer (saaros PR 457)
+* Support subclass config changes in KafkaConsumer (zackdever PR 446)
+* Support retry semantics in MultiProcessConsumer (barricadeio PR 456)
+* Support partition_info in MultiProcessConsumer (scrapinghub PR 418)
+* Enable seek() to an absolute offset in SimpleConsumer (haosdent PR 412)
+* Add KafkaConsumer.close() (ucarion PR 426)
+
+Producers
+* Catch client.reinit() exceptions in async producer (dpkp)
+* Producer.stop() now blocks until async thread completes (dpkp PR 485)
+* Catch errors during load_metadata_for_topics in async producer (bschopman PR 467)
+* Add compression-level support for codecs that support it (trbs PR 454)
+* Fix translation of Java murmur2 code, fix byte encoding for Python 3 (chrischamberlin PR 439)
+* Only call stop() on not-stopped producer objects (docker-hub PR 435)
+* Allow null payload for deletion feature (scrapinghub PR 409)
+
+Clients
+* Use non-blocking io for broker aware requests (ecanzonieri PR 473)
+* Use debug logging level for metadata request (ecanzonieri PR 415)
+* Catch KafkaUnavailableError in _send_broker_aware_request (mutability PR 436)
+* Lower logging level on replica not available and commit (ecanzonieri PR 415)
+
+Documentation
+* Update docs and links wrt maintainer change (mumrah -> dpkp)
+
+Internals
+* Add py35 to tox testing
+* Update travis config to use container infrastructure
+* Add 0.8.2.2 and 0.9.0.0 resources for integration tests; update default official releases
+* new pylint disables for pylint 1.5.1 (zackdever PR 481)
+* Fix python3 / python2 comments re queue/Queue (dpkp)
+* Add Murmur2Partitioner to kafka __all__ imports (dpkp Issue 471)
+* Include LICENSE in PyPI sdist (koobs PR 441)
+
+# 0.9.4 (June 11, 2015)
+
+Consumers
+* Refactor SimpleConsumer internal fetch handling (dpkp PR 399)
+* Handle exceptions in SimpleConsumer commit() and reset_partition_offset() (dpkp PR 404)
+* Improve FailedPayloadsError handling in KafkaConsumer (dpkp PR 398)
+* KafkaConsumer: avoid raising KeyError in task_done (dpkp PR 389)
+* MultiProcessConsumer -- support configured partitions list (dpkp PR 380)
+* Fix SimpleConsumer leadership change handling (dpkp PR 393) 
+* Fix SimpleConsumer connection error handling (reAsOn2010 PR 392)
+* Improve Consumer handling of 'falsy' partition values (wting PR 342)
+* Fix _offsets call error in KafkaConsumer (hellais PR 376)
+* Fix str/bytes bug in KafkaConsumer (dpkp PR 365)
+* Register atexit handlers for consumer and producer thread/multiprocess cleanup (dpkp PR 360)
+* Always fetch commit offsets in base consumer unless group is None (dpkp PR 356)
+* Stop consumer threads on delete (dpkp PR 357)
+* Deprecate metadata_broker_list in favor of bootstrap_servers in KafkaConsumer (dpkp PR 340)
+* Support pass-through parameters in multiprocess consumer (scrapinghub PR 336)
+* Enable offset commit on SimpleConsumer.seek (ecanzonieri PR 350)
+* Improve multiprocess consumer partition distribution (scrapinghub PR 335)
+* Ignore messages with offset less than requested (wkiser PR 328)
+* Handle OffsetOutOfRange in SimpleConsumer (ecanzonieri PR 296)
+
+Producers
+* Add Murmur2Partitioner (dpkp PR 378)
+* Log error types in SimpleProducer and SimpleConsumer (dpkp PR 405)
+* SimpleProducer support configuration of fail_on_error (dpkp PR 396)
+* Deprecate KeyedProducer.send() (dpkp PR 379)
+* Further improvements to async producer code (dpkp PR 388)
+* Add more configuration parameters for async producer (dpkp)
+* Deprecate SimpleProducer batch_send=True in favor of async (dpkp)
+* Improve async producer error handling and retry logic (vshlapakov PR 331)
+* Support message keys in async producer (vshlapakov PR 329)
+* Use threading instead of multiprocessing for Async Producer (vshlapakov PR 330)
+* Stop threads on __del__ (chmduquesne PR 324)
+* Fix leadership failover handling in KeyedProducer (dpkp PR 314)
+
+KafkaClient
+* Add .topics property for list of known topics (dpkp)
+* Fix request / response order guarantee bug in KafkaClient (dpkp PR 403)
+* Improve KafkaClient handling of connection failures in _get_conn (dpkp)
+* Client clears local metadata cache before updating from server (dpkp PR 367)
+* KafkaClient should return a response or error for each request - enable better retry handling (dpkp PR 366)
+* Improve str/bytes conversion in KafkaClient and KafkaConsumer (dpkp PR 332)
+* Always return sorted partition ids in client.get_partition_ids_for_topic() (dpkp PR 315)
+
+Documentation
+* Cleanup Usage Documentation
+* Improve KafkaConsumer documentation (dpkp PR 341)
+* Update consumer documentation (sontek PR 317)
+* Add doc configuration for tox (sontek PR 316)
+* Switch to .rst doc format (sontek PR 321)
+* Fixup google groups link in README (sontek PR 320)
+* Automate documentation at kafka-python.readthedocs.org
+
+Internals
+* Switch integration testing from 0.8.2.0 to 0.8.2.1 (dpkp PR 402)
+* Fix most flaky tests, improve debug logging, improve fixture handling (dpkp)
+* General style cleanups (dpkp PR 394)
+* Raise error on duplicate topic-partition payloads in protocol grouping (dpkp)
+* Use module-level loggers instead of simply 'kafka' (dpkp)
+* Remove pkg_resources check for __version__ at runtime (dpkp PR 387)
+* Make external API consistently support python3 strings for topic (kecaps PR 361)
+* Fix correlation id overflow (dpkp PR 355)
+* Cleanup kafka/common structs (dpkp PR 338)
+* Use context managers in gzip_encode / gzip_decode (dpkp PR 337)
+* Save failed request as FailedPayloadsError attribute (jobevers PR 302)
+* Remove unused kafka.queue (mumrah)
+
 # 0.9.3 (Feb 3, 2015)

 * Add coveralls.io support (sontek PR 307)
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,5 @@
-include VERSION
 recursive-include kafka *.py
+include README.rst
+include LICENSE
+include AUTHORS.md
+include CHANGES.md
--- a/README.md
+++ b/README.md
@@ -1,39 +0,0 @@
-# Kafka Python client
-
-[![Build Status](https://api.travis-ci.org/mumrah/kafka-python.png?branch=master)](https://travis-ci.org/mumrah/kafka-python)
-[![Coverage Status](https://coveralls.io/repos/mumrah/kafka-python/badge.svg?branch=master)](https://coveralls.io/r/mumrah/kafka-python?branch=master)
-
-[Full documentation available on ReadTheDocs](http://kafka-python.readthedocs.org/en/latest/)
-
-This module provides low-level protocol support for Apache Kafka as well as
-high-level consumer and producer classes. Request batching is supported by the
-protocol as well as broker-aware request routing. Gzip and Snappy compression
-is also supported for message sets.
-
-http://kafka.apache.org/
-
-On Freenode IRC at #kafka-python, as well as #apache-kafka
-
-For general discussion of kafka-client design and implementation (not python specific),
-see https://groups.google.com/forum/m/#!forum/kafka-clients
-
-# License
-
-Copyright 2014, David Arthur under Apache License, v2.0. See `LICENSE`
-
-# Status
-
-The current stable version of this package is [**0.9.3**](https://github.com/mumrah/kafka-python/releases/tag/v0.9.3) and is compatible with
-
-Kafka broker versions
- 0.8.2.0 [offset management currently ZK only -- does not support ConsumerCoordinator offset management APIs]
- 0.8.1.1
- 0.8.1
- 0.8.0
-
-Python versions
- 2.6 (tested on 2.6.9)
- 2.7 (tested on 2.7.9)
- 3.3 (tested on 3.3.5)
- 3.4 (tested on 3.4.2)
- pypy (tested on pypy 2.4.0 / python 2.7.8)
--- a/README.rst
+++ b/README.rst
@@ -0,0 +1,59 @@
+Kafka Python client
+------------------------
+.. image:: https://api.travis-ci.org/dpkp/kafka-python.png?branch=master
+    :target: https://travis-ci.org/dpkp/kafka-python
+    :alt: Build Status
+
+.. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master
+    :target: https://coveralls.io/r/dpkp/kafka-python?branch=master
+    :alt: Coverage Status
+
+.. image:: https://readthedocs.org/projects/kafka-python/badge/?version=latest
+    :target: http://kafka-python.readthedocs.org/en/latest/
+    :alt: Full documentation available on ReadTheDocs
+
+This module provides low-level protocol support for Apache Kafka as well as
+high-level consumer and producer classes. Request batching is supported by the
+protocol as well as broker-aware request routing. Gzip and Snappy compression
+is also supported for message sets.
+
+Coordinated Consumer Group support is under development - see Issue #38.
+
+Full documentation available on `Read the Docs <https://kafka-python.readthedocs.org/en/latest/>`_
+
+On Freenode IRC at #kafka-python, as well as #apache-kafka
+
+For general discussion of kafka-client design and implementation (not python specific),
+see https://groups.google.com/forum/#!forum/kafka-clients
+
+For information about Apache Kafka generally, see https://kafka.apache.org/
+
+License
+----------
+Apache License, v2.0. See `LICENSE <https://github.com/dpkp/kafka-python/blob/master/LICENSE>`_.
+Copyright 2015, David Arthur, Dana Powers, and Contributors
+(See `AUTHORS <https://github.com/dpkp/kafka-python/blob/master/AUTHORS.md>`_).
+
+Status
+----------
+The current stable version of this package is
+`0.9.5 <https://github.com/dpkp/kafka-python/releases/tag/v0.9.5>`_
+and is compatible with:
+
+Kafka broker versions
+
+- 0.9.0.0
+- 0.8.2.2
+- 0.8.2.1
+- 0.8.1.1
+- 0.8.1
+- 0.8.0
+
+Python versions
+
+- 3.5 (tested on 3.5.0)
+- 3.4 (tested on 3.4.2)
+- 3.3 (tested on 3.3.5)
+- 2.7 (tested on 2.7.9)
+- 2.6 (tested on 2.6.9)
+- pypy (tested on pypy 2.5.0 / python 2.7.8)
--- a/VERSION
+++ b/VERSION
@@ -1 +0,0 @@
-0.9.3
--- a/build_integration.sh
+++ b/build_integration.sh
@@ -1,7 +1,7 @@
 #!/bin/bash

 # Versions available for testing via binary distributions
-OFFICIAL_RELEASES="0.8.0 0.8.1 0.8.1.1 0.8.2.0"
+OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.0"

 # Useful configuration vars, with sensible defaults
 if [ -z "$SCALA_VERSION" ]; then
--- a/docs/api_reference.rst
+++ b/docs/api_reference.rst
@@ -1,67 +0,0 @@
-API Reference
-=============
-
-kafka
-----
-.. automodule:: kafka.client
-   :members:
-
-.. automodule:: kafka.codec
-   :members:
-
-.. automodule:: kafka.common
-   :members:
-
-.. automodule:: kafka.conn
-   :members:
-
-.. automodule:: kafka.context
-   :members:
-
-.. automodule:: kafka.protocol
-   :members:
-
-.. automodule:: kafka.queue
-   :members:
-
-.. automodule:: kafka.util
-   :members:
-
-
-kafka.consumer
--------------
-.. automodule:: kafka.consumer.base
-   :members:
-
-.. automodule:: kafka.consumer.kafka
-   :members:
-
-.. automodule:: kafka.consumer.multiprocess
-   :members:
-
-.. automodule:: kafka.consumer.simple
-   :members:
-
-
-kafka.partitioner
-----------------
-.. automodule:: kafka.partitioner.base
-   :members:
-
-.. automodule:: kafka.partitioner.hashed
-   :members:
-
-.. automodule:: kafka.partitioner.roundrobin
-   :members:
-
-
-kafka.producer
--------------
-.. automodule:: kafka.producer.base
-   :members:
-
-.. automodule:: kafka.producer.keyed
-   :members:
-
-.. automodule:: kafka.producer.simple
-   :members:
--- a/docs/apidoc/kafka.consumer.rst
+++ b/docs/apidoc/kafka.consumer.rst
@@ -0,0 +1,46 @@
+kafka.consumer package
+======================
+
+Submodules
+----------
+
+kafka.consumer.base module
+--------------------------
+
+.. automodule:: kafka.consumer.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.kafka module
+---------------------------
+
+.. automodule:: kafka.consumer.kafka
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.multiprocess module
+----------------------------------
+
+.. automodule:: kafka.consumer.multiprocess
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.simple module
+----------------------------
+
+.. automodule:: kafka.consumer.simple
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.consumer
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/apidoc/kafka.partitioner.rst
+++ b/docs/apidoc/kafka.partitioner.rst
@@ -0,0 +1,38 @@
+kafka.partitioner package
+=========================
+
+Submodules
+----------
+
+kafka.partitioner.base module
+-----------------------------
+
+.. automodule:: kafka.partitioner.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.partitioner.hashed module
+-------------------------------
+
+.. automodule:: kafka.partitioner.hashed
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.partitioner.roundrobin module
+-----------------------------------
+
+.. automodule:: kafka.partitioner.roundrobin
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.partitioner
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/apidoc/kafka.producer.rst
+++ b/docs/apidoc/kafka.producer.rst
@@ -0,0 +1,38 @@
+kafka.producer package
+======================
+
+Submodules
+----------
+
+kafka.producer.base module
+--------------------------
+
+.. automodule:: kafka.producer.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.producer.keyed module
+---------------------------
+
+.. automodule:: kafka.producer.keyed
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.producer.simple module
+----------------------------
+
+.. automodule:: kafka.producer.simple
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.producer
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/apidoc/kafka.rst
+++ b/docs/apidoc/kafka.rst
@@ -0,0 +1,79 @@
+kafka package
+=============
+
+Subpackages
+-----------
+
+.. toctree::
+
+    kafka.consumer
+    kafka.partitioner
+    kafka.producer
+
+Submodules
+----------
+
+kafka.client module
+-------------------
+
+.. automodule:: kafka.client
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.codec module
+------------------
+
+.. automodule:: kafka.codec
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.common module
+-------------------
+
+.. automodule:: kafka.common
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.conn module
+-----------------
+
+.. automodule:: kafka.conn
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.context module
+--------------------
+
+.. automodule:: kafka.context
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.protocol module
+---------------------
+
+.. automodule:: kafka.protocol
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.util module
+-----------------
+
+.. automodule:: kafka.util
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/apidoc/modules.rst
+++ b/docs/apidoc/modules.rst
@@ -0,0 +1,7 @@
+kafka
+=====
+
+.. toctree::
+   :maxdepth: 4
+
+   kafka
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -30,6 +30,7 @@ import os
 # ones.
 extensions = [
    'sphinx.ext.autodoc',
+    'sphinx.ext.intersphinx',
    'sphinx.ext.viewcode',
    'sphinxcontrib.napoleon',
 ]
@@ -48,18 +49,17 @@ master_doc = 'index'

 # General information about the project.
 project = u'kafka-python'
-copyright = u'2015, David Arthur'
+copyright = u'2015 - David Arthur, Dana Powers, and Contributors'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-with open('../VERSION') as version_file:
-  version = version_file.read()
-  
+exec(open('../kafka/version.py').read())
+
 # The full version, including alpha/beta/rc tags.
-release = version
+release = __version__

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -203,7 +203,7 @@ latex_elements = {
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
  ('index', 'kafka-python.tex', u'kafka-python Documentation',
-   u'David Arthur', 'manual'),
+   u'Dana Powers', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
@@ -233,7 +233,7 @@ latex_documents = [
 # (source start file, name, description, authors, manual section).
 man_pages = [
    ('index', 'kafka-python', u'kafka-python Documentation',
-     [u'David Arthur'], 1)
+     [u'Dana Powers'], 1)
 ]

 # If true, show URL addresses after external links.
@@ -247,7 +247,7 @@ man_pages = [
 #  dir menu entry, description, category)
 texinfo_documents = [
  ('index', 'kafka-python', u'kafka-python Documentation',
-   u'David Arthur', 'kafka-python', 'One line description of project.',
+   u'Dana Powers', 'kafka-python', 'One line description of project.',
   'Miscellaneous'),
 ]

@@ -262,3 +262,10 @@ texinfo_documents = [

 # If true, do not generate a @detailmenu in the "Top" node's menu.
 #texinfo_no_detailmenu = False
+
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+
+if not on_rtd:  # only import and set the theme if we're building docs locally
+    import sphinx_rtd_theme
+    html_theme = 'sphinx_rtd_theme'
+    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,4 +1,3 @@
-
 kafka-python
 ============

@@ -7,35 +6,45 @@ high-level consumer and producer classes. Request batching is supported by the
 protocol as well as broker-aware request routing. Gzip and Snappy compression
 is also supported for message sets.

-http://kafka.apache.org/
+Coordinated Consumer Group support is under development - see Issue #38.

 On Freenode IRC at #kafka-python, as well as #apache-kafka

 For general discussion of kafka-client design and implementation (not python specific),
 see https://groups.google.com/forum/m/#!forum/kafka-clients

+For information about Apache Kafka generally, see https://kafka.apache.org/
+
 Status
 ------

-The current stable version of this package is `0.9.2 <https://github.com/mumrah/kafka-python/releases/tag/v0.9.2>`_ and is compatible with:
+The current stable version of this package is `0.9.5 <https://github.com/dpkp/kafka-python/releases/tag/v0.9.5>`_ and is compatible with:

 Kafka broker versions

-* 0.8.0
-* 0.8.1
+* 0.9.0.0
+* 0.8.2.2
+* 0.8.2.1
 * 0.8.1.1
+* 0.8.1
+* 0.8.0

 Python versions

+* 3.5 (tested on 3.5.0)
+* 3.4 (tested on 3.4.2)
+* 3.3 (tested on 3.3.5)
+* 2.7 (tested on 2.7.9)
 * 2.6 (tested on 2.6.9)
-* 2.7 (tested on 2.7.8)
-* pypy (tested on pypy 2.3.1 / python 2.7.6)
-* (Python 3.3 and 3.4 support has been added to trunk and will be available the next release)
+* pypy (tested on pypy 2.5.0 / python 2.7.8)

 License
 -------

-Copyright 2014, David Arthur under Apache License, v2.0. See `LICENSE <https://github.com/mumrah/kafka-python/blob/master/LICENSE>`_.
+Apache License, v2.0. See `LICENSE <https://github.com/dpkp/kafka-python/blob/master/LICENSE>`_.
+
+Copyright 2015, David Arthur, Dana Powers, and Contributors
+(See `AUTHORS <https://github.com/dpkp/kafka-python/blob/master/AUTHORS.md>`_).


 Contents
@@ -44,11 +53,10 @@ Contents
 .. toctree::
   :maxdepth: 2

+   usage
   install
   tests
-   usage
-   api_reference
-
+   API reference </apidoc/modules>

 Indices and tables
 ==================
@@ -56,4 +64,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -11,7 +11,7 @@ Pip:

    pip install kafka-python

-Releases are also listed at https://github.com/mumrah/kafka-python/releases
+Releases are also listed at https://github.com/dpkp/kafka-python/releases


 Bleeding-Edge
@@ -19,21 +19,21 @@ Bleeding-Edge

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
    pip install ./kafka-python

 Setuptools:

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
    easy_install ./kafka-python

 Using `setup.py` directly:

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
    cd kafka-python
    python setup.py install

--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,8 @@
 sphinx
 sphinxcontrib-napoleon
+sphinx_rtd_theme

 # Install kafka-python in editable mode
 # This allows the sphinx autodoc module
 # to load the Python modules and extract docstrings.
-e ..
+# -e ..
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -1,29 +1,32 @@
 Usage
 =====

-High level
----------
+SimpleProducer
+--------------

 .. code:: python

-    from kafka import KafkaClient, SimpleProducer, SimpleConsumer
+    from kafka import SimpleProducer, KafkaClient

    # To send messages synchronously
-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

-    # Note that the application is responsible for encoding messages to type str
-    producer.send_messages("my-topic", "some message")
-    producer.send_messages("my-topic", "this method", "is variadic")
+    # Note that the application is responsible for encoding messages to type bytes
+    producer.send_messages(b'my-topic', b'some message')
+    producer.send_messages(b'my-topic', b'this method', b'is variadic')

    # Send unicode message
-    producer.send_messages("my-topic", u'你怎么样?'.encode('utf-8'))
+    producer.send_messages(b'my-topic', u'你怎么样?'.encode('utf-8'))
+
+Asynchronous Mode
+-----------------
+
+.. code:: python

    # To send messages asynchronously
-    # WARNING: current implementation does not guarantee message delivery on failure!
-    # messages can get dropped! Use at your own risk! Or help us improve with a PR!
    producer = SimpleProducer(kafka, async=True)
-    producer.send_messages("my-topic", "async message")
+    producer.send_messages(b'my-topic', b'async message')

    # To wait for acknowledgements
    # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
@@ -32,13 +35,12 @@ High level
    #                            by all in sync replicas before sending a response
    producer = SimpleProducer(kafka, async=False,
                              req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
-                              ack_timeout=2000)
+                              ack_timeout=2000,
+                              sync_fail_on_error=False)

-    response = producer.send_messages("my-topic", "another message")
-
-    if response:
-        print(response[0].error)
-        print(response[0].offset)
+    responses = producer.send_messages(b'my-topic', b'another message')
+    for r in responses:
+        logging.info(r.offset)

    # To send messages in batch. You can use any of the available
    # producers for doing this. The following producer will collect
@@ -47,37 +49,117 @@ High level
    # Notes:
    # * If the producer dies before the messages are sent, there will be losses
    # * Call producer.stop() to send the messages and cleanup
-    producer = SimpleProducer(kafka, batch_send=True,
+    producer = SimpleProducer(kafka, async=True,
                              batch_send_every_n=20,
                              batch_send_every_t=60)

-    # To consume messages
-    consumer = SimpleConsumer(kafka, "my-group", "my-topic")
-    for message in consumer:
-        # message is raw byte string -- decode if necessary!
-        # e.g., for unicode: `message.decode('utf-8')`
-        print(message)
-
-    kafka.close()
-
-
 Keyed messages
 --------------

 .. code:: python

-    from kafka import KafkaClient, KeyedProducer, HashedPartitioner, RoundRobinPartitioner
+    from kafka import (
+        KafkaClient, KeyedProducer,
+        Murmur2Partitioner, RoundRobinPartitioner)

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

-    # HashedPartitioner is default
+    # HashedPartitioner is default (currently uses python hash())
    producer = KeyedProducer(kafka)
-    producer.send("my-topic", "key1", "some message")
-    producer.send("my-topic", "key2", "this methode")
+    producer.send_messages(b'my-topic', b'key1', b'some message')
+    producer.send_messages(b'my-topic', b'key2', b'this method')

+    # Murmur2Partitioner attempts to mirror the java client hashing
+    producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)
+
+    # Or just produce round-robin (or just use SimpleProducer)
    producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)


+
+KafkaConsumer
+-------------
+
+.. code:: python
+
+    from kafka import KafkaConsumer
+
+    # To consume messages
+    consumer = KafkaConsumer('my-topic',
+                             group_id='my_group',
+                             bootstrap_servers=['localhost:9092'])
+    for message in consumer:
+        # message value is raw byte string -- decode if necessary!
+        # e.g., for unicode: `message.value.decode('utf-8')`
+        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
+                                             message.offset, message.key,
+                                             message.value))
+
+
+messages (m) are namedtuples with attributes:
+
+  * `m.topic`: topic name (str)
+  * `m.partition`: partition number (int)
+  * `m.offset`: message offset on topic-partition log (int)
+  * `m.key`: key (bytes - can be None)
+  * `m.value`: message (output of deserializer_class - default is raw bytes)
+
+
+.. code:: python
+
+    from kafka import KafkaConsumer
+
+    # more advanced consumer -- multiple topics w/ auto commit offset
+    # management
+    consumer = KafkaConsumer('topic1', 'topic2',
+                             bootstrap_servers=['localhost:9092'],
+                             group_id='my_consumer_group',
+                             auto_commit_enable=True,
+                             auto_commit_interval_ms=30 * 1000,
+                             auto_offset_reset='smallest')
+
+    # Infinite iteration
+    for m in consumer:
+      do_some_work(m)
+
+      # Mark this message as fully consumed
+      # so it can be included in the next commit
+      #
+      # **messages that are not marked w/ task_done currently do not commit!
+      consumer.task_done(m)
+
+    # If auto_commit_enable is False, remember to commit() periodically
+    consumer.commit()
+
+    # Batch process interface
+    while True:
+      for m in consumer.fetch_messages():
+        process_message(m)
+        consumer.task_done(m)
+
+
+  Configuration settings can be passed to constructor,
+  otherwise defaults will be used:
+
+.. code:: python
+
+      client_id='kafka.consumer.kafka',
+      group_id=None,
+      fetch_message_max_bytes=1024*1024,
+      fetch_min_bytes=1,
+      fetch_wait_max_ms=100,
+      refresh_leader_backoff_ms=200,
+      bootstrap_servers=[],
+      socket_timeout_ms=30*1000,
+      auto_offset_reset='largest',
+      deserializer_class=lambda msg: msg,
+      auto_commit_enable=False,
+      auto_commit_interval_ms=60 * 1000,
+      consumer_timeout_ms=-1
+
+  Configuration parameters are described in more detail at
+  http://kafka.apache.org/documentation.html#highlevelconsumerapi
+
 Multiprocess consumer
 ---------------------

@@ -85,13 +167,13 @@ Multiprocess consumer

    from kafka import KafkaClient, MultiProcessConsumer

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

    # This will split the number of partitions among two processes
-    consumer = MultiProcessConsumer(kafka, "my-group", "my-topic", num_procs=2)
+    consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic', num_procs=2)

    # This will spawn processes such that each handles 2 partitions max
-    consumer = MultiProcessConsumer(kafka, "my-group", "my-topic",
+    consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic',
                                    partitions_per_proc=2)

    for message in consumer:
@@ -109,14 +191,14 @@ Low level
    from kafka.protocol import KafkaProtocol
    from kafka.common import ProduceRequest

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

-    req = ProduceRequest(topic="my-topic", partition=1,
-        messages=[create_message("some message")])
+    req = ProduceRequest(topic=b'my-topic', partition=1,
+        messages=[create_message(b'some message')])
    resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
    kafka.close()

-    resps[0].topic      # "my-topic"
+    resps[0].topic      # b'my-topic'
    resps[0].partition  # 1
    resps[0].error      # 0 (hopefully)
    resps[0].offset     # offset of the first message sent in this request
--- a/kafka/init.py
+++ b/kafka/init.py
@@ -1,10 +1,8 @@
 __title__ = 'kafka'
-# Use setuptools to get version from setup.py
-import pkg_resources
-__version__ = pkg_resources.require('kafka-python')[0].version
+from .version import __version__
 __author__ = 'David Arthur'
 __license__ = 'Apache License 2.0'
-__copyright__ = 'Copyright 2014, David Arthur under Apache License, v2.0'
+__copyright__ = 'Copyright 2015, David Arthur under Apache License, v2.0'

 from kafka.client import KafkaClient
 from kafka.conn import KafkaConnection
@@ -12,7 +10,7 @@ from kafka.protocol import (
    create_message, create_gzip_message, create_snappy_message
 )
 from kafka.producer import SimpleProducer, KeyedProducer
-from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner
+from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner
 from kafka.consumer import SimpleConsumer, MultiProcessConsumer, KafkaConsumer

 __all__ = [
--- a/kafka/client.py
+++ b/kafka/client.py
@@ -1,12 +1,11 @@
-import binascii
 import collections
 import copy
 import functools
-import itertools
 import logging
+import select
 import time
-import kafka.common

+import kafka.common
 from kafka.common import (TopicAndPartition, BrokerMetadata,
                          ConnectionError, FailedPayloadsError,
                          KafkaTimeoutError, KafkaUnavailableError,
@@ -15,24 +14,27 @@ from kafka.common import (TopicAndPartition, BrokerMetadata,

 from kafka.conn import collect_hosts, KafkaConnection, DEFAULT_SOCKET_TIMEOUT_SECONDS
 from kafka.protocol import KafkaProtocol
+from kafka.util import kafka_bytestring

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)


 class KafkaClient(object):

-    CLIENT_ID = b"kafka-python"
-    ID_GEN = itertools.count()
+    CLIENT_ID = b'kafka-python'

    # NOTE: The timeout given to the client should always be greater than the
    # one passed to SimpleConsumer.get_message(), otherwise you can get a
    # socket timeout.
    def __init__(self, hosts, client_id=CLIENT_ID,
-                 timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS):
+                 timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS,
+                 correlation_id=0):
        # We need one connection to bootstrap
-        self.client_id = client_id
+        self.client_id = kafka_bytestring(client_id)
        self.timeout = timeout
        self.hosts = collect_hosts(hosts)
+        self.correlation_id = correlation_id

        # create connections only when we need them
        self.conns = {}
@@ -48,7 +50,7 @@ class KafkaClient(object):
    ##################

    def _get_conn(self, host, port):
-        "Get or create a connection to a broker using host and port"
+        """Get or create a connection to a broker using host and port"""
        host_key = (host, port)
        if host_key not in self.conns:
            self.conns[host_key] = KafkaConnection(
@@ -85,7 +87,7 @@ class KafkaClient(object):
        self.load_metadata_for_topics(topic)

        # If the partition doesn't actually exist, raise
-        if partition not in self.topic_partitions[topic]:
+        if partition not in self.topic_partitions.get(topic, []):
            raise UnknownTopicOrPartitionError(key)

        # If there's no leader for the partition, raise
@@ -96,11 +98,31 @@ class KafkaClient(object):
        # Otherwise return the BrokerMetadata
        return self.brokers[meta.leader]

+    def _get_coordinator_for_group(self, group):
+        """
+        Returns the coordinator broker for a consumer group.
+
+        ConsumerCoordinatorNotAvailableCode will be raised if the coordinator
+        does not currently exist for the group.
+
+        OffsetsLoadInProgressCode is raised if the coordinator is available
+        but is still loading offsets from the internal topic
+        """
+
+        resp = self.send_consumer_metadata_request(group)
+
+        # If there's a problem with finding the coordinator, raise the
+        # provided error
+        kafka.common.check_error(resp)
+
+        # Otherwise return the BrokerMetadata
+        return BrokerMetadata(resp.nodeId, resp.host, resp.port)
+
    def _next_id(self):
-        """
-        Generate a new correlation id
-        """
-        return next(KafkaClient.ID_GEN)
+        """Generate a new correlation id"""
+        # modulo to keep w/i int32
+        self.correlation_id = (self.correlation_id + 1) % 2**31
+        return self.correlation_id

    def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn):
        """
@@ -109,6 +131,7 @@ class KafkaClient(object):
        """
        for (host, port) in self.hosts:
            requestId = self._next_id()
+            log.debug('Request %s: %s', requestId, payloads)
            try:
                conn = self._get_conn(host, port)
                request = encoder_fn(client_id=self.client_id,
@@ -117,13 +140,15 @@ class KafkaClient(object):

                conn.send(requestId, request)
                response = conn.recv(requestId)
-                return decoder_fn(response)
+                decoded = decoder_fn(response)
+                log.debug('Response %s: %s', requestId, decoded)
+                return decoded

            except Exception:
-                log.exception("Could not send request [%r] to server %s:%i, "
-                              "trying next server" % (requestId, host, port))
+                log.exception('Error sending request [%s] to server %s:%s, '
+                              'trying next server', requestId, host, port)

-        raise KafkaUnavailableError("All servers failed to process request")
+        raise KafkaUnavailableError('All servers failed to process request')

    def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
        """
@@ -134,7 +159,8 @@ class KafkaClient(object):
        Arguments:

        payloads: list of object-like entities with a topic (str) and
-            partition (int) attribute
+            partition (int) attribute; payloads with duplicate topic-partitions
+            are not supported.

        encode_fn: a method to encode the list of payloads to a request body,
            must accept client_id, correlation_id, and payloads as
@@ -148,72 +174,215 @@ class KafkaClient(object):

        List of response objects in the same order as the supplied payloads
        """
+        # encoders / decoders do not maintain ordering currently
+        # so we need to keep this so we can rebuild order before returning
+        original_ordering = [(p.topic, p.partition) for p in payloads]

        # Group the requests by topic+partition
-        original_keys = []
+        brokers_for_payloads = []
        payloads_by_broker = collections.defaultdict(list)

+        responses = {}
        for payload in payloads:
-            leader = self._get_leader_for_partition(payload.topic,
-                                                    payload.partition)
-
-            payloads_by_broker[leader].append(payload)
-            original_keys.append((payload.topic, payload.partition))
-
-        # Accumulate the responses in a dictionary
-        acc = {}
-
-        # keep a list of payloads that were failed to be sent to brokers
-        failed_payloads = []
+            try:
+                leader = self._get_leader_for_partition(payload.topic,
+                                                        payload.partition)
+                payloads_by_broker[leader].append(payload)
+                brokers_for_payloads.append(leader)
+            except KafkaUnavailableError as e:
+                log.warning('KafkaUnavailableError attempting to send request '
+                            'on topic %s partition %d', payload.topic, payload.partition)
+                topic_partition = (payload.topic, payload.partition)
+                responses[topic_partition] = FailedPayloadsError(payload)

        # For each broker, send the list of request payloads
+        # and collect the responses and errors
+        broker_failures = []
+
+        # For each KafkaConnection keep the real socket so that we can use
+        # select() to perform non-blocking I/O
+        connections_by_socket = {}
        for broker, payloads in payloads_by_broker.items():
-            conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
            requestId = self._next_id()
+            log.debug('Request %s to %s: %s', requestId, broker, payloads)
            request = encoder_fn(client_id=self.client_id,
                                 correlation_id=requestId, payloads=payloads)

-            failed = False
            # Send the request, recv the response
            try:
+                conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
                conn.send(requestId, request)
-                if decoder_fn is None:
-                    continue
-                try:
-                    response = conn.recv(requestId)
-                except ConnectionError as e:
-                    log.warning("Could not receive response to request [%s] "
-                                "from server %s: %s", binascii.b2a_hex(request), conn, e)
-                    failed = True
+
            except ConnectionError as e:
-                log.warning("Could not send request [%s] to server %s: %s",
-                            binascii.b2a_hex(request), conn, e)
-                failed = True
+                broker_failures.append(broker)
+                log.warning('ConnectionError attempting to send request %s '
+                            'to server %s: %s', requestId, broker, e)

-            if failed:
-                failed_payloads += payloads
-                self.reset_all_metadata()
-                continue
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)

-            for response in decoder_fn(response):
-                acc[(response.topic, response.partition)] = response
+            # No exception, try to get response
+            else:

-        if failed_payloads:
-            raise FailedPayloadsError(failed_payloads)
+                # decoder_fn=None signals that the server is expected to not
+                # send a response.  This probably only applies to
+                # ProduceRequest w/ acks = 0
+                if decoder_fn is None:
+                    log.debug('Request %s does not expect a response '
+                              '(skipping conn.recv)', requestId)
+                    for payload in payloads:
+                        topic_partition = (payload.topic, payload.partition)
+                        responses[topic_partition] = None
+                    continue
+                else:
+                    connections_by_socket[conn.get_connected_socket()] = (conn, broker, requestId)

-        # Order the accumulated responses by the original key order
-        return (acc[k] for k in original_keys) if acc else ()
+        conn = None
+        while connections_by_socket:
+            sockets = connections_by_socket.keys()
+            rlist, _, _ = select.select(sockets, [], [], None)
+            conn, broker, requestId = connections_by_socket.pop(rlist[0])
+            try:
+                response = conn.recv(requestId)
+            except ConnectionError as e:
+                broker_failures.append(broker)
+                log.warning('ConnectionError attempting to receive a '
+                            'response to request %s from server %s: %s',
+                            requestId, broker, e)
+
+                for payload in payloads_by_broker[broker]:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)
+
+            else:
+                _resps = []
+                for payload_response in decoder_fn(response):
+                    topic_partition = (payload_response.topic,
+                                       payload_response.partition)
+                    responses[topic_partition] = payload_response
+                    _resps.append(payload_response)
+                log.debug('Response %s: %s', requestId, _resps)
+
+        # Connection errors generally mean stale metadata
+        # although sometimes it means incorrect api request
+        # Unfortunately there is no good way to tell the difference
+        # so we'll just reset metadata on all errors to be safe
+        if broker_failures:
+            self.reset_all_metadata()
+
+        # Return responses in the same order as provided
+        return [responses[tp] for tp in original_ordering]
+
+    def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn):
+        """
+        Send a list of requests to the consumer coordinator for the group
+        specified using the supplied encode/decode functions. As the payloads
+        that use consumer-aware requests do not contain the group (e.g.
+        OffsetFetchRequest), all payloads must be for a single group.
+
+        Arguments:
+
+        group: the name of the consumer group (str) the payloads are for
+        payloads: list of object-like entities with topic (str) and
+            partition (int) attributes; payloads with duplicate
+            topic+partition are not supported.
+
+        encode_fn: a method to encode the list of payloads to a request body,
+            must accept client_id, correlation_id, and payloads as
+            keyword arguments
+
+        decode_fn: a method to decode a response body into response objects.
+            The response objects must be object-like and have topic
+            and partition attributes
+
+        Returns:
+
+        List of response objects in the same order as the supplied payloads
+        """
+        # encoders / decoders do not maintain ordering currently
+        # so we need to keep this so we can rebuild order before returning
+        original_ordering = [(p.topic, p.partition) for p in payloads]
+
+        broker = self._get_coordinator_for_group(group)
+
+        # Send the list of request payloads and collect the responses and
+        # errors
+        responses = {}
+        requestId = self._next_id()
+        log.debug('Request %s to %s: %s', requestId, broker, payloads)
+        request = encoder_fn(client_id=self.client_id,
+                             correlation_id=requestId, payloads=payloads)
+
+        # Send the request, recv the response
+        try:
+            conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
+            conn.send(requestId, request)
+
+        except ConnectionError as e:
+            log.warning('ConnectionError attempting to send request %s '
+                        'to server %s: %s', requestId, broker, e)
+
+            for payload in payloads:
+                topic_partition = (payload.topic, payload.partition)
+                responses[topic_partition] = FailedPayloadsError(payload)
+
+        # No exception, try to get response
+        else:
+
+            # decoder_fn=None signals that the server is expected to not
+            # send a response.  This probably only applies to
+            # ProduceRequest w/ acks = 0
+            if decoder_fn is None:
+                log.debug('Request %s does not expect a response '
+                          '(skipping conn.recv)', requestId)
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = None
+                return []
+
+            try:
+                response = conn.recv(requestId)
+            except ConnectionError as e:
+                log.warning('ConnectionError attempting to receive a '
+                            'response to request %s from server %s: %s',
+                            requestId, broker, e)
+
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)
+
+            else:
+                _resps = []
+                for payload_response in decoder_fn(response):
+                    topic_partition = (payload_response.topic,
+                                       payload_response.partition)
+                    responses[topic_partition] = payload_response
+                    _resps.append(payload_response)
+                log.debug('Response %s: %s', requestId, _resps)
+
+        # Return responses in the same order as provided
+        return [responses[tp] for tp in original_ordering]

    def __repr__(self):
        return '<KafkaClient client_id=%s>' % (self.client_id)

    def _raise_on_response_error(self, resp):
+
+        # Response can be an unraised exception object (FailedPayloadsError)
+        if isinstance(resp, Exception):
+            raise resp
+
+        # Or a server api error response
        try:
            kafka.common.check_error(resp)
        except (UnknownTopicOrPartitionError, NotLeaderForPartitionError):
            self.reset_topic_metadata(resp.topic)
            raise

+        # Return False if no error to enable list comprehensions
+        return False
+
    #################
    #   Public API  #
    #################
@@ -223,8 +392,11 @@ class KafkaClient(object):

    def copy(self):
        """
-        Create an inactive copy of the client object
-        A reinit() has to be done on the copy before it can be used again
+        Create an inactive copy of the client object, suitable for passing
+        to a separate thread.
+
+        Note that the copied connections are not initialized, so reinit() must
+        be called on the returned copy.
        """
        c = copy.deepcopy(self)
        for key in c.conns:
@@ -237,38 +409,40 @@ class KafkaClient(object):

    def reset_topic_metadata(self, *topics):
        for topic in topics:
-            try:
-                partitions = self.topic_partitions[topic]
-            except KeyError:
-                continue
-
-            for partition in partitions:
-                self.topics_to_brokers.pop(TopicAndPartition(topic, partition), None)
-
-            del self.topic_partitions[topic]
+            for topic_partition in list(self.topics_to_brokers.keys()):
+                if topic_partition.topic == topic:
+                    del self.topics_to_brokers[topic_partition]
+            if topic in self.topic_partitions:
+                del self.topic_partitions[topic]

    def reset_all_metadata(self):
        self.topics_to_brokers.clear()
        self.topic_partitions.clear()

    def has_metadata_for_topic(self, topic):
+        topic = kafka_bytestring(topic)
        return (
          topic in self.topic_partitions
          and len(self.topic_partitions[topic]) > 0
        )

    def get_partition_ids_for_topic(self, topic):
+        topic = kafka_bytestring(topic)
        if topic not in self.topic_partitions:
-            return None
+            return []

-        return list(self.topic_partitions[topic])
+        return sorted(list(self.topic_partitions[topic]))
+
+    @property
+    def topics(self):
+        return list(self.topic_partitions.keys())

    def ensure_topic_exists(self, topic, timeout = 30):
        start_time = time.time()

        while not self.has_metadata_for_topic(topic):
            if time.time() > start_time + timeout:
-                raise KafkaTimeoutError("Unable to create topic {0}".format(topic))
+                raise KafkaTimeoutError('Unable to create topic {0}'.format(topic))
            try:
                self.load_metadata_for_topics(topic)
            except LeaderNotAvailableError:
@@ -306,10 +480,18 @@ class KafkaClient(object):
        Partition-level errors will also not be raised here
        (a single partition w/o a leader, for example)
        """
+        topics = [kafka_bytestring(t) for t in topics]
+
+        if topics:
+            for topic in topics:
+                self.reset_topic_metadata(topic)
+        else:
+            self.reset_all_metadata()
+
        resp = self.send_metadata_request(topics)

-        log.debug("Broker metadata: %s", resp.brokers)
-        log.debug("Topic metadata: %s", resp.topics)
+        log.debug('Updating broker metadata: %s', resp.brokers)
+        log.debug('Updating topic metadata: %s', resp.topics)

        self.brokers = dict([(broker.nodeId, broker)
                             for broker in resp.brokers])
@@ -318,8 +500,6 @@ class KafkaClient(object):
            topic = topic_metadata.topic
            partitions = topic_metadata.partitions

-            self.reset_topic_metadata(topic)
-
            # Errors expected for new topics
            try:
                kafka.common.check_error(topic_metadata)
@@ -330,7 +510,7 @@ class KafkaClient(object):
                    raise

                # Otherwise, just log a warning
-                log.error("Error loading topic metadata for %s: %s", topic, type(e))
+                log.error('Error loading topic metadata for %s: %s', topic, type(e))
                continue

            self.topic_partitions[topic] = {}
@@ -356,7 +536,7 @@ class KafkaClient(object):
                # this error code is provided for admin purposes only
                # we never talk to replicas, only the leader
                except ReplicaNotAvailableError:
-                    log.warning('Some (non-leader) replicas not available for topic %s partition %d', topic, partition)
+                    log.debug('Some (non-leader) replicas not available for topic %s partition %d', topic, partition)

                # If Known Broker, topic_partition -> BrokerMetadata
                if leader in self.brokers:
@@ -371,12 +551,18 @@ class KafkaClient(object):

    def send_metadata_request(self, payloads=[], fail_on_error=True,
                              callback=None):
-
        encoder = KafkaProtocol.encode_metadata_request
        decoder = KafkaProtocol.decode_metadata_response

        return self._send_broker_unaware_request(payloads, encoder, decoder)

+    def send_consumer_metadata_request(self, payloads=[], fail_on_error=True,
+                                       callback=None):
+        encoder = KafkaProtocol.encode_consumer_metadata_request
+        decoder = KafkaProtocol.decode_consumer_metadata_response
+
+        return self._send_broker_unaware_request(payloads, encoder, decoder)
+
    def send_produce_request(self, payloads=[], acks=1, timeout=1000,
                             fail_on_error=True, callback=None):
        """
@@ -387,14 +573,27 @@ class KafkaClient(object):
        same order as the list of payloads specified

        Arguments:
-            payloads: list of ProduceRequest
-            fail_on_error: boolean, should we raise an Exception if we
-                           encounter an API error?
-            callback: function, instead of returning the ProduceResponse,
-                      first pass it through this function
+            payloads (list of ProduceRequest): produce requests to send to kafka
+                ProduceRequest payloads must not contain duplicates for any
+                topic-partition.
+            acks (int, optional): how many acks the servers should receive from replica
+                brokers before responding to the request. If it is 0, the server
+                will not send any response. If it is 1, the server will wait
+                until the data is written to the local log before sending a
+                response.  If it is -1, the server will wait until the message
+                is committed by all in-sync replicas before sending a response.
+                For any value > 1, the server will wait for this number of acks to
+                occur (but the server will never wait for more acknowledgements than
+                there are in-sync replicas). defaults to 1.
+            timeout (int, optional): maximum time in milliseconds the server can
+                await the receipt of the number of acks, defaults to 1000.
+            fail_on_error (bool, optional): raise exceptions on connection and
+                server response errors, defaults to True.
+            callback (function, optional): instead of returning the ProduceResponse,
+                first pass it through this function, defaults to None.

        Returns:
-            list of ProduceResponse or callback(ProduceResponse), in the
+            list of ProduceResponses, or callback results if supplied, in the
            order of input payloads
        """

@@ -410,16 +609,9 @@ class KafkaClient(object):

        resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if resp is not None and
+                (not fail_on_error or not self._raise_on_response_error(resp))]

    def send_fetch_request(self, payloads=[], fail_on_error=True,
                           callback=None, max_wait_time=100, min_bytes=4096):
@@ -438,16 +630,8 @@ class KafkaClient(object):
            payloads, encoder,
            KafkaProtocol.decode_fetch_response)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

    def send_offset_request(self, payloads=[], fail_on_error=True,
                            callback=None):
@@ -456,15 +640,8 @@ class KafkaClient(object):
            KafkaProtocol.encode_offset_request,
            KafkaProtocol.decode_offset_response)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

    def send_offset_commit_request(self, group, payloads=[],
                                   fail_on_error=True, callback=None):
@@ -473,16 +650,8 @@ class KafkaClient(object):
        decoder = KafkaProtocol.decode_offset_commit_response
        resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

    def send_offset_fetch_request(self, group, payloads=[],
                                  fail_on_error=True, callback=None):
@@ -492,12 +661,16 @@ class KafkaClient(object):
        decoder = KafkaProtocol.decode_offset_fetch_response
        resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]
+
+    def send_offset_fetch_request_kafka(self, group, payloads=[],
+                                  fail_on_error=True, callback=None):
+
+        encoder = functools.partial(KafkaProtocol.encode_offset_fetch_request,
+                          group=group, from_kafka=True)
+        decoder = KafkaProtocol.decode_offset_fetch_response
+        resps = self._send_consumer_aware_request(group, payloads, encoder, decoder)
+
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]
--- a/kafka/codec.py
+++ b/kafka/codec.py
@@ -1,8 +1,7 @@
-from io import BytesIO
 import gzip
+from io import BytesIO
 import struct

-import six
 from six.moves import xrange

 _XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1)
@@ -10,9 +9,9 @@ _XERIAL_V1_FORMAT = 'bccccccBii'

 try:
    import snappy
-    _has_snappy = True
+    _HAS_SNAPPY = True
 except ImportError:
-    _has_snappy = False
+    _HAS_SNAPPY = False


 def has_gzip():
@@ -20,26 +19,39 @@ def has_gzip():


 def has_snappy():
-    return _has_snappy
+    return _HAS_SNAPPY


-def gzip_encode(payload):
-    buffer = BytesIO()
-    handle = gzip.GzipFile(fileobj=buffer, mode="w")
-    handle.write(payload)
-    handle.close()
-    buffer.seek(0)
-    result = buffer.read()
-    buffer.close()
+def gzip_encode(payload, compresslevel=None):
+    if not compresslevel:
+        compresslevel = 9
+
+    with BytesIO() as buf:
+
+        # Gzip context manager introduced in python 2.7
+        # so old-fashioned way until we decide to not support 2.6
+        gzipper = gzip.GzipFile(fileobj=buf, mode="w", compresslevel=compresslevel)
+        try:
+            gzipper.write(payload)
+        finally:
+            gzipper.close()
+
+        result = buf.getvalue()
+
    return result


 def gzip_decode(payload):
-    buffer = BytesIO(payload)
-    handle = gzip.GzipFile(fileobj=buffer, mode='r')
-    result = handle.read()
-    handle.close()
-    buffer.close()
+    with BytesIO(payload) as buf:
+
+        # Gzip context manager introduced in python 2.7
+        # so old-fashioned way until we decide to not support 2.6
+        gzipper = gzip.GzipFile(fileobj=buf, mode='r')
+        try:
+            result = gzipper.read()
+        finally:
+            gzipper.close()
+
    return result


@@ -47,8 +59,8 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
    """Encodes the given data with snappy if xerial_compatible is set then the
       stream is encoded in a fashion compatible with the xerial snappy library

-       The block size (xerial_blocksize) controls how frequent the blocking occurs
-       32k is the default in the xerial library.
+       The block size (xerial_blocksize) controls how frequently the blocking
+       occurs; 32k is the default in the xerial library.

       The format winds up being
        +-------------+------------+--------------+------------+--------------+
@@ -63,7 +75,7 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
        length will always be <= blocksize.
    """

-    if not _has_snappy:
+    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if xerial_compatible:
@@ -74,7 +86,7 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
        out = BytesIO()

        header = b''.join([struct.pack('!' + fmt, dat) for fmt, dat
-            in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])
+                           in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])

        out.write(header)
        for chunk in _chunker():
@@ -113,13 +125,13 @@ def _detect_xerial_stream(payload):
    """

    if len(payload) > 16:
-        header = header = struct.unpack('!' + _XERIAL_V1_FORMAT, bytes(payload)[:16])
+        header = struct.unpack('!' + _XERIAL_V1_FORMAT, bytes(payload)[:16])
        return header == _XERIAL_V1_HEADER
    return False


 def snappy_decode(payload):
-    if not _has_snappy:
+    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if _detect_xerial_stream(payload):
--- a/kafka/common.py
+++ b/kafka/common.py
@@ -6,43 +6,53 @@ from collections import namedtuple
 #   Structs   #
 ###############

-# Request payloads
-ProduceRequest = namedtuple("ProduceRequest",
-                            ["topic", "partition", "messages"])
-
-FetchRequest = namedtuple("FetchRequest",
-                          ["topic", "partition", "offset", "max_bytes"])
-
-OffsetRequest = namedtuple("OffsetRequest",
-                           ["topic", "partition", "time", "max_offsets"])
-
-OffsetCommitRequest = namedtuple("OffsetCommitRequest",
-                                 ["topic", "partition", "offset", "metadata"])
-
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-MetadataAPI
 MetadataRequest = namedtuple("MetadataRequest",
    ["topics"])

-OffsetFetchRequest = namedtuple("OffsetFetchRequest", ["topic", "partition"])
-
 MetadataResponse = namedtuple("MetadataResponse",
    ["brokers", "topics"])

-# Response payloads
-ProduceResponse = namedtuple("ProduceResponse",
-                             ["topic", "partition", "error", "offset"])
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ConsumerMetadataRequest
+ConsumerMetadataRequest = namedtuple("ConsumerMetadataRequest",
+    ["groups"])

-FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error",
-                                             "highwaterMark", "messages"])
+ConsumerMetadataResponse = namedtuple("ConsumerMetadataResponse",
+    ["error", "nodeId", "host", "port"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProduceAPI
+ProduceRequest = namedtuple("ProduceRequest",
+    ["topic", "partition", "messages"])
+
+ProduceResponse = namedtuple("ProduceResponse",
+    ["topic", "partition", "error", "offset"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-FetchAPI
+FetchRequest = namedtuple("FetchRequest",
+    ["topic", "partition", "offset", "max_bytes"])
+
+FetchResponse = namedtuple("FetchResponse",
+    ["topic", "partition", "error", "highwaterMark", "messages"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
+OffsetRequest = namedtuple("OffsetRequest",
+    ["topic", "partition", "time", "max_offsets"])

 OffsetResponse = namedtuple("OffsetResponse",
-                            ["topic", "partition", "error", "offsets"])
+    ["topic", "partition", "error", "offsets"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
+OffsetCommitRequest = namedtuple("OffsetCommitRequest",
+    ["topic", "partition", "offset", "metadata"])

 OffsetCommitResponse = namedtuple("OffsetCommitResponse",
-                                  ["topic", "partition", "error"])
+    ["topic", "partition", "error"])
+
+OffsetFetchRequest = namedtuple("OffsetFetchRequest",
+    ["topic", "partition"])

 OffsetFetchResponse = namedtuple("OffsetFetchResponse",
-                                 ["topic", "partition", "offset",
-                                  "metadata", "error"])
+    ["topic", "partition", "offset", "metadata", "error"])



@@ -68,6 +78,11 @@ TopicAndPartition = namedtuple("TopicAndPartition",
 KafkaMessage = namedtuple("KafkaMessage",
    ["topic", "partition", "offset", "key", "value"])

+# Define retry policy for async producer
+# Limit value: int >= 0, 0 means no retries
+RetryOptions = namedtuple("RetryOptions",
+    ["limit", "backoff_ms", "retry_on_timeouts"])
+

 #################
 #   Exceptions  #
@@ -152,6 +167,21 @@ class StaleLeaderEpochCodeError(BrokerResponseError):
    message = 'STALE_LEADER_EPOCH_CODE'


+class OffsetsLoadInProgressCode(BrokerResponseError):
+    errno = 14
+    message = 'OFFSETS_LOAD_IN_PROGRESS_CODE'
+
+
+class ConsumerCoordinatorNotAvailableCode(BrokerResponseError):
+    errno = 15
+    message = 'CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE'
+
+
+class NotCoordinatorForConsumerCode(BrokerResponseError):
+    errno = 16
+    message = 'NOT_COORDINATOR_FOR_CONSUMER_CODE'
+
+
 class KafkaUnavailableError(KafkaError):
    pass

@@ -161,7 +191,9 @@ class KafkaTimeoutError(KafkaError):


 class FailedPayloadsError(KafkaError):
-    pass
+    def __init__(self, payload, *args):
+        super(FailedPayloadsError, self).__init__(*args)
+        self.payload = payload


 class ConnectionError(KafkaError):
@@ -200,6 +232,12 @@ class KafkaConfigurationError(KafkaError):
    pass


+class AsyncProducerQueueFull(KafkaError):
+    def __init__(self, failed_msgs, *args):
+        super(AsyncProducerQueueFull, self).__init__(*args)
+        self.failed_msgs = failed_msgs
+
+
 def _iter_broker_errors():
    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if inspect.isclass(obj) and issubclass(obj, BrokerResponseError) and obj != BrokerResponseError:
@@ -210,6 +248,23 @@ kafka_errors = dict([(x.errno, x) for x in _iter_broker_errors()])


 def check_error(response):
+    if isinstance(response, Exception):
+        raise response
    if response.error:
        error_class = kafka_errors.get(response.error, UnknownError)
        raise error_class(response)
+
+
+RETRY_BACKOFF_ERROR_TYPES = (
+    KafkaUnavailableError, LeaderNotAvailableError,
+    ConnectionError, FailedPayloadsError
+)
+
+
+RETRY_REFRESH_ERROR_TYPES = (
+    NotLeaderForPartitionError, UnknownTopicOrPartitionError,
+    LeaderNotAvailableError, ConnectionError
+)
+
+
+RETRY_ERROR_TYPES = RETRY_BACKOFF_ERROR_TYPES + RETRY_REFRESH_ERROR_TYPES
--- a/kafka/conn.py
+++ b/kafka/conn.py
@@ -9,7 +9,8 @@ import six

 from kafka.common import ConnectionError

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)

 DEFAULT_SOCKET_TIMEOUT_SECONDS = 120
 DEFAULT_KAFKA_PORT = 9092
@@ -62,6 +63,9 @@ class KafkaConnection(local):

        self.reinit()

+    def __getnewargs__(self):
+        return (self.host, self.port, self.timeout)
+
    def __repr__(self):
        return "<KafkaConnection host=%s port=%d>" % (self.host, self.port)

@@ -114,6 +118,11 @@ class KafkaConnection(local):

    # TODO multiplex socket communication to allow for multi-threaded clients

+    def get_connected_socket(self):
+        if not self._sock:
+            self.reinit()
+        return self._sock
+
    def send(self, request_id, payload):
        """
        Send a request to Kafka
@@ -147,6 +156,10 @@ class KafkaConnection(local):
        """
        log.debug("Reading response %d from Kafka" % request_id)

+        # Make sure we have a connection
+        if not self._sock:
+            self.reinit()
+
        # Read the size off of the header
        resp = self._read_bytes(4)
        (size,) = struct.unpack('>i', resp)
@@ -157,9 +170,11 @@ class KafkaConnection(local):

    def copy(self):
        """
-        Create an inactive copy of the connection object
-        A reinit() has to be done on the copy before it can be used again
-        return a new KafkaConnection object
+        Create an inactive copy of the connection object, suitable for
+        passing to a background thread.
+
+        The returned copy is not connected; you must call reinit() before
+        using.
        """
        c = copy.deepcopy(self)
        # Python 3 doesn't copy custom attributes of the threadlocal subclass
--- a/kafka/consumer/base.py
+++ b/kafka/consumer/base.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import

+import atexit
 import logging
 import numbers
 from threading import Lock
@@ -7,12 +8,13 @@ from threading import Lock
 import kafka.common
 from kafka.common import (
    OffsetRequest, OffsetCommitRequest, OffsetFetchRequest,
-    UnknownTopicOrPartitionError
+    UnknownTopicOrPartitionError, check_error, KafkaError
 )

-from kafka.util import ReentrantTimer
+from kafka.util import kafka_bytestring, ReentrantTimer

-log = logging.getLogger("kafka")
+
+log = logging.getLogger('kafka.consumer')

 AUTO_COMMIT_MSG_COUNT = 100
 AUTO_COMMIT_INTERVAL = 5000
@@ -25,7 +27,9 @@ MAX_FETCH_BUFFER_SIZE_BYTES = FETCH_BUFFER_SIZE_BYTES * 8

 ITER_TIMEOUT_SECONDS = 60
 NO_MESSAGES_WAIT_TIME_SECONDS = 0.1
+FULL_QUEUE_WAIT_TIME_SECONDS = 0.1

+MAX_BACKOFF_SECONDS = 60

 class Consumer(object):
    """
@@ -43,12 +47,12 @@ class Consumer(object):
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
-        self.topic = topic
-        self.group = group
+        self.topic = kafka_bytestring(topic)
+        self.group = None if group is None else kafka_bytestring(group)
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

-        if not partitions:
+        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(topic)
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)
@@ -67,37 +71,65 @@ class Consumer(object):
                                               self.commit)
            self.commit_timer.start()

-        if auto_commit:
+        # Set initial offsets
+        if self.group is not None:
            self.fetch_last_known_offsets(partitions)
        else:
            for partition in partitions:
                self.offsets[partition] = 0

+        # Register a cleanup handler
+        def cleanup(obj):
+            obj.stop()
+        self._cleanup_func = cleanup
+        atexit.register(cleanup, self)
+
+        self.partition_info = False     # Do not return partition info in msgs
+
+    def provide_partition_info(self):
+        """
+        Indicates that partition info must be returned by the consumer
+        """
+        self.partition_info = True
+
    def fetch_last_known_offsets(self, partitions=None):
-        if not partitions:
+        if self.group is None:
+            raise ValueError('KafkaClient.group must not be None')
+
+        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

-        def get_or_init_offset(resp):
-            try:
-                kafka.common.check_error(resp)
-                return resp.offset
-            except UnknownTopicOrPartitionError:
-                return 0
+        responses = self.client.send_offset_fetch_request(
+            self.group,
+            [OffsetFetchRequest(self.topic, p) for p in partitions],
+            fail_on_error=False
+        )

-        for partition in partitions:
-            req = OffsetFetchRequest(self.topic, partition)
-            (resp,) = self.client.send_offset_fetch_request(self.group, [req],
-                          fail_on_error=False)
-            self.offsets[partition] = get_or_init_offset(resp)
-        self.fetch_offsets = self.offsets.copy()
+        for resp in responses:
+            try:
+                check_error(resp)
+            # API spec says server won't set an error here
+            # but 0.8.1.1 does actually...
+            except UnknownTopicOrPartitionError:
+                pass
+
+            # -1 offset signals no commit is currently stored
+            if resp.offset == -1:
+                self.offsets[resp.partition] = 0
+
+            # Otherwise we committed the stored offset
+            # and need to fetch the next one
+            else:
+                self.offsets[resp.partition] = resp.offset

    def commit(self, partitions=None):
-        """
-        Commit offsets for this consumer
+        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to commit
                all of them
+
+        Returns: True on success, False on failure
        """

        # short circuit if nothing happened. This check is kept outside
@@ -112,23 +144,28 @@ class Consumer(object):
                return

            reqs = []
-            if not partitions:  # commit all partitions
-                partitions = self.offsets.keys()
+            if partitions is None:  # commit all partitions
+                partitions = list(self.offsets.keys())

+            log.debug('Committing new offsets for %s, partitions %s',
+                     self.topic, partitions)
            for partition in partitions:
                offset = self.offsets[partition]
-                log.debug("Commit offset %d in SimpleConsumer: "
-                          "group=%s, topic=%s, partition=%s" %
-                          (offset, self.group, self.topic, partition))
+                log.debug('Commit offset %d in SimpleConsumer: '
+                          'group=%s, topic=%s, partition=%s',
+                          offset, self.group, self.topic, partition)

                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

-            resps = self.client.send_offset_commit_request(self.group, reqs)
-            for resp in resps:
-                kafka.common.check_error(resp)
-
-            self.count_since_commit = 0
+            try:
+                self.client.send_offset_commit_request(self.group, reqs)
+            except KafkaError as e:
+                log.error('%s saving offsets: %s', e.__class__.__name__, e)
+                return False
+            else:
+                self.count_since_commit = 0
+                return True

    def _auto_commit(self):
        """
@@ -147,6 +184,25 @@ class Consumer(object):
            self.commit_timer.stop()
            self.commit()

+        if hasattr(self, '_cleanup_func'):
+            # Remove cleanup handler now that we've stopped
+
+            # py3 supports unregistering
+            if hasattr(atexit, 'unregister'):
+                atexit.unregister(self._cleanup_func) # pylint: disable=no-member
+
+            # py2 requires removing from private attribute...
+            else:
+
+                # ValueError on list.remove() if the exithandler no longer
+                # exists is fine here
+                try:
+                    atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
+                except ValueError:
+                    pass
+
+            del self._cleanup_func
+
    def pending(self, partitions=None):
        """
        Gets the pending message count
@@ -154,7 +210,7 @@ class Consumer(object):
        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to check all
        """
-        if not partitions:
+        if partitions is None:
            partitions = self.offsets.keys()

        total = 0
--- a/kafka/consumer/kafka.py
+++ b/kafka/consumer/kafka.py
@@ -25,7 +25,7 @@ OffsetsStruct = namedtuple("OffsetsStruct", ["fetch", "highwater", "commit", "ta
 DEFAULT_CONSUMER_CONFIG = {
    'client_id': __name__,
    'group_id': None,
-    'metadata_broker_list': None,
+    'bootstrap_servers': [],
    'socket_timeout_ms': 30 * 1000,
    'fetch_message_max_bytes': 1024 * 1024,
    'auto_offset_reset': 'largest',
@@ -47,150 +47,100 @@ DEFAULT_CONSUMER_CONFIG = {
    'rebalance_backoff_ms': 2000,
 }

-BYTES_CONFIGURATION_KEYS = ('client_id', 'group_id')
-
+DEPRECATED_CONFIG_KEYS = {
+    'metadata_broker_list': 'bootstrap_servers',
+}

 class KafkaConsumer(object):
-    """
-    A simpler kafka consumer
-
-    .. code:: python
-
-        # A very basic 'tail' consumer, with no stored offset management
-        kafka = KafkaConsumer('topic1')
-        for m in kafka:
-          print m
-
-        # Alternate interface: next()
-        print kafka.next()
-
-        # Alternate interface: batch iteration
-        while True:
-          for m in kafka.fetch_messages():
-            print m
-          print "Done with batch - let's do another!"
-
-
-    .. code:: python
-
-        # more advanced consumer -- multiple topics w/ auto commit offset management
-        kafka = KafkaConsumer('topic1', 'topic2',
-                              group_id='my_consumer_group',
-                              auto_commit_enable=True,
-                              auto_commit_interval_ms=30 * 1000,
-                              auto_offset_reset='smallest')
-
-        # Infinite iteration
-        for m in kafka:
-          process_message(m)
-          kafka.task_done(m)
-
-        # Alternate interface: next()
-        m = kafka.next()
-        process_message(m)
-        kafka.task_done(m)
-
-        # If auto_commit_enable is False, remember to commit() periodically
-        kafka.commit()
-
-        # Batch process interface
-        while True:
-          for m in kafka.fetch_messages():
-            process_message(m)
-            kafka.task_done(m)
-
-
-    messages (m) are namedtuples with attributes:
-
-      * `m.topic`: topic name (str)
-      * `m.partition`: partition number (int)
-      * `m.offset`: message offset on topic-partition log (int)
-      * `m.key`: key (bytes - can be None)
-      * `m.value`: message (output of deserializer_class - default is raw bytes)
-
-    Configuration settings can be passed to constructor,
-    otherwise defaults will be used:
-
-    .. code:: python
-
-        client_id='kafka.consumer.kafka',
-        group_id=None,
-        fetch_message_max_bytes=1024*1024,
-        fetch_min_bytes=1,
-        fetch_wait_max_ms=100,
-        refresh_leader_backoff_ms=200,
-        metadata_broker_list=None,
-        socket_timeout_ms=30*1000,
-        auto_offset_reset='largest',
-        deserializer_class=lambda msg: msg,
-        auto_commit_enable=False,
-        auto_commit_interval_ms=60 * 1000,
-        consumer_timeout_ms=-1
-
-    Configuration parameters are described in more detail at
-    http://kafka.apache.org/documentation.html#highlevelconsumerapi
-    """
+    """A simpler kafka consumer"""
+    DEFAULT_CONFIG = deepcopy(DEFAULT_CONSUMER_CONFIG)

    def __init__(self, *topics, **configs):
        self.configure(**configs)
        self.set_topic_partitions(*topics)

    def configure(self, **configs):
-        """
+        """Configure the consumer instance
+
        Configuration settings can be passed to constructor,
        otherwise defaults will be used:

-        .. code:: python
-
-            client_id='kafka.consumer.kafka',
-            group_id=None,
-            fetch_message_max_bytes=1024*1024,
-            fetch_min_bytes=1,
-            fetch_wait_max_ms=100,
-            refresh_leader_backoff_ms=200,
-            metadata_broker_list=None,
-            socket_timeout_ms=30*1000,
-            auto_offset_reset='largest',
-            deserializer_class=lambda msg: msg,
-            auto_commit_enable=False,
-            auto_commit_interval_ms=60 * 1000,
-            auto_commit_interval_messages=None,
-            consumer_timeout_ms=-1
+        Keyword Arguments:
+            bootstrap_servers (list): List of initial broker nodes the consumer
+                should contact to bootstrap initial cluster metadata.  This does
+                not have to be the full node list.  It just needs to have at
+                least one broker that will respond to a Metadata API Request.
+            client_id (str): a unique name for this client.  Defaults to
+                'kafka.consumer.kafka'.
+            group_id (str): the name of the consumer group to join,
+                Offsets are fetched / committed to this group name.
+            fetch_message_max_bytes (int, optional): Maximum bytes for each
+                topic/partition fetch request.  Defaults to 1024*1024.
+            fetch_min_bytes (int, optional): Minimum amount of data the server
+                should return for a fetch request, otherwise wait up to
+                fetch_wait_max_ms for more data to accumulate.  Defaults to 1.
+            fetch_wait_max_ms (int, optional): Maximum time for the server to
+                block waiting for fetch_min_bytes messages to accumulate.
+                Defaults to 100.
+            refresh_leader_backoff_ms (int, optional): Milliseconds to backoff
+                when refreshing metadata on errors (subject to random jitter).
+                Defaults to 200.
+            socket_timeout_ms (int, optional): TCP socket timeout in
+                milliseconds.  Defaults to 30*1000.
+            auto_offset_reset (str, optional): A policy for resetting offsets on
+                OffsetOutOfRange errors. 'smallest' will move to the oldest
+                available message, 'largest' will move to the most recent.  Any
+                other value will raise the exception.  Defaults to 'largest'.
+            deserializer_class (callable, optional):  Any callable that takes a
+                raw message value and returns a deserialized value.  Defaults to
+                lambda msg: msg.
+            auto_commit_enable (bool, optional): Enabling auto-commit will cause
+                the KafkaConsumer to periodically commit offsets without an
+                explicit call to commit().  Defaults to False.
+            auto_commit_interval_ms (int, optional): If auto_commit_enable
+                is set, the milliseconds between automatic offset commits.
+                Defaults to 60 * 1000.
+            auto_commit_interval_messages (int, optional): If
+                auto_commit_enable is set, the number of messages consumed
+                between automatic offset commits.  Defaults to None (disabled).
+            consumer_timeout_ms (int, optional): number of milliseconds to
+                wait for a message before raising a timeout exception to the
+                consumer.  Defaults to -1 (don't raise an exception).

        Configuration parameters are described in more detail at
        http://kafka.apache.org/documentation.html#highlevelconsumerapi
        """
+        configs = self._deprecate_configs(**configs)
        self._config = {}
-        for key in DEFAULT_CONSUMER_CONFIG:
-            self._config[key] = configs.pop(key, DEFAULT_CONSUMER_CONFIG[key])
+        for key in self.DEFAULT_CONFIG:
+            self._config[key] = configs.pop(key, self.DEFAULT_CONFIG[key])

        if configs:
            raise KafkaConfigurationError('Unknown configuration key(s): ' +
                                          str(list(configs.keys())))

-        # Handle str/bytes conversions
-        for config_key in BYTES_CONFIGURATION_KEYS:
-            if isinstance(self._config[config_key], six.string_types):
-                logger.warning("Converting configuration key '%s' to bytes" %
-                               config_key)
-                self._config[config_key] = self._config[config_key].encode('utf-8')
-
        if self._config['auto_commit_enable']:
            if not self._config['group_id']:
-                raise KafkaConfigurationError('KafkaConsumer configured to auto-commit without required consumer group (group_id)')
+                raise KafkaConfigurationError(
+                    'KafkaConsumer configured to auto-commit '
+                    'without required consumer group (group_id)'
+                )

        # Check auto-commit configuration
        if self._config['auto_commit_enable']:
            logger.info("Configuring consumer to auto-commit offsets")
            self._reset_auto_commit()

-        if self._config['metadata_broker_list'] is None:
-            raise KafkaConfigurationError('metadata_broker_list required to '
-                                          'configure KafkaConsumer')
+        if not self._config['bootstrap_servers']:
+            raise KafkaConfigurationError(
+                'bootstrap_servers required to configure KafkaConsumer'
+            )

-        self._client = KafkaClient(self._config['metadata_broker_list'],
-                                   client_id=self._config['client_id'],
-                                   timeout=(self._config['socket_timeout_ms'] / 1000.0))
+        self._client = KafkaClient(
+            self._config['bootstrap_servers'],
+            client_id=self._config['client_id'],
+            timeout=(self._config['socket_timeout_ms'] / 1000.0)
+        )

    def set_topic_partitions(self, *topics):
        """
@@ -220,12 +170,12 @@ class KafkaConsumer(object):
            # Consume topic1-all; topic2-partition2; topic3-partition0
            kafka.set_topic_partitions("topic1", ("topic2", 2), {"topic3": 0})

-            # Consume topic1-0 starting at offset 123, and topic2-1 at offset 456
+            # Consume topic1-0 starting at offset 12, and topic2-1 at offset 45
            # using tuples --
-            kafka.set_topic_partitions(("topic1", 0, 123), ("topic2", 1, 456))
+            kafka.set_topic_partitions(("topic1", 0, 12), ("topic2", 1, 45))

            # using dict --
-            kafka.set_topic_partitions({ ("topic1", 0): 123, ("topic2", 1): 456 })
+            kafka.set_topic_partitions({ ("topic1", 0): 12, ("topic2", 1): 45 })

        """
        self._topics = []
@@ -251,10 +201,10 @@ class KafkaConsumer(object):
            elif isinstance(arg, tuple):
                topic = kafka_bytestring(arg[0])
                partition = arg[1]
+                self._consume_topic_partition(topic, partition)
                if len(arg) == 3:
                    offset = arg[2]
                    self._offsets.fetch[(topic, partition)] = offset
-                self._consume_topic_partition(topic, partition)

            # { topic: partitions, ... } dict
            elif isinstance(arg, dict):
@@ -273,15 +223,17 @@ class KafkaConsumer(object):
                            for partition in value:
                                self._consume_topic_partition(topic, partition)
                        else:
-                            raise KafkaConfigurationError('Unknown topic type (dict key must be '
-                                                          'int or list/tuple of ints)')
+                            raise KafkaConfigurationError(
+                                'Unknown topic type '
+                                '(dict key must be int or list/tuple of ints)'
+                            )

                    # (topic, partition): offset
                    elif isinstance(key, tuple):
                        topic = kafka_bytestring(key[0])
                        partition = key[1]
                        self._consume_topic_partition(topic, partition)
-                        self._offsets.fetch[key] = value
+                        self._offsets.fetch[(topic, partition)] = value

            else:
                raise KafkaConfigurationError('Unknown topic type (%s)' % type(arg))
@@ -317,19 +269,23 @@ class KafkaConsumer(object):
        # Reset message iterator in case we were in the middle of one
        self._reset_message_iterator()

+    def close(self):
+        """Close this consumer's underlying client."""
+        self._client.close()
+
    def next(self):
-        """
-        Return a single message from the message iterator
-        If consumer_timeout_ms is set, will raise ConsumerTimeout
-        if no message is available
-        Otherwise blocks indefinitely
+        """Return the next available message

-        Note that this is also the method called internally during iteration:
+        Blocks indefinitely unless consumer_timeout_ms > 0

-        .. code:: python
+        Returns:
+            a single KafkaMessage from the message iterator

-            for m in consumer:
-                pass
+        Raises:
+            ConsumerTimeout after consumer_timeout_ms and no message
+
+        Note:
+            This is also the method called internally during iteration

        """
        self._set_consumer_timeout_start()
@@ -345,110 +301,129 @@ class KafkaConsumer(object):
            self._check_consumer_timeout()

    def fetch_messages(self):
-        """
-        Sends FetchRequests for all topic/partitions set for consumption
-        Returns a generator that yields KafkaMessage structs
-        after deserializing with the configured `deserializer_class`
+        """Sends FetchRequests for all topic/partitions set for consumption

-        Refreshes metadata on errors, and resets fetch offset on
-        OffsetOutOfRange, per the configured `auto_offset_reset` policy
+        Returns:
+            Generator that yields KafkaMessage structs
+            after deserializing with the configured `deserializer_class`

-        Key configuration parameters:
+        Note:
+            Refreshes metadata on errors, and resets fetch offset on
+            OffsetOutOfRange, per the configured `auto_offset_reset` policy
+
+        See Also:
+            Key KafkaConsumer configuration parameters:
+            * `fetch_message_max_bytes`
+            * `fetch_max_wait_ms`
+            * `fetch_min_bytes`
+            * `deserializer_class`
+            * `auto_offset_reset`

-        * `fetch_message_max_bytes`
-        * `fetch_max_wait_ms`
-        * `fetch_min_bytes`
-        * `deserializer_class`
-        * `auto_offset_reset`
        """

        max_bytes = self._config['fetch_message_max_bytes']
        max_wait_time = self._config['fetch_wait_max_ms']
        min_bytes = self._config['fetch_min_bytes']

-        # Get current fetch offsets
-        offsets = self._offsets.fetch
-        if not offsets:
-            if not self._topics:
-                raise KafkaConfigurationError('No topics or partitions configured')
-            raise KafkaConfigurationError('No fetch offsets found when calling fetch_messages')
+        if not self._topics:
+            raise KafkaConfigurationError('No topics or partitions configured')

-        fetches = []
-        for topic_partition, offset in six.iteritems(offsets):
-            fetches.append(FetchRequest(topic_partition[0], topic_partition[1], offset, max_bytes))
+        if not self._offsets.fetch:
+            raise KafkaConfigurationError(
+                'No fetch offsets found when calling fetch_messages'
+            )

-        # client.send_fetch_request will collect topic/partition requests by leader
-        # and send each group as a single FetchRequest to the correct broker
-        try:
-            responses = self._client.send_fetch_request(fetches,
-                                                        max_wait_time=max_wait_time,
-                                                        min_bytes=min_bytes,
-                                                        fail_on_error=False)
-        except FailedPayloadsError:
-            logger.warning('FailedPayloadsError attempting to fetch data from kafka')
-            self._refresh_metadata_on_error()
-            return
+        fetches = [FetchRequest(topic, partition,
+                                self._offsets.fetch[(topic, partition)],
+                                max_bytes)
+                   for (topic, partition) in self._topics]
+
+        # send_fetch_request will batch topic/partition requests by leader
+        responses = self._client.send_fetch_request(
+            fetches,
+            max_wait_time=max_wait_time,
+            min_bytes=min_bytes,
+            fail_on_error=False
+        )

        for resp in responses:
-            topic_partition = (resp.topic, resp.partition)
+
+            if isinstance(resp, FailedPayloadsError):
+                logger.warning('FailedPayloadsError attempting to fetch data')
+                self._refresh_metadata_on_error()
+                continue
+
+            topic = kafka_bytestring(resp.topic)
+            partition = resp.partition
            try:
                check_error(resp)
            except OffsetOutOfRangeError:
-                logger.warning('OffsetOutOfRange: topic %s, partition %d, offset %d '
-                               '(Highwatermark: %d)',
-                               resp.topic, resp.partition,
-                               offsets[topic_partition], resp.highwaterMark)
+                logger.warning('OffsetOutOfRange: topic %s, partition %d, '
+                               'offset %d (Highwatermark: %d)',
+                               topic, partition,
+                               self._offsets.fetch[(topic, partition)],
+                               resp.highwaterMark)
                # Reset offset
-                self._offsets.fetch[topic_partition] = self._reset_partition_offset(topic_partition)
+                self._offsets.fetch[(topic, partition)] = (
+                    self._reset_partition_offset((topic, partition))
+                )
                continue

            except NotLeaderForPartitionError:
                logger.warning("NotLeaderForPartitionError for %s - %d. "
                               "Metadata may be out of date",
-                               resp.topic, resp.partition)
+                               topic, partition)
                self._refresh_metadata_on_error()
                continue

            except RequestTimedOutError:
                logger.warning("RequestTimedOutError for %s - %d",
-                               resp.topic, resp.partition)
+                               topic, partition)
                continue

            # Track server highwater mark
-            self._offsets.highwater[topic_partition] = resp.highwaterMark
+            self._offsets.highwater[(topic, partition)] = resp.highwaterMark

            # Yield each message
            # Kafka-python could raise an exception during iteration
            # we are not catching -- user will need to address
            for (offset, message) in resp.messages:
                # deserializer_class could raise an exception here
-                msg = KafkaMessage(resp.topic,
-                                   resp.partition,
-                                   offset, message.key,
-                                   self._config['deserializer_class'](message.value))
+                val = self._config['deserializer_class'](message.value)
+                msg = KafkaMessage(topic, partition, offset, message.key, val)

-                # Only increment fetch offset if we safely got the message and deserialized
-                self._offsets.fetch[topic_partition] = offset + 1
+                # in some cases the server will return earlier messages
+                # than we requested. skip them per kafka spec
+                if offset < self._offsets.fetch[(topic, partition)]:
+                    logger.debug('message offset less than fetched offset '
+                                 'skipping: %s', msg)
+                    continue
+                # Only increment fetch offset
+                # if we safely got the message and deserialized
+                self._offsets.fetch[(topic, partition)] = offset + 1

                # Then yield to user
                yield msg

    def get_partition_offsets(self, topic, partition, request_time_ms, max_num_offsets):
-        """
-        Request available fetch offsets for a single topic/partition
+        """Request available fetch offsets for a single topic/partition

-        Arguments:
-            topic (str)
-            partition (int)
+        Keyword Arguments:
+            topic (str): topic for offset request
+            partition (int): partition for offset request
            request_time_ms (int): Used to ask for all messages before a
-                certain time (ms). There are two special values. Specify -1 to receive the latest
-                offset (i.e. the offset of the next coming message) and -2 to receive the earliest
-                available offset. Note that because offsets are pulled in descending order, asking for
-                the earliest offset will always return you a single element.
-            max_num_offsets (int)
+                certain time (ms). There are two special values.
+                Specify -1 to receive the latest offset (i.e. the offset of the
+                next coming message) and -2 to receive the earliest available
+                offset. Note that because offsets are pulled in descending
+                order, asking for the earliest offset will always return you a
+                single element.
+            max_num_offsets (int): Maximum offsets to include in the OffsetResponse

        Returns:
-            offsets (list)
+            a list of offsets in the OffsetResponse submitted for the provided
+            topic / partition. See:
+            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
        """
        reqs = [OffsetRequest(topic, partition, request_time_ms, max_num_offsets)]

@@ -464,7 +439,8 @@ class KafkaConsumer(object):
        return resp.offsets

    def offsets(self, group=None):
-        """
+        """Get internal consumer offset values
+
        Keyword Arguments:
            group: Either "fetch", "commit", "task_done", or "highwater".
                If no group specified, returns all groups.
@@ -483,12 +459,25 @@ class KafkaConsumer(object):
            return dict(deepcopy(getattr(self._offsets, group)))

    def task_done(self, message):
-        """
-        Mark a fetched message as consumed.
+        """Mark a fetched message as consumed.
+
        Offsets for messages marked as "task_done" will be stored back
        to the kafka cluster for this consumer group on commit()
+
+        Arguments:
+            message (KafkaMessage): the message to mark as complete
+
+        Returns:
+            True, unless the topic-partition for this message has not
+            been configured for the consumer. In normal operation, this
+            should not happen. But see github issue 364.
        """
        topic_partition = (message.topic, message.partition)
+        if topic_partition not in self._topics:
+            logger.warning('Unrecognized topic/partition in task_done message: '
+                           '{0}:{1}'.format(*topic_partition))
+            return False
+
        offset = message.offset

        # Warn on non-contiguous offsets
@@ -513,17 +502,25 @@ class KafkaConsumer(object):
        if self._should_auto_commit():
            self.commit()

+        return True
+
    def commit(self):
-        """
-        Store consumed message offsets (marked via task_done())
+        """Store consumed message offsets (marked via task_done())
        to kafka cluster for this consumer_group.

-        **Note**: this functionality requires server version >=0.8.1.1
-        See `this wiki page <https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI>`_.
+        Returns:
+            True on success, or False if no offsets were found for commit
+
+        Note:
+            this functionality requires server version >=0.8.1.1
+            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
        """
        if not self._config['group_id']:
            logger.warning('Cannot commit without a group_id!')
-            raise KafkaConfigurationError('Attempted to commit offsets without a configured consumer group (group_id)')
+            raise KafkaConfigurationError(
+                'Attempted to commit offsets '
+                'without a configured consumer group (group_id)'
+            )

        # API supports storing metadata with each commit
        # but for now it is unused
@@ -547,13 +544,17 @@ class KafkaConsumer(object):
            if commit_offset == self._offsets.commit[topic_partition]:
                continue

-            commits.append(OffsetCommitRequest(topic_partition[0], topic_partition[1], commit_offset, metadata))
+            commits.append(
+                OffsetCommitRequest(topic_partition[0], topic_partition[1],
+                                    commit_offset, metadata)
+            )

        if commits:
            logger.info('committing consumer offsets to group %s', self._config['group_id'])
-            resps = self._client.send_offset_commit_request(self._config['group_id'],
-                                                            commits,
-                                                            fail_on_error=False)
+            resps = self._client.send_offset_commit_request(
+                kafka_bytestring(self._config['group_id']), commits,
+                fail_on_error=False
+            )

            for r in resps:
                check_error(r)
@@ -615,7 +616,7 @@ class KafkaConsumer(object):
        logger.info("Consumer fetching stored offsets")
        for topic_partition in self._topics:
            (resp,) = self._client.send_offset_fetch_request(
-                self._config['group_id'],
+                kafka_bytestring(self._config['group_id']),
                [OffsetFetchRequest(topic_partition[0], topic_partition[1])],
                fail_on_error=False)
            try:
@@ -664,7 +665,7 @@ class KafkaConsumer(object):
            # Otherwise we should re-raise the upstream exception
            # b/c it typically includes additional data about
            # the request that triggered it, and we do not want to drop that
-            raise
+            raise # pylint: disable-msg=E0704

        (offset, ) = self.get_partition_offsets(topic, partition,
                                                request_time_ms, max_num_offsets=1)
@@ -750,6 +751,22 @@ class KafkaConsumer(object):
    #

    def __repr__(self):
-        return '<KafkaConsumer topics=(%s)>' % ', '.join(["%s-%d" % topic_partition
-                                                          for topic_partition in
-                                                          self._topics])
+        return '<{0} topics=({1})>'.format(
+            self.__class__.__name__,
+            '|'.join(["%s-%d" % topic_partition
+                      for topic_partition in self._topics])
+        )
+
+    #
+    # other private methods
+    #
+
+    def _deprecate_configs(self, **configs):
+        for old, new in six.iteritems(DEPRECATED_CONFIG_KEYS):
+            if old in configs:
+                logger.warning('Deprecated Kafka Consumer configuration: %s. '
+                               'Please use %s instead.', old, new)
+                old_value = configs.pop(old)
+                if new not in configs:
+                    configs[new] = old_value
+        return configs
--- a/kafka/consumer/multiprocess.py
+++ b/kafka/consumer/multiprocess.py
@@ -1,24 +1,31 @@
 from __future__ import absolute_import

+from collections import namedtuple
 import logging
-import time
-from multiprocessing import Process, Queue as MPQueue, Event, Value
-
+from multiprocessing import Process, Manager as MPManager
 try:
-    from Queue import Empty
-except ImportError:  # python 2
-    from queue import Empty
+    import queue # python 3
+except ImportError:
+    import Queue as queue # python 2
+import time

+from ..common import KafkaError
 from .base import (
+    Consumer,
    AUTO_COMMIT_MSG_COUNT, AUTO_COMMIT_INTERVAL,
-    NO_MESSAGES_WAIT_TIME_SECONDS
+    NO_MESSAGES_WAIT_TIME_SECONDS,
+    FULL_QUEUE_WAIT_TIME_SECONDS,
+    MAX_BACKOFF_SECONDS,
 )
-from .simple import Consumer, SimpleConsumer
-
-log = logging.getLogger("kafka")
+from .simple import SimpleConsumer


-def _mp_consume(client, group, topic, chunk, queue, start, exit, pause, size):
+log = logging.getLogger(__name__)
+
+Events = namedtuple("Events", ["start", "pause", "exit"])
+
+
+def _mp_consume(client, group, topic, queue, size, events, **consumer_options):
    """
    A child process worker which consumes messages based on the
    notifications given by the controller process
@@ -28,51 +35,67 @@ def _mp_consume(client, group, topic, chunk, queue, start, exit, pause, size):
    functionality breaks unless this function is kept outside of a class
    """

-    # Make the child processes open separate socket connections
-    client.reinit()
+    # Initial interval for retries in seconds.
+    interval = 1
+    while not events.exit.is_set():
+        try:
+            # Make the child processes open separate socket connections
+            client.reinit()

-    # We will start consumers without auto-commit. Auto-commit will be
-    # done by the master controller process.
-    consumer = SimpleConsumer(client, group, topic,
-                              partitions=chunk,
-                              auto_commit=False,
-                              auto_commit_every_n=None,
-                              auto_commit_every_t=None)
+            # We will start consumers without auto-commit. Auto-commit will be
+            # done by the master controller process.
+            consumer = SimpleConsumer(client, group, topic,
+                                      auto_commit=False,
+                                      auto_commit_every_n=None,
+                                      auto_commit_every_t=None,
+                                      **consumer_options)

-    # Ensure that the consumer provides the partition information
-    consumer.provide_partition_info()
+            # Ensure that the consumer provides the partition information
+            consumer.provide_partition_info()

-    while True:
-        # Wait till the controller indicates us to start consumption
-        start.wait()
+            while True:
+                # Wait till the controller indicates us to start consumption
+                events.start.wait()

-        # If we are asked to quit, do so
-        if exit.is_set():
-            break
+                # If we are asked to quit, do so
+                if events.exit.is_set():
+                    break

-        # Consume messages and add them to the queue. If the controller
-        # indicates a specific number of messages, follow that advice
-        count = 0
+                # Consume messages and add them to the queue. If the controller
+                # indicates a specific number of messages, follow that advice
+                count = 0

-        message = consumer.get_message()
-        if message:
-            queue.put(message)
-            count += 1
+                message = consumer.get_message()
+                if message:
+                    while True:
+                        try:
+                            queue.put(message, timeout=FULL_QUEUE_WAIT_TIME_SECONDS)
+                            break
+                        except queue.Full:
+                            if events.exit.is_set(): break

-            # We have reached the required size. The controller might have
-            # more than what he needs. Wait for a while.
-            # Without this logic, it is possible that we run into a big
-            # loop consuming all available messages before the controller
-            # can reset the 'start' event
-            if count == size.value:
-                pause.wait()
+                    count += 1

-        else:
-            # In case we did not receive any message, give up the CPU for
-            # a while before we try again
-            time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)
+                    # We have reached the required size. The controller might have
+                    # more than what he needs. Wait for a while.
+                    # Without this logic, it is possible that we run into a big
+                    # loop consuming all available messages before the controller
+                    # can reset the 'start' event
+                    if count == size.value:
+                        events.pause.wait()

-    consumer.stop()
+                else:
+                    # In case we did not receive any message, give up the CPU for
+                    # a while before we try again
+                    time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)
+
+            consumer.stop()
+
+        except KafkaError as e:
+            # Retry with exponential backoff
+            log.error("Problem communicating with Kafka (%s), retrying in %d seconds..." % (e, interval))
+            time.sleep(interval)
+            interval = interval*2 if interval*2 < MAX_BACKOFF_SECONDS else MAX_BACKOFF_SECONDS


 class MultiProcessConsumer(Consumer):
@@ -83,9 +106,12 @@ class MultiProcessConsumer(Consumer):
    Arguments:
        client: a connected KafkaClient
        group: a name for this consumer, used for offset storage and must be unique
+            If you are connecting to a server that does not support offset
+            commit/fetch (any prior to 0.8.1.1), then you *must* set this to None
        topic: the topic to consume

    Keyword Arguments:
+        partitions: An optional list of partitions to consume the data from
        auto_commit: default True. Whether or not to auto commit the offsets
        auto_commit_every_n: default 100. How many messages to consume
            before a commit
@@ -102,51 +128,61 @@ class MultiProcessConsumer(Consumer):
    commit method on this class. A manual call to commit will also reset
    these triggers
    """
-    def __init__(self, client, group, topic, auto_commit=True,
+    def __init__(self, client, group, topic,
+                 partitions=None,
+                 auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL,
-                 num_procs=1, partitions_per_proc=0):
+                 num_procs=1,
+                 partitions_per_proc=0,
+                 **simple_consumer_options):

        # Initiate the base consumer class
        super(MultiProcessConsumer, self).__init__(
            client, group, topic,
-            partitions=None,
+            partitions=partitions,
            auto_commit=auto_commit,
            auto_commit_every_n=auto_commit_every_n,
            auto_commit_every_t=auto_commit_every_t)

        # Variables for managing and controlling the data flow from
        # consumer child process to master
-        self.queue = MPQueue(1024)  # Child consumers dump messages into this
-        self.start = Event()        # Indicates the consumers to start fetch
-        self.exit = Event()         # Requests the consumers to shutdown
-        self.pause = Event()        # Requests the consumers to pause fetch
-        self.size = Value('i', 0)   # Indicator of number of messages to fetch
+        manager = MPManager()
+        self.queue = manager.Queue(1024)  # Child consumers dump messages into this
+        self.events = Events(
+            start = manager.Event(),        # Indicates the consumers to start fetch
+            exit  = manager.Event(),        # Requests the consumers to shutdown
+            pause = manager.Event())        # Requests the consumers to pause fetch
+        self.size = manager.Value('i', 0)   # Indicator of number of messages to fetch

-        partitions = self.offsets.keys()
+        # dict.keys() returns a view in py3 + it's not a thread-safe operation
+        # http://blog.labix.org/2008/06/27/watch-out-for-listdictkeys-in-python-3
+        # It's safer to copy dict as it only runs during the init.
+        partitions = list(self.offsets.copy().keys())

-        # If unspecified, start one consumer per partition
+        # By default, start one consumer process for all partitions
        # The logic below ensures that
        # * we do not cross the num_procs limit
        # * we have an even distribution of partitions among processes
-        if not partitions_per_proc:
-            partitions_per_proc = round(len(partitions) * 1.0 / num_procs)
-            if partitions_per_proc < num_procs * 0.5:
-                partitions_per_proc += 1
+
+        if partitions_per_proc:
+            num_procs = len(partitions) / partitions_per_proc
+            if num_procs * partitions_per_proc < len(partitions):
+                num_procs += 1

        # The final set of chunks
-        chunker = lambda *x: [] + list(x)
-        chunks = map(chunker, *[iter(partitions)] * int(partitions_per_proc))
+        chunks = [partitions[proc::num_procs] for proc in range(num_procs)]

        self.procs = []
        for chunk in chunks:
-            chunk = filter(lambda x: x is not None, chunk)
-            args = (client.copy(),
-                    group, topic, list(chunk),
-                    self.queue, self.start, self.exit,
-                    self.pause, self.size)
+            options = {'partitions': list(chunk)}
+            if simple_consumer_options:
+                simple_consumer_options.pop('partitions', None)
+                options.update(simple_consumer_options)

-            proc = Process(target=_mp_consume, args=args)
+            args = (client.copy(), self.group, self.topic, self.queue,
+                    self.size, self.events)
+            proc = Process(target=_mp_consume, args=args, kwargs=options)
            proc.daemon = True
            proc.start()
            self.procs.append(proc)
@@ -157,9 +193,9 @@ class MultiProcessConsumer(Consumer):

    def stop(self):
        # Set exit and start off all waiting consumers
-        self.exit.set()
-        self.pause.set()
-        self.start.set()
+        self.events.exit.set()
+        self.events.pause.set()
+        self.events.start.set()

        for proc in self.procs:
            proc.join()
@@ -174,27 +210,27 @@ class MultiProcessConsumer(Consumer):
        # Trigger the consumer procs to start off.
        # We will iterate till there are no more messages available
        self.size.value = 0
-        self.pause.set()
+        self.events.pause.set()

        while True:
-            self.start.set()
+            self.events.start.set()
            try:
                # We will block for a small while so that the consumers get
                # a chance to run and put some messages in the queue
                # TODO: This is a hack and will make the consumer block for
                # at least one second. Need to find a better way of doing this
                partition, message = self.queue.get(block=True, timeout=1)
-            except Empty:
+            except queue.Empty:
                break

            # Count, check and commit messages if necessary
            self.offsets[partition] = message.offset + 1
-            self.start.clear()
+            self.events.start.clear()
            self.count_since_commit += 1
            self._auto_commit()
            yield message

-        self.start.clear()
+        self.events.start.clear()

    def get_messages(self, count=1, block=True, timeout=10):
        """
@@ -202,10 +238,12 @@ class MultiProcessConsumer(Consumer):

        Keyword Arguments:
            count: Indicates the maximum number of messages to be fetched
-            block: If True, the API will block till some messages are fetched.
-            timeout: If block is True, the function will block for the specified
-                time (in seconds) until count messages is fetched. If None,
-                it will block forever.
+            block: If True, the API will block till all messages are fetched.
+                If block is a positive integer the API will block until that
+                many messages are fetched.
+            timeout: When blocking is requested the function will block for
+                the specified time (in seconds) until count messages is
+                fetched. If None, it will block forever.
        """
        messages = []

@@ -214,7 +252,7 @@ class MultiProcessConsumer(Consumer):
        # necessary, but these will not be committed to kafka. Also, the extra
        # messages can be provided in subsequent runs
        self.size.value = count
-        self.pause.clear()
+        self.events.pause.clear()

        if timeout is not None:
            max_time = time.time() + timeout
@@ -226,22 +264,25 @@ class MultiProcessConsumer(Consumer):
            # go into overdrive and keep consuming thousands of
            # messages when the user might need only a few
            if self.queue.empty():
-                self.start.set()
+                self.events.start.set()

+            block_next_call = block is True or block > len(messages)
            try:
-                partition, message = self.queue.get(block, timeout)
-            except Empty:
+                partition, message = self.queue.get(block_next_call,
+                                                    timeout)
+            except queue.Empty:
                break

-            messages.append(message)
+            _msg = (partition, message) if self.partition_info else message
+            messages.append(_msg)
            new_offsets[partition] = message.offset + 1
            count -= 1
            if timeout is not None:
                timeout = max_time - time.time()

        self.size.value = 0
-        self.start.clear()
-        self.pause.set()
+        self.events.start.clear()
+        self.events.pause.set()

        # Update and commit offsets if necessary
        self.offsets.update(new_offsets)
--- a/kafka/consumer/simple.py
+++ b/kafka/consumer/simple.py
@@ -2,22 +2,18 @@ from __future__ import absolute_import

 try:
    from itertools import zip_longest as izip_longest, repeat  # pylint: disable-msg=E0611
-except ImportError:  # python 2
-    from itertools import izip_longest as izip_longest, repeat
+except ImportError:
+    from itertools import izip_longest as izip_longest, repeat # python 2
 import logging
+try:
+    import queue # python 3
+except ImportError:
+    import Queue as queue # python 2
+import sys
 import time

 import six

-try:
-    from Queue import Empty, Queue
-except ImportError:  # python 2
-    from queue import Empty, Queue
-
-from kafka.common import (
-    FetchRequest, OffsetRequest,
-    ConsumerFetchSizeTooSmall, ConsumerNoMoreData
-)
 from .base import (
    Consumer,
    FETCH_DEFAULT_BLOCK_TIMEOUT,
@@ -30,8 +26,16 @@ from .base import (
    ITER_TIMEOUT_SECONDS,
    NO_MESSAGES_WAIT_TIME_SECONDS
 )
+from ..common import (
+    FetchRequest, KafkaError, OffsetRequest,
+    ConsumerFetchSizeTooSmall, ConsumerNoMoreData,
+    UnknownTopicOrPartitionError, NotLeaderForPartitionError,
+    OffsetOutOfRangeError, FailedPayloadsError, check_error
+)
+
+
+log = logging.getLogger(__name__)

-log = logging.getLogger("kafka")

 class FetchContext(object):
    """
@@ -70,6 +74,8 @@ class SimpleConsumer(Consumer):
    Arguments:
        client: a connected KafkaClient
        group: a name for this consumer, used for offset storage and must be unique
+            If you are connecting to a server that does not support offset
+            commit/fetch (any prior to 0.8.1.1), then you *must* set this to None
        topic: the topic to consume

    Keyword Arguments:
@@ -94,6 +100,10 @@ class SimpleConsumer(Consumer):
             message in the iterator before exiting. None means no
             timeout, so it will wait forever.

+        auto_offset_reset: default largest. Reset partition offsets upon
+             OffsetOutOfRangeError. Valid values are largest and smallest.
+             Otherwise, do not reset the offsets and raise OffsetOutOfRangeError.
+
    Auto commit details:
    If both auto_commit_every_n and auto_commit_every_t are set, they will
    reset one another when one is triggered. These triggers simply call the
@@ -106,7 +116,8 @@ class SimpleConsumer(Consumer):
                 fetch_size_bytes=FETCH_MIN_BYTES,
                 buffer_size=FETCH_BUFFER_SIZE_BYTES,
                 max_buffer_size=MAX_FETCH_BUFFER_SIZE_BYTES,
-                 iter_timeout=None):
+                 iter_timeout=None,
+                 auto_offset_reset='largest'):
        super(SimpleConsumer, self).__init__(
            client, group, topic,
            partitions=partitions,
@@ -115,55 +126,117 @@ class SimpleConsumer(Consumer):
            auto_commit_every_t=auto_commit_every_t)

        if max_buffer_size is not None and buffer_size > max_buffer_size:
-            raise ValueError("buffer_size (%d) is greater than "
-                             "max_buffer_size (%d)" %
+            raise ValueError('buffer_size (%d) is greater than '
+                             'max_buffer_size (%d)' %
                             (buffer_size, max_buffer_size))
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
-        self.partition_info = False     # Do not return partition info in msgs
        self.fetch_max_wait_time = FETCH_MAX_WAIT_TIME
        self.fetch_min_bytes = fetch_size_bytes
        self.fetch_offsets = self.offsets.copy()
        self.iter_timeout = iter_timeout
-        self.queue = Queue()
+        self.auto_offset_reset = auto_offset_reset
+        self.queue = queue.Queue()

    def __repr__(self):
        return '<SimpleConsumer group=%s, topic=%s, partitions=%s>' % \
            (self.group, self.topic, str(self.offsets.keys()))

-    def provide_partition_info(self):
-        """
-        Indicates that partition info must be returned by the consumer
-        """
-        self.partition_info = True
+    def reset_partition_offset(self, partition):
+        """Update offsets using auto_offset_reset policy (smallest|largest)

-    def seek(self, offset, whence):
+        Arguments:
+            partition (int): the partition for which offsets should be updated
+
+        Returns: Updated offset on success, None on failure
+
+        Raises:
+            OffsetOutOfRangeError: if auto_offset_reset is neither 'largest'
+                nor 'smallest' and no exception is currently being handled.
+                When called while an exception is being handled, that
+                exception is re-raised instead.
+        """
+        # Sentinel values passed as the third OffsetRequest field
+        LATEST = -1
+        EARLIEST = -2
+        if self.auto_offset_reset == 'largest':
+            reqs = [OffsetRequest(self.topic, partition, LATEST, 1)]
+        elif self.auto_offset_reset == 'smallest':
+            reqs = [OffsetRequest(self.topic, partition, EARLIEST, 1)]
+        else:
+            # Let's raise a reasonable exception type if the user calls
+            # outside of an exception context
+            if sys.exc_info() == (None, None, None):
+                raise OffsetOutOfRangeError('Cannot reset partition offsets without a '
+                                            'valid auto_offset_reset setting '
+                                            '(largest|smallest)')
+            # Otherwise we should re-raise the upstream exception
+            # b/c it typically includes additional data about
+            # the request that triggered it, and we do not want to drop that
+            raise # pylint: disable-msg=E0704
+
+        # send_offset_request
+        log.info('Resetting topic-partition offset to %s for %s:%d',
+                 self.auto_offset_reset, self.topic, partition)
+        try:
+            (resp, ) = self.client.send_offset_request(reqs)
+        except KafkaError as e:
+            # Best effort: the failure is only logged, and the method falls
+            # through to an implicit `return None` (the documented failure value)
+            log.error('%s sending offset request for %s:%d',
+                      e.__class__.__name__, self.topic, partition)
+        else:
+            # Keep the committed offset and the fetch offset in sync
+            self.offsets[partition] = resp.offsets[0]
+            self.fetch_offsets[partition] = resp.offsets[0]
+            return resp.offsets[0]
+
+    def seek(self, offset, whence=None, partition=None):
        """
        Alter the current offset in the consumer, similar to fseek

        Arguments:
            offset: how much to modify the offset
-            whence: where to modify it from
+            whence: where to modify it from, default is None

-                * 0 is relative to the earliest available offset (head)
-                * 1 is relative to the current offset
-                * 2 is relative to the latest known offset (tail)
+                * None is an absolute offset
+                * 0    is relative to the earliest available offset (head)
+                * 1    is relative to the current offset
+                * 2    is relative to the latest known offset (tail)
+
+            partition: modify which partition, default is None.
+                If partition is None, would modify all partitions.
        """

-        if whence == 1:  # relative to current position
-            for partition, _offset in self.offsets.items():
-                self.offsets[partition] = _offset + offset
+        if whence is None: # set an absolute offset
+            if partition is None:
+                for tmp_partition in self.offsets:
+                    self.offsets[tmp_partition] = offset
+            else:
+                self.offsets[partition] = offset
+        elif whence == 1:  # relative to current position
+            if partition is None:
+                for tmp_partition, _offset in self.offsets.items():
+                    self.offsets[tmp_partition] = _offset + offset
+            else:
+                self.offsets[partition] += offset
        elif whence in (0, 2):  # relative to beginning or end
-            # divide the request offset by number of partitions,
-            # distribute the remained evenly
-            (delta, rem) = divmod(offset, len(self.offsets))
-            deltas = {}
-            for partition, r in izip_longest(self.offsets.keys(),
-                                             repeat(1, rem), fillvalue=0):
-                deltas[partition] = delta + r
-
            reqs = []
-            for partition in self.offsets.keys():
+            deltas = {}
+            if partition is None:
+                # divide the request offset by number of partitions,
+                # distribute the remained evenly
+                (delta, rem) = divmod(offset, len(self.offsets))
+                for tmp_partition, r in izip_longest(self.offsets.keys(),
+                                                     repeat(1, rem),
+                                                     fillvalue=0):
+                    deltas[tmp_partition] = delta + r
+
+                for tmp_partition in self.offsets.keys():
+                    if whence == 0:
+                        reqs.append(OffsetRequest(self.topic,
+                                                  tmp_partition,
+                                                  -2,
+                                                  1))
+                    elif whence == 2:
+                        reqs.append(OffsetRequest(self.topic,
+                                                  tmp_partition,
+                                                  -1,
+                                                  1))
+                    else:
+                        pass
+            else:
+                deltas[partition] = offset
                if whence == 0:
                    reqs.append(OffsetRequest(self.topic, partition, -2, 1))
                elif whence == 2:
@@ -176,15 +249,15 @@ class SimpleConsumer(Consumer):
                self.offsets[resp.partition] = \
                    resp.offsets[0] + deltas[resp.partition]
        else:
-            raise ValueError("Unexpected value for `whence`, %d" % whence)
+            raise ValueError('Unexpected value for `whence`, %d' % whence)

        # Reset queue and fetch offsets since they are invalid
        self.fetch_offsets = self.offsets.copy()
+        self.count_since_commit += 1
        if self.auto_commit:
-            self.count_since_commit += 1
            self.commit()

-        self.queue = Queue()
+        self.queue = queue.Queue()

    def get_messages(self, count=1, block=True, timeout=0.1):
        """
@@ -192,42 +265,42 @@ class SimpleConsumer(Consumer):

        Keyword Arguments:
            count: Indicates the maximum number of messages to be fetched
-            block: If True, the API will block till some messages are fetched.
-            timeout: If block is True, the function will block for the specified
-                time (in seconds) until count messages is fetched. If None,
-                it will block forever.
+            block: If True, the API will block till all messages are fetched.
+                If block is a positive integer the API will block until that
+                many messages are fetched.
+            timeout: When blocking is requested the function will block for
+                the specified time (in seconds) until count messages is
+                fetched. If None, it will block forever.
        """
        messages = []
        if timeout is not None:
-            max_time = time.time() + timeout
+            timeout += time.time()

        new_offsets = {}
-        while count > 0 and (timeout is None or timeout > 0):
-            result = self._get_message(block, timeout, get_partition_info=True,
+        log.debug('getting %d messages', count)
+        while len(messages) < count:
+            block_time = timeout - time.time()
+            log.debug('calling _get_message block=%s timeout=%s', block, block_time)
+            block_next_call = block is True or block > len(messages)
+            result = self._get_message(block_next_call, block_time,
+                                       get_partition_info=True,
                                       update_offset=False)
-            if result:
-                partition, message = result
-                if self.partition_info:
-                    messages.append(result)
-                else:
-                    messages.append(message)
-                new_offsets[partition] = message.offset + 1
-                count -= 1
-            else:
-                # Ran out of messages for the last request.
-                if not block:
-                    # If we're not blocking, break.
-                    break
+            log.debug('got %s from _get_messages', result)
+            if not result:
+                if block_next_call and (timeout is None or time.time() <= timeout):
+                    continue
+                break

-            # If we have a timeout, reduce it to the
-            # appropriate value
-            if timeout is not None:
-                timeout = max_time - time.time()
+            partition, message = result
+            _msg = (partition, message) if self.partition_info else message
+            messages.append(_msg)
+            new_offsets[partition] = message.offset + 1

        # Update and commit offsets if necessary
        self.offsets.update(new_offsets)
        self.count_since_commit += len(messages)
        self._auto_commit()
+        log.debug('got %d messages: %s', len(messages), messages)
        return messages

    def get_message(self, block=True, timeout=0.1, get_partition_info=None):
@@ -241,10 +314,16 @@ class SimpleConsumer(Consumer):
        If get_partition_info is True, returns (partition, message)
        If get_partition_info is False, returns message
        """
-        if self.queue.empty():
+        start_at = time.time()
+        while self.queue.empty():
            # We're out of messages, go grab some more.
+            log.debug('internal queue empty, fetching more messages')
            with FetchContext(self, block, timeout):
                self._fetch()
+
+            if not block or time.time() > (start_at + timeout):
+                break
+
        try:
            partition, message = self.queue.get_nowait()

@@ -262,7 +341,8 @@ class SimpleConsumer(Consumer):
                return partition, message
            else:
                return message
-        except Empty:
+        except queue.Empty:
+            log.debug('internal queue empty after fetch - returning None')
            return None

    def __iter__(self):
@@ -297,21 +377,55 @@ class SimpleConsumer(Consumer):
            responses = self.client.send_fetch_request(
                requests,
                max_wait_time=int(self.fetch_max_wait_time),
-                min_bytes=self.fetch_min_bytes)
+                min_bytes=self.fetch_min_bytes,
+                fail_on_error=False
+            )

            retry_partitions = {}
            for resp in responses:
+
+                try:
+                    check_error(resp)
+                except UnknownTopicOrPartitionError:
+                    log.error('UnknownTopicOrPartitionError for %s:%d',
+                              resp.topic, resp.partition)
+                    self.client.reset_topic_metadata(resp.topic)
+                    raise
+                except NotLeaderForPartitionError:
+                    log.error('NotLeaderForPartitionError for %s:%d',
+                              resp.topic, resp.partition)
+                    self.client.reset_topic_metadata(resp.topic)
+                    continue
+                except OffsetOutOfRangeError:
+                    log.warning('OffsetOutOfRangeError for %s:%d. '
+                                'Resetting partition offset...',
+                                resp.topic, resp.partition)
+                    self.reset_partition_offset(resp.partition)
+                    # Retry this partition
+                    retry_partitions[resp.partition] = partitions[resp.partition]
+                    continue
+                except FailedPayloadsError as e:
+                    log.warning('FailedPayloadsError for %s:%d',
+                                e.payload.topic, e.payload.partition)
+                    # Retry this partition
+                    retry_partitions[e.payload.partition] = partitions[e.payload.partition]
+                    continue
+
                partition = resp.partition
                buffer_size = partitions[partition]
                try:
                    for message in resp.messages:
+                        if message.offset < self.fetch_offsets[partition]:
+                            log.debug('Skipping message %s because its offset is less than the consumer offset',
+                                      message)
+                            continue
                        # Put the message in our queue
                        self.queue.put((partition, message))
                        self.fetch_offsets[partition] = message.offset + 1
                except ConsumerFetchSizeTooSmall:
                    if (self.max_buffer_size is not None and
                            buffer_size == self.max_buffer_size):
-                        log.error("Max fetch size %d too small",
+                        log.error('Max fetch size %d too small',
                                  self.max_buffer_size)
                        raise
                    if self.max_buffer_size is None:
@@ -319,12 +433,12 @@ class SimpleConsumer(Consumer):
                    else:
                        buffer_size = min(buffer_size * 2,
                                          self.max_buffer_size)
-                    log.warn("Fetch size too small, increase to %d (2x) "
-                             "and retry", buffer_size)
+                    log.warning('Fetch size too small, increase to %d (2x) '
+                                'and retry', buffer_size)
                    retry_partitions[partition] = buffer_size
                except ConsumerNoMoreData as e:
-                    log.debug("Iteration was ended by %r", e)
+                    log.debug('Iteration was ended by %r', e)
                except StopIteration:
                    # Stop iterating through this partition
-                    log.debug("Done iterating over partition %s" % partition)
+                    log.debug('Done iterating over partition %s', partition)
            partitions = retry_partitions
--- a/kafka/partitioner/init.py
+++ b/kafka/partitioner/init.py
@@ -1,6 +1,7 @@
 from .roundrobin import RoundRobinPartitioner
-from .hashed import HashedPartitioner
+from .hashed import HashedPartitioner, Murmur2Partitioner, LegacyPartitioner

 __all__ = [
-    'RoundRobinPartitioner', 'HashedPartitioner'
+    'RoundRobinPartitioner', 'HashedPartitioner', 'Murmur2Partitioner',
+    'LegacyPartitioner'
 ]
--- a/kafka/partitioner/base.py
+++ b/kafka/partitioner/base.py
@@ -12,14 +12,13 @@ class Partitioner(object):
        """
        self.partitions = partitions

-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
        """
        Takes a string key and num_partitions as argument and returns
        a partition to be used for the message

        Arguments:
-            partitions: The list of partitions is passed in every call. This
-                may look like an overhead, but it will be useful
-                (in future) when we handle cases like rebalancing
+            key: the key to use for partitioning
+            partitions: (optional) a list of partitions.
+
+        Raises:
+            NotImplementedError: always; this base implementation only
+                defines the interface and has to be overridden.
        """
        raise NotImplementedError('partition function has to be implemented')
--- a/kafka/partitioner/hashed.py
+++ b/kafka/partitioner/hashed.py
@@ -1,12 +1,110 @@
+import six
+
 from .base import Partitioner

-class HashedPartitioner(Partitioner):
+
+class Murmur2Partitioner(Partitioner):
    """
+    Implements a partitioner which selects the target partition based on
+    the hash of the key. Attempts to apply the same hashing
+    function as mainline java client.
+    """
+    def partition(self, key, partitions=None):
+        """Pick a partition for key using the murmur2 hash.
+
+        Arguments:
+            key: the key to hash; passed unchanged to murmur2()
+            partitions: (optional) a list of partitions. When empty or None,
+                self.partitions is used instead.
+
+        Returns: the element of the partition list selected by the hash
+        """
+        if not partitions:
+            partitions = self.partitions
+
+        # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java#L69
+        # Masking with 0x7fffffff clears bit 31, so only the low 31 bits of
+        # the hash take part in the modulo.
+        idx = (murmur2(key) & 0x7fffffff) % len(partitions)
+
+        return partitions[idx]
+
+
+class LegacyPartitioner(Partitioner):
+    """DEPRECATED -- See Issue 374
+
    Implements a partitioner which selects the target partition based on
    the hash of the key
    """
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
+        """Pick a partition for key using Python's built-in hash().
+
+        Arguments:
+            key: the key to hash; must be hashable
+            partitions: (optional) a list of partitions. When empty or None,
+                self.partitions is used instead.
+
+        Returns: the element of the partition list at hash(key) % len(list)
+        """
+        # NOTE(review): built-in hash() is interpreter-specific and, for
+        # str/bytes, may be salted per process (PYTHONHASHSEED), so the
+        # mapping is not stable across clients -- presumably the reason for
+        # the deprecation; confirm against Issue 374.
+        if not partitions:
+            partitions = self.partitions
        size = len(partitions)
        idx = hash(key) % size

        return partitions[idx]
+
+
+# Default will change to Murmur2 in 0.10 release
+HashedPartitioner = LegacyPartitioner
+
+
+# https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L244
+def murmur2(key):
+    """Pure-python Murmur2 implementation.
+
+    Based on java client, see org.apache.kafka.common.utils.Utils.murmur2
+
+    Args:
+        key: bytes or bytearray are hashed as-is (this includes `str` on
+            Python 2); any other object is converted with
+            str(key).encode() using the default encoding first
+
+    Returns: MurmurHash2 of the key bytes, as an unsigned 32-bit integer
+    """
+
+    # Raw byte strings must not be round-tripped through str().encode():
+    # on Python 2 that implies an ASCII decode, which raises for binary
+    # (non-ASCII) keys. Wrapping in bytearray also guarantees that indexing
+    # yields ints on both Python 2 and Python 3.
+    if isinstance(key, (bytes, bytearray)):
+        data = bytearray(key)
+    else:
+        data = bytearray(str(key).encode())
+
+    length = len(data)
+    seed = 0x9747b28c
+    # 'm' and 'r' are mixing constants generated offline.
+    # They're not really 'magic', they just happen to work well.
+    m = 0x5bd1e995
+    r = 24
+
+    # Initialize the hash to a random value
+    h = seed ^ length
+    length4 = length // 4
+
+    # Mix the input four bytes (one little-endian 32-bit word) at a time.
+    # Every intermediate result is masked to 32 bits to emulate Java's
+    # overflowing int arithmetic.
+    for i in range(length4):
+        i4 = i * 4
+        k = ((data[i4 + 0] & 0xff) +
+            ((data[i4 + 1] & 0xff) << 8) +
+            ((data[i4 + 2] & 0xff) << 16) +
+            ((data[i4 + 3] & 0xff) << 24))
+        k &= 0xffffffff
+        k *= m
+        k &= 0xffffffff
+        k ^= (k % 0x100000000) >> r # k ^= k >>> r
+        k &= 0xffffffff
+        k *= m
+        k &= 0xffffffff
+
+        h *= m
+        h &= 0xffffffff
+        h ^= k
+        h &= 0xffffffff
+
+    # Handle the last few bytes of the input array
+    # (length & ~3) is the index of the first byte after the last full word
+    extra_bytes = length % 4
+    if extra_bytes >= 3:
+        h ^= (data[(length & ~3) + 2] & 0xff) << 16
+        h &= 0xffffffff
+    if extra_bytes >= 2:
+        h ^= (data[(length & ~3) + 1] & 0xff) << 8
+        h &= 0xffffffff
+    if extra_bytes >= 1:
+        h ^= (data[length & ~3] & 0xff)
+        h &= 0xffffffff
+        h *= m
+        h &= 0xffffffff
+
+    # Final avalanche
+    h ^= (h % 0x100000000) >> 13 # h >>> 13;
+    h &= 0xffffffff
+    h *= m
+    h &= 0xffffffff
+    h ^= (h % 0x100000000) >> 15 # h >>> 15;
+    h &= 0xffffffff
+
+    return h
--- a/kafka/partitioner/roundrobin.py
+++ b/kafka/partitioner/roundrobin.py
@@ -15,9 +15,9 @@ class RoundRobinPartitioner(Partitioner):
        self.partitions = partitions
        self.iterpart = cycle(partitions)

-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
+        """Return the next partition from the round-robin cycle.
+
+        Arguments:
+            key: not used by this method
+            partitions: (optional) a list of partitions. When given and
+                different from self.partitions, it is handed to
+                _set_partitions() before the next partition is drawn.
+        """
        # Refresh the partition list if necessary
-        if self.partitions != partitions:
+        if partitions and self.partitions != partitions:
            self._set_partitions(partitions)

        return next(self.iterpart)
--- a/kafka/producer/base.py
+++ b/kafka/producer/base.py
@@ -1,84 +1,223 @@
 from __future__ import absolute_import

+import atexit
 import logging
 import time

 try:
-    from queue import Empty
+    from queue import Empty, Full, Queue
 except ImportError:
-    from Queue import Empty
+    from Queue import Empty, Full, Queue
 from collections import defaultdict
-from multiprocessing import Queue, Process
+
+from threading import Thread, Event

 import six

 from kafka.common import (
-    ProduceRequest, TopicAndPartition, UnsupportedCodecError
+    ProduceRequest, ProduceResponse, TopicAndPartition, RetryOptions,
+    kafka_errors, UnsupportedCodecError, FailedPayloadsError,
+    RequestTimedOutError, AsyncProducerQueueFull, UnknownError,
+    RETRY_ERROR_TYPES, RETRY_BACKOFF_ERROR_TYPES, RETRY_REFRESH_ERROR_TYPES
 )
-from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set

-log = logging.getLogger("kafka")
+from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set
+from kafka.util import kafka_bytestring
+
+log = logging.getLogger('kafka.producer')

 BATCH_SEND_DEFAULT_INTERVAL = 20
 BATCH_SEND_MSG_COUNT = 20

+# unlimited
+ASYNC_QUEUE_MAXSIZE = 0
+ASYNC_QUEUE_PUT_TIMEOUT = 0
+# unlimited retries by default
+ASYNC_RETRY_LIMIT = None
+ASYNC_RETRY_BACKOFF_MS = 100
+ASYNC_RETRY_ON_TIMEOUTS = True
+ASYNC_LOG_MESSAGES_ON_ERROR = True
+
 STOP_ASYNC_PRODUCER = -1
+ASYNC_STOP_TIMEOUT_SECS = 30
+
+SYNC_FAIL_ON_ERROR_DEFAULT = True


 def _send_upstream(queue, client, codec, batch_time, batch_size,
-                   req_acks, ack_timeout):
-    """
-    Listen on the queue for a specified number of messages or till
-    a specified timeout and send them upstream to the brokers in one
-    request
+                   req_acks, ack_timeout, retry_options, stop_event,
+                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
+                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
+                   codec_compresslevel=None):
+    """Private method to manage producing messages asynchronously

-    NOTE: Ideally, this should have been a method inside the Producer
-    class. However, multiprocessing module has issues in windows. The
-    functionality breaks unless this function is kept outside of a class
-    """
-    stop = False
-    client.reinit()
+    Listens on the queue for a specified number of messages or until
+    a specified timeout and then sends messages to the brokers in grouped
+    requests (one per broker).
+
+    Messages placed on the queue should be tuples that conform to this format:
+        ((topic, partition), message, key)
+
+    Currently does not mark messages with task_done. Do not attempt to join()!
+
+    Arguments:
+        queue (Queue): the queue from which to get messages
+        client (KafkaClient): instance to use for communicating with brokers
+        codec (kafka.protocol.ALL_CODECS): compression codec to use
+        batch_time (int): interval in seconds to send message batches
+        batch_size (int): count of messages that will trigger an immediate send
+        req_acks: required acks to use with ProduceRequests. see server protocol
+        ack_timeout: timeout to wait for required acks. see server protocol
+        retry_options (RetryOptions): settings for retry limits, backoff etc
+        stop_event (threading.Event): event to monitor for shutdown signal.
+            when this event is 'set', the producer will stop sending messages.
+        log_messages_on_error (bool, optional): log stringified message-contents
+            on any produce error, otherwise only log a hash() of the contents,
+            defaults to True.
+        stop_timeout (int or float, optional): number of seconds to continue
+            retrying messages after stop_event is set, defaults to 30.
+        codec_compresslevel (optional): forwarded unchanged to
+            create_message_set together with codec, defaults to None.
+    """
+    # Maps each pending ProduceRequest to the number of retries already made
+    request_tries = {}
+
+    # Keep trying to (re)connect until it works or a stop is requested
+    while not stop_event.is_set():
+        try:
+            client.reinit()
+        except Exception:
+            log.warning('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms)
+            time.sleep(float(retry_options.backoff_ms) / 1000)
+        else:
+            break
+
+    stop_at = None
+    while not (stop_event.is_set() and queue.empty() and not request_tries):
+
+        # Handle stop_timeout
+        if stop_event.is_set():
+            if not stop_at:
+                stop_at = stop_timeout + time.time()
+            if time.time() > stop_at:
+                log.debug('Async producer stopping due to stop_timeout')
+                break

-    while not stop:
        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        msgset = defaultdict(list)

+        # Merging messages will require a bit more work to manage correctly
+        # for now, dont look for new batches if we have old ones to retry
+        if request_tries:
+            count = 0
+            log.debug('Skipping new batch collection to handle retries')
+        else:
+            log.debug('Batching size: %s, timeout: %s', count, timeout)
+
        # Keep fetching till we gather enough messages or a
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
-
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
-                stop = True
+                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
-            msgset[topic_partition].append(msg)
+            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
-        reqs = []
        for topic_partition, msg in msgset.items():
-            messages = create_message_set(msg, codec, key)
+            messages = create_message_set(msg, codec, key, codec_compresslevel)
            req = ProduceRequest(topic_partition.topic,
                                 topic_partition.partition,
-                                 messages)
-            reqs.append(req)
+                                 tuple(messages))
+            request_tries[req] = 0

-        try:
-            client.send_produce_request(reqs,
-                                        acks=req_acks,
-                                        timeout=ack_timeout)
-        except Exception:
-            log.exception("Unable to send message")
+        if not request_tries:
+            continue
+
+        reqs_to_retry, error_cls = [], None
+        retry_state = {
+            'do_backoff': False,
+            'do_refresh': False
+        }
+
+        # Classifies one failed request: queue it for retry and record
+        # whether a backoff and/or a metadata refresh is needed first
+        def _handle_error(error_cls, request):
+            if issubclass(error_cls, RETRY_ERROR_TYPES) or (retry_options.retry_on_timeouts and issubclass(error_cls, RequestTimedOutError)):
+                reqs_to_retry.append(request)
+            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
+                retry_state['do_backoff'] |= True
+            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
+                retry_state['do_refresh'] |= True
+
+        requests = list(request_tries.keys())
+        log.debug('Sending: %s', requests)
+        responses = client.send_produce_request(requests,
+                                                acks=req_acks,
+                                                timeout=ack_timeout,
+                                                fail_on_error=False)
+
+        log.debug('Received: %s', responses)
+        for i, response in enumerate(responses):
+            error_cls = None
+            if isinstance(response, FailedPayloadsError):
+                error_cls = response.__class__
+                orig_req = response.payload
+
+            elif isinstance(response, ProduceResponse) and response.error:
+                error_cls = kafka_errors.get(response.error, UnknownError)
+                orig_req = requests[i]
+
+            if error_cls:
+                _handle_error(error_cls, orig_req)
+                log.error('%s sending ProduceRequest (#%d of %d) '
+                          'to %s:%d with msgs %s',
+                          error_cls.__name__, (i + 1), len(requests),
+                          orig_req.topic, orig_req.partition,
+                          orig_req.messages if log_messages_on_error
+                                            else hash(orig_req.messages))
+
+        if not reqs_to_retry:
+            request_tries = {}
+            continue
+
+        # doing backoff before next retry
+        if retry_state['do_backoff'] and retry_options.backoff_ms:
+            log.warning('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms)
+            time.sleep(float(retry_options.backoff_ms) / 1000)
+
+        # refresh topic metadata before next retry
+        if retry_state['do_refresh']:
+            log.warning('Async producer forcing metadata refresh before retrying')
+            try:
+                client.load_metadata_for_topics()
+            except Exception as e:
+                # Log the exception itself: exceptions have no `.message`
+                # attribute on Python 3, so reading it here would raise
+                # AttributeError from inside the handler.
+                log.error("Async producer couldn't reload topic metadata. Error: `%s`", e)
+
+        # Apply retry limit, dropping messages that are over
+        request_tries = dict(
+            (key, count + 1)
+            for (key, count) in request_tries.items()
+                if key in reqs_to_retry
+                    and (retry_options.limit is None
+                    or (count < retry_options.limit))
+        )
+
+        # Log messages we are going to retry
+        for orig_req in request_tries.keys():
+            log.info('Retrying ProduceRequest to %s:%d with msgs %s',
+                     orig_req.topic, orig_req.partition,
+                     orig_req.messages if log_messages_on_error
+                                       else hash(orig_req.messages))
+
+    if request_tries or not queue.empty():
+        log.error('Stopped producer with {0} unsent messages'
+                  .format(len(request_tries) + queue.qsize()))


 class Producer(object):
@@ -86,47 +225,84 @@ class Producer(object):
    Base class to be used by producers

    Arguments:
-        client: The Kafka client instance to use
-        async: If set to true, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-            WARNING!!! current implementation of async producer does not
-            guarantee message delivery.  Use at your own risk! Or help us
-            improve with a PR!
-        req_acks: A value indicating the acknowledgements that the server must
-            receive before responding to the request
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
-    """
+        client (KafkaClient): instance to use for broker communications.
+            If async=True, the background thread will use client.copy(),
+            which is expected to return a thread-safe object.
+        codec (kafka.protocol.ALL_CODECS): compression codec to use.
+        req_acks (int, optional): A value indicating the acknowledgements that
+            the server must receive before responding to the request,
+            defaults to 1 (local ack).
+        ack_timeout (int, optional): millisecond timeout to wait for the
+            configured req_acks, defaults to 1000.
+        sync_fail_on_error (bool, optional): whether sync producer should
+            raise exceptions (True), or just return errors (False),
+            defaults to True.
+        async (bool, optional): send message using a background thread,
+            defaults to False.
+        batch_send_every_n (int, optional): If async is True, messages are
+            sent in batches of this size, defaults to 20.
+        batch_send_every_t (int or float, optional): If async is True,
+            messages are sent immediately after this timeout in seconds, even
+            if there are fewer than batch_send_every_n, defaults to 20.
+        async_retry_limit (int, optional): number of retries for failed messages
+            or None for unlimited, defaults to None / unlimited.
+        async_retry_backoff_ms (int, optional): milliseconds to backoff on
+            failed messages, defaults to 100.
+        async_retry_on_timeouts (bool, optional): whether to retry on
+            RequestTimeoutError, defaults to True.
+        async_queue_maxsize (int, optional): limit to the size of the
+            internal message queue in number of messages (not size), defaults
+            to 0 (no limit).
+        async_queue_put_timeout (int or float, optional): timeout seconds
+            for queue.put in send_messages for async producers -- will only
+            apply if async_queue_maxsize > 0 and the queue is Full,
+            defaults to 0 (fail immediately on full queue).
+        async_log_messages_on_error (bool, optional): set to False and the
+            async producer will only log hash() contents on failed produce
+            requests, defaults to True (log full messages). Hash logging
+            will not allow you to identify the specific message that failed,
+            but it will allow you to match failures with retries.
+        async_stop_timeout (int or float, optional): seconds to continue
+            attempting to send queued messages after producer.stop(),
+            defaults to 30.

+    Deprecated Arguments:
+        batch_send (bool, optional): If True, messages are sent by a background
+            thread in batches, defaults to False. Deprecated, use 'async'
+    """
    ACK_NOT_REQUIRED = 0            # No ack is required
    ACK_AFTER_LOCAL_WRITE = 1       # Send response after it is written to log
    ACK_AFTER_CLUSTER_COMMIT = -1   # Send response after data is committed
-
    DEFAULT_ACK_TIMEOUT = 1000

-    def __init__(self, client, async=False,
+    def __init__(self, client,
                 req_acks=ACK_AFTER_LOCAL_WRITE,
                 ack_timeout=DEFAULT_ACK_TIMEOUT,
                 codec=None,
-                 batch_send=False,
+                 codec_compresslevel=None,
+                 sync_fail_on_error=SYNC_FAIL_ON_ERROR_DEFAULT,
+                 async=False,
+                 batch_send=False, # deprecated, use async
                 batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
+                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
+                 async_retry_limit=ASYNC_RETRY_LIMIT,
+                 async_retry_backoff_ms=ASYNC_RETRY_BACKOFF_MS,
+                 async_retry_on_timeouts=ASYNC_RETRY_ON_TIMEOUTS,
+                 async_queue_maxsize=ASYNC_QUEUE_MAXSIZE,
+                 async_queue_put_timeout=ASYNC_QUEUE_PUT_TIMEOUT,
+                 async_log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
+                 async_stop_timeout=ASYNC_STOP_TIMEOUT_SECS):

-        if batch_send:
-            async = True
+        if async:
            assert batch_send_every_n > 0
            assert batch_send_every_t > 0
-        else:
-            batch_send_every_n = 1
-            batch_send_every_t = 3600
+            assert async_queue_maxsize >= 0

        self.client = client
        self.async = async
        self.req_acks = req_acks
        self.ack_timeout = ack_timeout
+        self.stopped = False

        if codec is None:
            codec = CODEC_NONE
@@ -134,24 +310,39 @@ class Producer(object):
            raise UnsupportedCodecError("Codec 0x%02x unsupported" % codec)

        self.codec = codec
+        self.codec_compresslevel = codec_compresslevel

        if self.async:
-            log.warning("async producer does not guarantee message delivery!")
-            log.warning("Current implementation does not retry Failed messages")
-            log.warning("Use at your own risk! (or help improve with a PR!)")
-            self.queue = Queue()  # Messages are sent through this queue
-            self.proc = Process(target=_send_upstream,
-                                args=(self.queue,
-                                      self.client.copy(),
-                                      self.codec,
-                                      batch_send_every_t,
-                                      batch_send_every_n,
-                                      self.req_acks,
-                                      self.ack_timeout))
+            # Messages are sent through this queue
+            self.queue = Queue(async_queue_maxsize)
+            self.async_queue_put_timeout = async_queue_put_timeout
+            async_retry_options = RetryOptions(
+                limit=async_retry_limit,
+                backoff_ms=async_retry_backoff_ms,
+                retry_on_timeouts=async_retry_on_timeouts)
+            self.thread_stop_event = Event()
+            self.thread = Thread(
+                target=_send_upstream,
+                args=(self.queue, self.client.copy(), self.codec,
+                      batch_send_every_t, batch_send_every_n,
+                      self.req_acks, self.ack_timeout,
+                      async_retry_options, self.thread_stop_event),
+                kwargs={'log_messages_on_error': async_log_messages_on_error,
+                        'stop_timeout': async_stop_timeout,
+                        'codec_compresslevel': self.codec_compresslevel}
+            )

-            # Process will die if main thread exits
-            self.proc.daemon = True
-            self.proc.start()
+            # Thread will die if main thread exits
+            self.thread.daemon = True
+            self.thread.start()
+
+            def cleanup(obj):
+                if not obj.stopped:
+                    obj.stop()
+            self._cleanup_func = cleanup
+            atexit.register(cleanup, self)
+        else:
+            self.sync_fail_on_error = sync_fail_on_error

    def send_messages(self, topic, partition, *msg):
        """
@@ -169,6 +360,7 @@ class Producer(object):

        All messages produced via this method will set the message 'key' to Null
        """
+        topic = kafka_bytestring(topic)
        return self._send_messages(topic, partition, *msg)

    def _send_messages(self, topic, partition, *msg, **kwargs):
@@ -178,37 +370,93 @@ class Producer(object):
        if not isinstance(msg, (list, tuple)):
            raise TypeError("msg is not a list or tuple!")

-        # Raise TypeError if any message is not encoded as bytes
-        if any(not isinstance(m, six.binary_type) for m in msg):
-            raise TypeError("all produce message payloads must be type bytes")
+        for m in msg:
+            # The protocol allows both the key and the payload to be null
+            # (https://goo.gl/o694yN), but a (null, null) pair doesn't make sense.
+            if m is None:
+                if key is None:
+                    raise TypeError("key and payload can't be null in one")
+            # Raise TypeError if any non-null message is not encoded as bytes
+            elif not isinstance(m, six.binary_type):
+                raise TypeError("all produce message payloads must be null or type bytes")
+
+        # Raise TypeError if topic is not encoded as bytes
+        if not isinstance(topic, six.binary_type):
+            raise TypeError("the topic must be type bytes")

        # Raise TypeError if the key is not encoded as bytes
        if key is not None and not isinstance(key, six.binary_type):
            raise TypeError("the key must be type bytes")

        if self.async:
-            for m in msg:
-                self.queue.put((TopicAndPartition(topic, partition), m, key))
+            for idx, m in enumerate(msg):
+                try:
+                    item = (TopicAndPartition(topic, partition), m, key)
+                    if self.async_queue_put_timeout == 0:
+                        self.queue.put_nowait(item)
+                    else:
+                        self.queue.put(item, True, self.async_queue_put_timeout)
+                except Full:
+                    raise AsyncProducerQueueFull(
+                        msg[idx:],
+                        'Producer async queue overfilled. '
+                        'Current queue size %d.' % self.queue.qsize())
            resp = []
        else:
-            messages = create_message_set(msg, self.codec, key)
+            messages = create_message_set([(m, key) for m in msg], self.codec, key, self.codec_compresslevel)
            req = ProduceRequest(topic, partition, messages)
            try:
-                resp = self.client.send_produce_request([req], acks=self.req_acks,
-                                                        timeout=self.ack_timeout)
+                resp = self.client.send_produce_request(
+                    [req], acks=self.req_acks, timeout=self.ack_timeout,
+                    fail_on_error=self.sync_fail_on_error
+                )
            except Exception:
                log.exception("Unable to send messages")
                raise
        return resp

-    def stop(self, timeout=1):
+    def stop(self, timeout=None):
        """
-        Stop the producer. Optionally wait for the specified timeout before
-        forcefully cleaning up.
+        Stop the producer (async mode). Blocks until async thread completes.
        """
+        if timeout is not None:
+            log.warning('timeout argument to stop() is deprecated - '
+                        'it will be removed in future release')
+
+        if not self.async:
+            log.warning('producer.stop() called, but producer is not async')
+            return
+
+        if self.stopped:
+            log.warning('producer.stop() called, but producer is already stopped')
+            return
+
        if self.async:
            self.queue.put((STOP_ASYNC_PRODUCER, None, None))
-            self.proc.join(timeout)
+            self.thread_stop_event.set()
+            self.thread.join()

-            if self.proc.is_alive():
-                self.proc.terminate()
+        if hasattr(self, '_cleanup_func'):
+            # Remove cleanup handler now that we've stopped
+
+            # py3 supports unregistering
+            if hasattr(atexit, 'unregister'):
+                atexit.unregister(self._cleanup_func) # pylint: disable=no-member
+
+            # py2 requires removing from private attribute...
+            else:
+
+                # ValueError on list.remove() if the exithandler no longer exists
+                # but that is fine here
+                try:
+                    atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
+                except ValueError:
+                    pass
+
+            del self._cleanup_func
+
+        self.stopped = True
+
+    def __del__(self):  # finalizer; __init__ may not have run to completion
+        if getattr(self, 'async', False) and not getattr(self, 'stopped', True):
+            self.stop()  # only async producers have a sender thread to stop
--- a/kafka/producer/keyed.py
+++ b/kafka/producer/keyed.py
@@ -1,50 +1,31 @@
 from __future__ import absolute_import

 import logging
+import warnings

-from kafka.partitioner import HashedPartitioner
-from .base import (
-    Producer, BATCH_SEND_DEFAULT_INTERVAL,
-    BATCH_SEND_MSG_COUNT
-)
+from .base import Producer
+from ..partitioner import HashedPartitioner
+from ..util import kafka_bytestring

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)


 class KeyedProducer(Producer):
    """
    A producer which distributes messages to partitions based on the key

-    Arguments:
-        client: The kafka client instance
+    See Producer class for Arguments

-    Keyword Arguments:
+    Additional Arguments:
        partitioner: A partitioner class that will be used to get the partition
-            to send the message to. Must be derived from Partitioner
-        async: If True, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
+            to send the message to. Must be derived from Partitioner.
+            Defaults to HashedPartitioner.
    """
-    def __init__(self, client, partitioner=None, async=False,
-                 req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
-                 ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
-                 codec=None,
-                 batch_send=False,
-                 batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
-        if not partitioner:
-            partitioner = HashedPartitioner
-        self.partitioner_class = partitioner
+    def __init__(self, *args, **kwargs):
+        self.partitioner_class = kwargs.pop('partitioner', HashedPartitioner)
        self.partitioners = {}
-
-        super(KeyedProducer, self).__init__(client, async, req_acks,
-                                            ack_timeout, codec, batch_send,
-                                            batch_send_every_n,
-                                            batch_send_every_t)
+        super(KeyedProducer, self).__init__(*args, **kwargs)

    def _next_partition(self, topic, key):
        if topic not in self.partitioners:
@@ -54,15 +35,17 @@ class KeyedProducer(Producer):
            self.partitioners[topic] = self.partitioner_class(self.client.get_partition_ids_for_topic(topic))

        partitioner = self.partitioners[topic]
-        return partitioner.partition(key, self.client.get_partition_ids_for_topic(topic))
+        return partitioner.partition(key)

-    def send_messages(self,topic,key,*msg):
+    def send_messages(self, topic, key, *msg):
+        topic = kafka_bytestring(topic)
        partition = self._next_partition(topic, key)
-        return self._send_messages(topic, partition, *msg,key=key)
+        return self._send_messages(topic, partition, *msg, key=key)

+    # DEPRECATED
    def send(self, topic, key, msg):
-        partition = self._next_partition(topic, key)
-        return self._send_messages(topic, partition, msg, key=key)
+        warnings.warn("KeyedProducer.send is deprecated in favor of send_messages", DeprecationWarning)
+        return self.send_messages(topic, key, msg)

    def __repr__(self):
        return '<KeyedProducer batch=%s>' % self.async
--- a/kafka/producer/simple.py
+++ b/kafka/producer/simple.py
@@ -1,57 +1,34 @@
 from __future__ import absolute_import

+from itertools import cycle
 import logging
 import random
 import six

-from itertools import cycle
-
 from six.moves import xrange

-from .base import (
-    Producer, BATCH_SEND_DEFAULT_INTERVAL,
-    BATCH_SEND_MSG_COUNT
-)
+from .base import Producer

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)


 class SimpleProducer(Producer):
-    """
-    A simple, round-robin producer. Each message goes to exactly one partition
+    """A simple, round-robin producer.

-    Arguments:
-        client: The Kafka client instance to use
+    See Producer class for Base Arguments

-    Keyword Arguments:
-        async: If True, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-        req_acks: A value indicating the acknowledgements that the server must
-            receive before responding to the request
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
-        random_start: If true, randomize the initial partition which the
+    Additional Arguments:
+        random_start (bool, optional): randomize the initial partition which
            the first message block will be published to, otherwise
            if false, the first message block will always publish
-            to partition 0 before cycling through each partition
+            to partition 0 before cycling through each partition,
+            defaults to True.
    """
-    def __init__(self, client, async=False,
-                 req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
-                 ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
-                 codec=None,
-                 batch_send=False,
-                 batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
-                 random_start=True):
+    def __init__(self, *args, **kwargs):
        self.partition_cycles = {}
-        self.random_start = random_start
-        super(SimpleProducer, self).__init__(client, async, req_acks,
-                                             ack_timeout, codec, batch_send,
-                                             batch_send_every_n,
-                                             batch_send_every_t)
+        self.random_start = kwargs.pop('random_start', True)
+        super(SimpleProducer, self).__init__(*args, **kwargs)

    def _next_partition(self, topic):
        if topic not in self.partition_cycles:
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -14,14 +14,16 @@ from kafka.common import (
    MetadataResponse, ProduceResponse, FetchResponse,
    OffsetResponse, OffsetCommitResponse, OffsetFetchResponse,
    ProtocolError, BufferUnderflowError, ChecksumError,
-    ConsumerFetchSizeTooSmall, UnsupportedCodecError
+    ConsumerFetchSizeTooSmall, UnsupportedCodecError,
+    ConsumerMetadataResponse
 )
 from kafka.util import (
    crc32, read_short_string, read_int_string, relative_unpack,
    write_short_string, write_int_string, group_by_topic_and_partition
 )

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)

 ATTRIBUTE_CODEC_MASK = 0x03
 CODEC_NONE = 0x00
@@ -42,19 +44,21 @@ class KafkaProtocol(object):
    METADATA_KEY = 3
    OFFSET_COMMIT_KEY = 8
    OFFSET_FETCH_KEY = 9
+    CONSUMER_METADATA_KEY = 10

    ###################
    #   Private API   #
    ###################

    @classmethod
-    def _encode_message_header(cls, client_id, correlation_id, request_key):
+    def _encode_message_header(cls, client_id, correlation_id, request_key,
+                               version=0):
        """
        Encode the common request envelope
        """
        return struct.pack('>hhih%ds' % len(client_id),
                           request_key,          # ApiKey
-                           0,                    # ApiVersion
+                           version,              # ApiVersion
                           correlation_id,       # CorrelationId
                           len(client_id),       # ClientId size
                           client_id)            # ClientId
@@ -231,12 +235,12 @@ class KafkaProtocol(object):
        """
        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)

-        for i in range(num_topics):
+        for _ in range(num_topics):
            ((strlen,), cur) = relative_unpack('>h', data, cur)
            topic = data[cur:cur + strlen]
            cur += strlen
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                ((partition, error, offset), cur) = relative_unpack('>ihq',
                                                                    data, cur)

@@ -288,11 +292,11 @@ class KafkaProtocol(object):
        """
        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)

-        for i in range(num_topics):
+        for _ in range(num_topics):
            (topic, cur) = read_short_string(data, cur)
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)

-            for i in range(num_partitions):
+            for j in range(num_partitions):
                ((partition, error, highwater_mark_offset), cur) = \
                    relative_unpack('>ihq', data, cur)

@@ -336,16 +340,16 @@ class KafkaProtocol(object):
        """
        ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)

-        for i in range(num_topics):
+        for _ in range(num_topics):
            (topic, cur) = read_short_string(data, cur)
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)

-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                ((partition, error, num_offsets,), cur) = \
                    relative_unpack('>ihi', data, cur)

                offsets = []
-                for j in range(num_offsets):
+                for k in range(num_offsets):
                    ((offset,), cur) = relative_unpack('>q', data, cur)
                    offsets.append(offset)

@@ -391,7 +395,7 @@ class KafkaProtocol(object):

        # Broker info
        brokers = []
-        for i in range(numbrokers):
+        for _ in range(numbrokers):
            ((nodeId, ), cur) = relative_unpack('>i', data, cur)
            (host, cur) = read_short_string(data, cur)
            ((port,), cur) = relative_unpack('>i', data, cur)
@@ -401,13 +405,13 @@ class KafkaProtocol(object):
        ((num_topics,), cur) = relative_unpack('>i', data, cur)
        topic_metadata = []

-        for i in range(num_topics):
+        for _ in range(num_topics):
            ((topic_error,), cur) = relative_unpack('>h', data, cur)
            (topic_name, cur) = read_short_string(data, cur)
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)
            partition_metadata = []

-            for j in range(num_partitions):
+            for _ in range(num_partitions):
                ((partition_error_code, partition, leader, numReplicas), cur) = \
                    relative_unpack('>hiii', data, cur)

@@ -428,6 +432,38 @@ class KafkaProtocol(object):

        return MetadataResponse(brokers, topic_metadata)

+    @classmethod
+    def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads):
+        """
+        Build the wire bytes for a ConsumerMetadataRequest.
+
+        Arguments:
+            client_id: string, packed into the common request header
+            correlation_id: int, packed into the common request header
+            payloads: string naming the consumer group being looked up
+        """
+        header = cls._encode_message_header(client_id, correlation_id,
+                                            KafkaProtocol.CONSUMER_METADATA_KEY)
+        # Group name as an int16 length prefix followed by the raw bytes
+        group = struct.pack('>h%ds' % len(payloads), len(payloads), payloads)
+
+        # Concatenate both parts and pass them through write_int_string
+        return write_int_string(header + group)
+
+    @classmethod
+    def decode_consumer_metadata_response(cls, data):
+        """
+        Parse a ConsumerMetadataResponse out of raw response bytes.
+
+        Arguments:
+            data: bytes to decode, starting at the correlation id
+        """
+        # '>ihi': correlation id (read but not returned), error code, node id
+        ((_, error, node_id), pos) = relative_unpack('>ihi', data, 0)
+        (host, pos) = read_short_string(data, pos)
+        ((port,), _) = relative_unpack('>i', data, pos)
+        return ConsumerMetadataResponse(error, node_id, host, port)
+
    @classmethod
    def encode_offset_commit_request(cls, client_id, correlation_id,
                                     group, payloads):
@@ -470,31 +506,37 @@ class KafkaProtocol(object):
        ((correlation_id,), cur) = relative_unpack('>i', data, 0)
        ((num_topics,), cur) = relative_unpack('>i', data, cur)

-        for i in xrange(num_topics):
+        for _ in xrange(num_topics):
            (topic, cur) = read_short_string(data, cur)
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)

-            for i in xrange(num_partitions):
+            for _ in xrange(num_partitions):
                ((partition, error), cur) = relative_unpack('>ih', data, cur)
                yield OffsetCommitResponse(topic, partition, error)

    @classmethod
    def encode_offset_fetch_request(cls, client_id, correlation_id,
-                                    group, payloads):
+                                    group, payloads, from_kafka=False):
        """
-        Encode some OffsetFetchRequest structs
+        Encode some OffsetFetchRequest structs. The request is encoded using
+        version 0 if from_kafka is false, indicating a request for Zookeeper
+        offsets. It is encoded using version 1 otherwise, indicating a request
+        for Kafka offsets.

        Arguments:
            client_id: string
            correlation_id: int
            group: string, the consumer group you are fetching offsets for
            payloads: list of OffsetFetchRequest
+            from_kafka: bool, default False, set True for Kafka-committed offsets
        """
        grouped_payloads = group_by_topic_and_partition(payloads)

        message = []
+        reqver = 1 if from_kafka else 0
        message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.OFFSET_FETCH_KEY))
+                                                  KafkaProtocol.OFFSET_FETCH_KEY,
+                                                  version=reqver))

        message.append(write_short_string(group))
        message.append(struct.pack('>i', len(grouped_payloads)))
@@ -521,11 +563,11 @@ class KafkaProtocol(object):
        ((correlation_id,), cur) = relative_unpack('>i', data, 0)
        ((num_topics,), cur) = relative_unpack('>i', data, cur)

-        for i in range(num_topics):
+        for _ in range(num_topics):
            (topic, cur) = read_short_string(data, cur)
            ((num_partitions,), cur) = relative_unpack('>i', data, cur)

-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                ((partition, offset), cur) = relative_unpack('>iq', data, cur)
                (metadata, cur) = read_short_string(data, cur)
                ((error,), cur) = relative_unpack('>h', data, cur)
@@ -546,7 +588,7 @@ def create_message(payload, key=None):
    return Message(0, 0, key, payload)


-def create_gzip_message(payloads, key=None):
+def create_gzip_message(payloads, key=None, compresslevel=None):
    """
    Construct a Gzipped Message containing multiple Messages

@@ -559,9 +601,9 @@ def create_gzip_message(payloads, key=None):

    """
    message_set = KafkaProtocol._encode_message_set(
-        [create_message(payload, key) for payload in payloads])
+        [create_message(payload, pl_key) for payload, pl_key in payloads])

-    gzipped = gzip_encode(message_set)
+    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP

    return Message(0, 0x00 | codec, key, gzipped)
@@ -580,7 +622,7 @@ def create_snappy_message(payloads, key=None):

    """
    message_set = KafkaProtocol._encode_message_set(
-        [create_message(payload, key) for payload in payloads])
+        [create_message(payload, pl_key) for payload, pl_key in payloads])

    snapped = snappy_encode(message_set)
    codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
@@ -588,16 +630,16 @@ def create_snappy_message(payloads, key=None):
    return Message(0, 0x00 | codec, key, snapped)


-def create_message_set(messages, codec=CODEC_NONE, key=None):
+def create_message_set(messages, codec=CODEC_NONE, key=None, compresslevel=None):
    """Create a message set using the given codec.

    If codec is CODEC_NONE, return a list of raw Kafka messages. Otherwise,
    return a list containing a single codec-encoded message.
    """
    if codec == CODEC_NONE:
-        return [create_message(m, key) for m in messages]
+        return [create_message(m, k) for m, k in messages]
    elif codec == CODEC_GZIP:
-        return [create_gzip_message(messages, key)]
+        return [create_gzip_message(messages, key, compresslevel)]
    elif codec == CODEC_SNAPPY:
        return [create_snappy_message(messages, key)]
    else:
--- a/kafka/queue.py
+++ b/kafka/queue.py
@@ -1,215 +0,0 @@
-from __future__ import absolute_import
-
-from copy import copy
-import logging
-from multiprocessing import Process, Queue, Event
-from Queue import Empty
-import time
-
-from kafka.client import KafkaClient, FetchRequest, ProduceRequest
-
-log = logging.getLogger("kafka")
-
-raise NotImplementedError("Still need to refactor this class")
-
-
-class KafkaConsumerProcess(Process):
-    def __init__(self, client, topic, partition, out_queue, barrier,
-                 consumer_fetch_size=1024, consumer_sleep=200):
-        self.client = copy(client)
-        self.topic = topic
-        self.partition = partition
-        self.out_queue = out_queue
-        self.barrier = barrier
-        self.consumer_fetch_size = consumer_fetch_size
-        self.consumer_sleep = consumer_sleep / 1000.
-        log.info("Initializing %s" % self)
-        Process.__init__(self)
-
-    def __str__(self):
-        return "[KafkaConsumerProcess: topic=%s, \
-            partition=%s, sleep=%s]" % \
-            (self.topic, self.partition, self.consumer_sleep)
-
-    def run(self):
-        self.barrier.wait()
-        log.info("Starting %s" % self)
-        fetchRequest = FetchRequest(self.topic, self.partition,
-                                    offset=0, size=self.consumer_fetch_size)
-
-        while True:
-            if self.barrier.is_set() is False:
-                log.info("Shutdown %s" % self)
-                self.client.close()
-                break
-
-            lastOffset = fetchRequest.offset
-            (messages, fetchRequest) = self.client.get_message_set(fetchRequest)
-
-            if fetchRequest.offset == lastOffset:
-                log.debug("No more data for this partition, "
-                          "sleeping a bit (200ms)")
-                time.sleep(self.consumer_sleep)
-                continue
-
-            for message in messages:
-                self.out_queue.put(message)
-
-
-class KafkaProducerProcess(Process):
-    def __init__(self, client, topic, in_queue, barrier,
-                 producer_flush_buffer=500,
-                 producer_flush_timeout=2000,
-                 producer_timeout=100):
-
-        self.client = copy(client)
-        self.topic = topic
-        self.in_queue = in_queue
-        self.barrier = barrier
-        self.producer_flush_buffer = producer_flush_buffer
-        self.producer_flush_timeout = producer_flush_timeout / 1000.
-        self.producer_timeout = producer_timeout / 1000.
-        log.info("Initializing %s" % self)
-        Process.__init__(self)
-
-    def __str__(self):
-        return "[KafkaProducerProcess: topic=%s, \
-            flush_buffer=%s, flush_timeout=%s, timeout=%s]" % \
-            (self.topic,
-                self.producer_flush_buffer,
-                self.producer_flush_timeout,
-                self.producer_timeout)
-
-    def run(self):
-        self.barrier.wait()
-        log.info("Starting %s" % self)
-        messages = []
-        last_produce = time.time()
-
-        def flush(messages):
-            self.client.send_message_set(ProduceRequest(self.topic, -1,
-                                                        messages))
-            del messages[:]
-
-        while True:
-            if self.barrier.is_set() is False:
-                log.info("Shutdown %s, flushing messages" % self)
-                flush(messages)
-                self.client.close()
-                break
-
-            if len(messages) > self.producer_flush_buffer:
-                log.debug("Message count threshold reached. Flushing messages")
-                flush(messages)
-                last_produce = time.time()
-
-            elif (time.time() - last_produce) > self.producer_flush_timeout:
-                log.debug("Producer timeout reached. Flushing messages")
-                flush(messages)
-                last_produce = time.time()
-
-            try:
-                msg = KafkaClient.create_message(
-                    self.in_queue.get(True, self.producer_timeout))
-                messages.append(msg)
-
-            except Empty:
-                continue
-
-
-class KafkaQueue(object):
-    def __init__(self, client, topic, partitions,
-                 producer_config=None, consumer_config=None):
-        """
-        KafkaQueue a Queue-like object backed by a Kafka producer and some
-        number of consumers
-
-        Messages are eagerly loaded by the consumer in batches of size
-        consumer_fetch_size.
-        Messages are buffered in the producer thread until
-        producer_flush_timeout or producer_flush_buffer is reached.
-
-        Arguments:
-            client: KafkaClient object
-            topic: str, the topic name
-            partitions: list of ints, the partions to consume from
-            producer_config: dict, see below
-            consumer_config: dict, see below
-
-        Consumer Config
-        ===============
-        consumer_fetch_size: int, number of bytes to fetch in one call
-                             to Kafka. Default is 1024
-        consumer_sleep: int, time in milliseconds a consumer should sleep
-                        when it reaches the end of a partition. Default is 200
-
-        Producer Config
-        ===============
-        producer_timeout: int, time in milliseconds a producer should
-                          wait for messages to enqueue for producing.
-                          Default is 100
-        producer_flush_timeout: int, time in milliseconds a producer
-                                should allow messages to accumulate before
-                                sending to Kafka. Default is 2000
-        producer_flush_buffer: int, number of messages a producer should
-                               allow to accumulate. Default is 500
-
-        """
-        producer_config = {} if producer_config is None else producer_config
-        consumer_config = {} if consumer_config is None else consumer_config
-
-        self.in_queue = Queue()
-        self.out_queue = Queue()
-        self.consumers = []
-        self.barrier = Event()
-
-        # Initialize and start consumer threads
-        for partition in partitions:
-            consumer = KafkaConsumerProcess(client, topic, partition,
-                                            self.in_queue, self.barrier,
-                                            **consumer_config)
-            consumer.start()
-            self.consumers.append(consumer)
-
-        # Initialize and start producer thread
-        self.producer = KafkaProducerProcess(client, topic, self.out_queue,
-                                             self.barrier, **producer_config)
-        self.producer.start()
-
-        # Trigger everything to start
-        self.barrier.set()
-
-    def get(self, block=True, timeout=None):
-        """
-        Consume a message from Kafka
-
-        Arguments:
-            block: boolean, default True
-            timeout: int, number of seconds to wait when blocking, default None
-
-        Returns:
-            msg: str, the payload from Kafka
-        """
-        return self.in_queue.get(block, timeout).payload
-
-    def put(self, msg, block=True, timeout=None):
-        """
-        Send a message to Kafka
-
-        Arguments:
-            msg: std, the message to send
-            block: boolean, default True
-            timeout: int, number of seconds to wait when blocking, default None
-        """
-        self.out_queue.put(msg, block, timeout)
-
-    def close(self):
-        """
-        Close the internal queues and Kafka consumers/producer
-        """
-        self.in_queue.close()
-        self.out_queue.close()
-        self.barrier.clear()
-        self.producer.join()
-        for consumer in self.consumers:
-            consumer.join()
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -82,6 +82,9 @@ def relative_unpack(fmt, data, cur):
 def group_by_topic_and_partition(tuples):
    out = collections.defaultdict(dict)
    for t in tuples:
+        assert t.topic not in out or t.partition not in out[t.topic], \
+               'Duplicate {0}s for {1} {2}'.format(t.__class__.__name__,
+                                                   t.topic, t.partition)
        out[t.topic][t.partition] = t
    return out

@@ -151,3 +154,6 @@ class ReentrantTimer(object):
        # noinspection PyAttributeOutsideInit
        self.timer = None
        self.fn = None
+
+    def __del__(self):
+        self.stop()
--- a/kafka/version.py
+++ b/kafka/version.py
@@ -0,0 +1 @@
+__version__ = '0.9.5'
--- a/pylint.rc
+++ b/pylint.rc
@@ -0,0 +1,2 @@
+[TYPECHECK]
+ignored-classes=SyncManager
--- a/servers/0.8.0/resources/kafka.properties
+++ b/servers/0.8.0/resources/kafka.properties
@@ -35,6 +35,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}

+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################

 log.flush.interval.messages=10000
@@ -49,7 +53,11 @@ log.cleanup.interval.mins=1
 ############################# Zookeeper #############################

 zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500

 kafka.metrics.polling.interval.secs=5
 kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter
--- a/servers/0.8.1.1/resources/kafka.properties
+++ b/servers/0.8.1.1/resources/kafka.properties
@@ -63,6 +63,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}

+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################

 # Messages are immediately written to the filesystem but by default we only fsync() to sync
@@ -116,3 +120,5 @@ zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}

 # Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.8.1/resources/kafka.properties
+++ b/servers/0.8.1/resources/kafka.properties
@@ -35,6 +35,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}

+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################

 log.flush.interval.messages=10000
@@ -56,4 +60,8 @@ log.cleaner.enable=false
 # You can also append an optional chroot string to the urls to specify the
 # root directory for all kafka znodes.
 zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.8.2.0/resources/kafka.properties
+++ b/servers/0.8.2.0/resources/kafka.properties
@@ -63,6 +63,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}

+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################

 # Messages are immediately written to the filesystem but by default we only fsync() to sync
@@ -116,3 +120,5 @@ zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}

 # Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.8.2.1/resources/kafka.properties
+++ b/servers/0.8.2.1/resources/kafka.properties
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id={broker_id}
+
+############################# Socket Server Settings #############################
+
+# The port the socket server listens on
+port={port}
+
+# Hostname the broker will bind to. If not set, the server will bind to all interfaces
+host.name={host}
+
+# Hostname the broker will advertise to producers and consumers. If not set, it uses the
+# value for "host.name" if configured.  Otherwise, it will use the value returned from
+# java.net.InetAddress.getCanonicalHostName().
+#advertised.host.name=<hostname routable by clients>
+
+# The port to publish to ZooKeeper for clients to use. If this is not set,
+# it will publish the same port that the broker binds to.
+#advertised.port=<port accessible by clients>
+
+# The number of threads handling network requests
+num.network.threads=2
+ 
+# The number of threads doing disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=1048576
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=1048576
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma separated list of directories under which to store log files
+log.dirs={tmp_dir}/data
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions={partitions}
+default.replication.factor={replicas}
+
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk. 
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data may be lost if you are not using replication.
+#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.bytes.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=536870912
+
+# The interval at which log segments are checked to see if they can be deleted according 
+# to the retention policies
+log.retention.check.interval.ms=60000
+
+# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
+# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
+log.cleaner.enable=false
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.8.2.1/resources/log4j.properties
+++ b/servers/0.8.2.1/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=INFO, stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.logger.kafka=DEBUG, stdout
+log4j.logger.org.I0Itec.zkclient.ZkClient=INFO, stdout
+log4j.logger.org.apache.zookeeper=INFO, stdout
--- a/servers/0.8.2.1/resources/zookeeper.properties
+++ b/servers/0.8.2.1/resources/zookeeper.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir={tmp_dir}
+# the port at which the clients will connect
+clientPort={port}
+clientPortAddress={host}
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
--- a/servers/0.8.2.2/resources/kafka.properties
+++ b/servers/0.8.2.2/resources/kafka.properties
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id={broker_id}
+
+############################# Socket Server Settings #############################
+
+# The port the socket server listens on
+port={port}
+
+# Hostname the broker will bind to. If not set, the server will bind to all interfaces
+host.name={host}
+
+# Hostname the broker will advertise to producers and consumers. If not set, it uses the
+# value for "host.name" if configured.  Otherwise, it will use the value returned from
+# java.net.InetAddress.getCanonicalHostName().
+#advertised.host.name=<hostname routable by clients>
+
+# The port to publish to ZooKeeper for clients to use. If this is not set,
+# it will publish the same port that the broker binds to.
+#advertised.port=<port accessible by clients>
+
+# The number of threads handling network requests
+num.network.threads=2
+ 
+# The number of threads doing disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=1048576
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=1048576
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma separated list of directories under which to store log files
+log.dirs={tmp_dir}/data
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions={partitions}
+default.replication.factor={replicas}
+
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk. 
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data may be lost if you are not using replication.
+#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.bytes.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=536870912
+
+# The interval at which log segments are checked to see if they can be deleted according 
+# to the retention policies
+log.retention.check.interval.ms=60000
+
+# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
+# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
+log.cleaner.enable=false
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.8.2.2/resources/log4j.properties
+++ b/servers/0.8.2.2/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=INFO, stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.logger.kafka=DEBUG, stdout
+log4j.logger.org.I0Itec.zkclient.ZkClient=INFO, stdout
+log4j.logger.org.apache.zookeeper=INFO, stdout
--- a/servers/0.8.2.2/resources/zookeeper.properties
+++ b/servers/0.8.2.2/resources/zookeeper.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir={tmp_dir}
+# the port at which the clients will connect
+clientPort={port}
+clientPortAddress={host}
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
--- a/servers/0.9.0.0/resources/kafka.properties
+++ b/servers/0.9.0.0/resources/kafka.properties
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id={broker_id}
+
+############################# Socket Server Settings #############################
+
+# The port the socket server listens on
+port={port}
+
+# Hostname the broker will bind to. If not set, the server will bind to all interfaces
+host.name={host}
+
+# Hostname the broker will advertise to producers and consumers. If not set, it uses the
+# value for "host.name" if configured.  Otherwise, it will use the value returned from
+# java.net.InetAddress.getCanonicalHostName().
+#advertised.host.name=<hostname routable by clients>
+
+# The port to publish to ZooKeeper for clients to use. If this is not set,
+# it will publish the same port that the broker binds to.
+#advertised.port=<port accessible by clients>
+
+# The number of threads handling network requests
+num.network.threads=2
+ 
+# The number of threads doing disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=1048576
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=1048576
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma separated list of directories under which to store log files
+log.dirs={tmp_dir}/data
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions={partitions}
+default.replication.factor={replicas}
+
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk. 
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data may be lost if you are not using replication.
+#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.bytes.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=536870912
+
+# The interval at which log segments are checked to see if they can be deleted according 
+# to the retention policies
+log.retention.check.interval.ms=60000
+
+# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
+# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
+log.cleaner.enable=false
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
--- a/servers/0.9.0.0/resources/log4j.properties
+++ b/servers/0.9.0.0/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=INFO, stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.logger.kafka=DEBUG, stdout
+log4j.logger.org.I0Itec.zkclient.ZkClient=INFO, stdout
+log4j.logger.org.apache.zookeeper=INFO, stdout
--- a/servers/0.9.0.0/resources/zookeeper.properties
+++ b/servers/0.9.0.0/resources/zookeeper.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir={tmp_dir}
+# the port at which the clients will connect
+clientPort={port}
+clientPortAddress={host}
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,10 @@
 import sys
-
+import os
 from setuptools import setup, Command

-with open('VERSION', 'r') as v:
-    __version__ = v.read().rstrip()
-
+# Pull version from source without importing
+# since we can't import something we haven't built yet :)
+exec(open('kafka/version.py').read())

 class Tox(Command):

@@ -26,6 +26,10 @@ test_require = ['tox', 'mock']
 if sys.version_info < (2, 7):
    test_require.append('unittest2')

+here = os.path.abspath(os.path.dirname(__file__))
+
+with open(os.path.join(here, 'README.rst')) as f:
+    README = f.read()

 setup(
    name="kafka-python",
@@ -41,20 +45,15 @@ setup(
        "kafka.producer",
    ],

-    author="David Arthur",
-    author_email="mumrah@gmail.com",
-    url="https://github.com/mumrah/kafka-python",
+    author="Dana Powers",
+    author_email="dana.powers@gmail.com",
+    url="https://github.com/dpkp/kafka-python",
    license="Apache License 2.0",
    description="Pure Python client for Apache Kafka",
-    long_description="""
-This module provides low-level protocol support for Apache Kafka as well as
-high-level consumer and producer classes. Request batching is supported by the
-protocol as well as broker-aware request routing. Gzip and Snappy compression
-is also supported for message sets.
-""",
+    long_description=README,
    keywords="apache kafka",
    install_requires=['six'],
-    classifiers = [
+    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
@@ -62,6 +61,10 @@ is also supported for message sets.
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 2.6",
        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ]
--- a/test/fixtures.py
+++ b/test/fixtures.py
@@ -4,13 +4,18 @@ import os.path
 import shutil
 import subprocess
 import tempfile
+import time
 from six.moves import urllib
 import uuid

-from six.moves.urllib.parse import urlparse  # pylint: disable-msg=E0611
+from six.moves.urllib.parse import urlparse # pylint: disable-msg=E0611,F0401
 from test.service import ExternalService, SpawnedService
 from test.testutil import get_open_port

+
+log = logging.getLogger(__name__)
+
+
 class Fixture(object):
    kafka_version = os.environ.get('KAFKA_VERSION', '0.8.0')
    scala_version = os.environ.get("SCALA_VERSION", '2.8.0')
@@ -35,21 +40,21 @@ class Fixture(object):
        output_file = os.path.join(output_dir, distfile + '.tgz')

        if os.path.isfile(output_file):
-            logging.info("Found file already on disk: %s", output_file)
+            log.info("Found file already on disk: %s", output_file)
            return output_file

        # New tarballs are .tgz, older ones are sometimes .tar.gz
        try:
            url = url_base + distfile + '.tgz'
-            logging.info("Attempting to download %s", url)
+            log.info("Attempting to download %s", url)
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError:
-            logging.exception("HTTP Error")
+            log.exception("HTTP Error")
            url = url_base + distfile + '.tar.gz'
-            logging.info("Attempting to download %s", url)
+            log.info("Attempting to download %s", url)
            response = urllib.request.urlopen(url)

-        logging.info("Saving distribution file to %s", output_file)
+        log.info("Saving distribution file to %s", output_file)
        with open(output_file, 'w') as output_file_fd:
            output_file_fd.write(response.read())

@@ -101,14 +106,14 @@ class ZookeeperFixture(Fixture):
        self.child = None

    def out(self, message):
-        logging.info("*** Zookeeper [%s:%d]: %s", self.host, self.port, message)
+        log.info("*** Zookeeper [%s:%d]: %s", self.host, self.port, message)

    def open(self):
        self.tmp_dir = tempfile.mkdtemp()
        self.out("Running local instance...")
-        logging.info("  host    = %s", self.host)
-        logging.info("  port    = %s", self.port)
-        logging.info("  tmp_dir = %s", self.tmp_dir)
+        log.info("  host    = %s", self.host)
+        log.info("  port    = %s", self.port)
+        log.info("  tmp_dir = %s", self.tmp_dir)

        # Generate configs
        template = self.test_resource("zookeeper.properties")
@@ -118,12 +123,21 @@ class ZookeeperFixture(Fixture):
        # Configure Zookeeper child process
        args = self.kafka_run_class_args("org.apache.zookeeper.server.quorum.QuorumPeerMain", properties)
        env = self.kafka_run_class_env()
-        self.child = SpawnedService(args, env)

        # Party!
        self.out("Starting...")
-        self.child.start()
-        self.child.wait_for(r"binding to port")
+        timeout = 5
+        max_timeout = 30
+        backoff = 1
+        while True:
+            self.child = SpawnedService(args, env)
+            self.child.start()
+            timeout = min(timeout, max_timeout)
+            if self.child.wait_for(r"binding to port", timeout=timeout):
+                break
+            self.child.stop()
+            timeout *= 2
+            time.sleep(backoff)
        self.out("Done!")

    def close(self):
@@ -167,7 +181,7 @@ class KafkaFixture(Fixture):
        self.running = False

    def out(self, message):
-        logging.info("*** Kafka [%s:%d]: %s", self.host, self.port, message)
+        log.info("*** Kafka [%s:%d]: %s", self.host, self.port, message)

    def open(self):
        if self.running:
@@ -176,15 +190,15 @@ class KafkaFixture(Fixture):

        self.tmp_dir = tempfile.mkdtemp()
        self.out("Running local instance...")
-        logging.info("  host       = %s", self.host)
-        logging.info("  port       = %s", self.port)
-        logging.info("  broker_id  = %s", self.broker_id)
-        logging.info("  zk_host    = %s", self.zk_host)
-        logging.info("  zk_port    = %s", self.zk_port)
-        logging.info("  zk_chroot  = %s", self.zk_chroot)
-        logging.info("  replicas   = %s", self.replicas)
-        logging.info("  partitions = %s", self.partitions)
-        logging.info("  tmp_dir    = %s", self.tmp_dir)
+        log.info("  host       = %s", self.host)
+        log.info("  port       = %s", self.port)
+        log.info("  broker_id  = %s", self.broker_id)
+        log.info("  zk_host    = %s", self.zk_host)
+        log.info("  zk_port    = %s", self.zk_port)
+        log.info("  zk_chroot  = %s", self.zk_chroot)
+        log.info("  replicas   = %s", self.replicas)
+        log.info("  partitions = %s", self.partitions)
+        log.info("  tmp_dir    = %s", self.tmp_dir)

        # Create directories
        os.mkdir(os.path.join(self.tmp_dir, "logs"))
@@ -195,11 +209,6 @@ class KafkaFixture(Fixture):
        properties = os.path.join(self.tmp_dir, "kafka.properties")
        self.render_template(template, properties, vars(self))

-        # Configure Kafka child process
-        args = self.kafka_run_class_args("kafka.Kafka", properties)
-        env = self.kafka_run_class_env()
-        self.child = SpawnedService(args, env)
-
        # Party!
        self.out("Creating Zookeeper chroot node...")
        args = self.kafka_run_class_args("org.apache.zookeeper.ZooKeeperMain",
@@ -218,8 +227,24 @@ class KafkaFixture(Fixture):
        self.out("Done!")

        self.out("Starting...")
-        self.child.start()
-        self.child.wait_for(r"\[Kafka Server %d\], Started" % self.broker_id)
+
+        # Configure Kafka child process
+        args = self.kafka_run_class_args("kafka.Kafka", properties)
+        env = self.kafka_run_class_env()
+
+        timeout = 5
+        max_timeout = 30
+        backoff = 1
+        while True:
+            self.child = SpawnedService(args, env)
+            self.child.start()
+            timeout = min(timeout, max_timeout)
+            if self.child.wait_for(r"\[Kafka Server %d\], Started" %
+                                   self.broker_id, timeout=timeout):
+                break
+            self.child.stop()
+            timeout *= 2
+            time.sleep(backoff)
        self.out("Done!")
        self.running = True

--- a/test/service.py
+++ b/test/service.py
@@ -11,9 +11,13 @@ __all__ = [

 ]

+
+log = logging.getLogger(__name__)
+
+
 class ExternalService(object):
    def __init__(self, host, port):
-        logging.info("Using already running service at %s:%d", host, port)
+        log.info("Using already running service at %s:%d", host, port)
        self.host = host
        self.port = port

@@ -36,19 +40,38 @@ class SpawnedService(threading.Thread):
        self.captured_stderr = []

        self.should_die = threading.Event()
+        self.child = None
+        self.alive = False

    def run(self):
        self.run_with_handles()

-    def run_with_handles(self):
+    def _spawn(self):
+        if self.alive: return
+        if self.child and self.child.poll() is None: return
+
        self.child = subprocess.Popen(
            self.args,
            env=self.env,
            bufsize=1,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
-        alive = True
+        self.alive = True

+    def _despawn(self):
+        if self.child.poll() is None:
+            self.child.terminate()
+        self.alive = False
+        for _ in range(50):
+            if self.child.poll() is not None:
+                self.child = None
+                break
+            time.sleep(0.1)
+        else:
+            self.child.kill()
+
+    def run_with_handles(self):
+        self._spawn()
        while True:
            (rds, _, _) = select.select([self.child.stdout, self.child.stderr], [], [], 1)

@@ -60,26 +83,22 @@ class SpawnedService(threading.Thread):
                line = self.child.stderr.readline()
                self.captured_stderr.append(line.decode('utf-8'))

-            if self.should_die.is_set():
-                self.child.terminate()
-                alive = False
+            if self.child.poll() is not None:
+                self.dump_logs()
+                self._spawn()

-            poll_results = self.child.poll()
-            if poll_results is not None:
-                if not alive:
-                    break
-                else:
-                    self.dump_logs()
-                    raise RuntimeError("Subprocess has died. Aborting. (args=%s)" % ' '.join(str(x) for x in self.args))
+            if self.should_die.is_set():
+                self._despawn()
+                break

    def dump_logs(self):
-        logging.critical('stderr')
+        log.critical('stderr')
        for line in self.captured_stderr:
-            logging.critical(line.rstrip())
+            log.critical(line.rstrip())

-        logging.critical('stdout')
+        log.critical('stdout')
        for line in self.captured_stdout:
-            logging.critical(line.rstrip())
+            log.critical(line.rstrip())

    def wait_for(self, pattern, timeout=30):
        t1 = time.time()
@@ -89,17 +108,18 @@ class SpawnedService(threading.Thread):
                try:
                    self.child.kill()
                except:
-                    logging.exception("Received exception when killing child process")
+                    log.exception("Received exception when killing child process")
                self.dump_logs()

-                raise RuntimeError("Waiting for %r timed out after %d seconds" % (pattern, timeout))
+                log.error("Waiting for %r timed out after %d seconds", pattern, timeout)
+                return False

            if re.search(pattern, '\n'.join(self.captured_stdout), re.IGNORECASE) is not None:
-                logging.info("Found pattern %r in %d seconds via stdout", pattern, (t2 - t1))
-                return
+                log.info("Found pattern %r in %d seconds via stdout", pattern, (t2 - t1))
+                return True
            if re.search(pattern, '\n'.join(self.captured_stderr), re.IGNORECASE) is not None:
-                logging.info("Found pattern %r in %d seconds via stderr", pattern, (t2 - t1))
-                return
+                log.info("Found pattern %r in %d seconds via stderr", pattern, (t2 - t1))
+                return True
            time.sleep(0.1)

    def start(self):
--- a/test/test_client.py
+++ b/test/test_client.py
@@ -117,21 +117,21 @@ class TestKafkaClient(unittest.TestCase):
        ]

        topics = [
-            TopicMetadata('topic_1', NO_ERROR, [
-                PartitionMetadata('topic_1', 0, 1, [1, 2], [1, 2], NO_ERROR)
+            TopicMetadata(b'topic_1', NO_ERROR, [
+                PartitionMetadata(b'topic_1', 0, 1, [1, 2], [1, 2], NO_ERROR)
            ]),
-            TopicMetadata('topic_noleader', NO_ERROR, [
-                PartitionMetadata('topic_noleader', 0, -1, [], [],
+            TopicMetadata(b'topic_noleader', NO_ERROR, [
+                PartitionMetadata(b'topic_noleader', 0, -1, [], [],
                                  NO_LEADER),
-                PartitionMetadata('topic_noleader', 1, -1, [], [],
+                PartitionMetadata(b'topic_noleader', 1, -1, [], [],
                                  NO_LEADER),
            ]),
-            TopicMetadata('topic_no_partitions', NO_LEADER, []),
-            TopicMetadata('topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
-            TopicMetadata('topic_3', NO_ERROR, [
-                PartitionMetadata('topic_3', 0, 0, [0, 1], [0, 1], NO_ERROR),
-                PartitionMetadata('topic_3', 1, 1, [1, 0], [1, 0], NO_ERROR),
-                PartitionMetadata('topic_3', 2, 0, [0, 1], [0, 1], NO_ERROR)
+            TopicMetadata(b'topic_no_partitions', NO_LEADER, []),
+            TopicMetadata(b'topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
+            TopicMetadata(b'topic_3', NO_ERROR, [
+                PartitionMetadata(b'topic_3', 0, 0, [0, 1], [0, 1], NO_ERROR),
+                PartitionMetadata(b'topic_3', 1, 1, [1, 0], [1, 0], NO_ERROR),
+                PartitionMetadata(b'topic_3', 2, 0, [0, 1], [0, 1], NO_ERROR)
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
@@ -139,12 +139,12 @@ class TestKafkaClient(unittest.TestCase):
        # client loads metadata at init
        client = KafkaClient(hosts=['broker_1:4567'])
        self.assertDictEqual({
-            TopicAndPartition('topic_1', 0): brokers[1],
-            TopicAndPartition('topic_noleader', 0): None,
-            TopicAndPartition('topic_noleader', 1): None,
-            TopicAndPartition('topic_3', 0): brokers[0],
-            TopicAndPartition('topic_3', 1): brokers[1],
-            TopicAndPartition('topic_3', 2): brokers[0]},
+            TopicAndPartition(b'topic_1', 0): brokers[1],
+            TopicAndPartition(b'topic_noleader', 0): None,
+            TopicAndPartition(b'topic_noleader', 1): None,
+            TopicAndPartition(b'topic_3', 0): brokers[0],
+            TopicAndPartition(b'topic_3', 1): brokers[1],
+            TopicAndPartition(b'topic_3', 2): brokers[0]},
            client.topics_to_brokers)

        # if we ask for metadata explicitly, it should raise errors
@@ -156,6 +156,7 @@ class TestKafkaClient(unittest.TestCase):

        # This should not raise
        client.load_metadata_for_topics('topic_no_leader')
+        client.load_metadata_for_topics(b'topic_no_leader')

    @patch('kafka.client.KafkaConnection')
    @patch('kafka.client.KafkaProtocol')
@@ -169,11 +170,11 @@ class TestKafkaClient(unittest.TestCase):
        ]

        topics = [
-            TopicMetadata('topic_still_creating', NO_LEADER, []),
-            TopicMetadata('topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
-            TopicMetadata('topic_noleaders', NO_ERROR, [
-                PartitionMetadata('topic_noleaders', 0, -1, [], [], NO_LEADER),
-                PartitionMetadata('topic_noleaders', 1, -1, [], [], NO_LEADER),
+            TopicMetadata(b'topic_still_creating', NO_LEADER, []),
+            TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
+            TopicMetadata(b'topic_noleaders', NO_ERROR, [
+                PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
+                PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
@@ -188,8 +189,8 @@ class TestKafkaClient(unittest.TestCase):
        self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))

    @patch('kafka.client.KafkaConnection')
-    @patch('kafka.client.KafkaProtocol')
-    def test_ensure_topic_exists(self, protocol, conn):
+    @patch('kafka.client.KafkaProtocol.decode_metadata_response')
+    def test_ensure_topic_exists(self, decode_metadata_response, conn):

        conn.recv.return_value = 'response'  # anything but None

@@ -199,14 +200,14 @@ class TestKafkaClient(unittest.TestCase):
        ]

        topics = [
-            TopicMetadata('topic_still_creating', NO_LEADER, []),
-            TopicMetadata('topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
-            TopicMetadata('topic_noleaders', NO_ERROR, [
-                PartitionMetadata('topic_noleaders', 0, -1, [], [], NO_LEADER),
-                PartitionMetadata('topic_noleaders', 1, -1, [], [], NO_LEADER),
+            TopicMetadata(b'topic_still_creating', NO_LEADER, []),
+            TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
+            TopicMetadata(b'topic_noleaders', NO_ERROR, [
+                PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
+                PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
            ]),
        ]
-        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
+        decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = KafkaClient(hosts=['broker_1:4567'])

@@ -218,6 +219,7 @@ class TestKafkaClient(unittest.TestCase):

        # This should not raise
        client.ensure_topic_exists('topic_noleaders', timeout=1)
+        client.ensure_topic_exists(b'topic_noleaders', timeout=1)

    @patch('kafka.client.KafkaConnection')
    @patch('kafka.client.KafkaProtocol')
@@ -269,8 +271,8 @@ class TestKafkaClient(unittest.TestCase):
        ]

        topics = [
-            TopicMetadata('topic_no_partitions', NO_LEADER, []),
-            TopicMetadata('topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
+            TopicMetadata(b'topic_no_partitions', NO_LEADER, []),
+            TopicMetadata(b'topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

@@ -279,10 +281,10 @@ class TestKafkaClient(unittest.TestCase):
        self.assertDictEqual({}, client.topics_to_brokers)

        with self.assertRaises(LeaderNotAvailableError):
-            client._get_leader_for_partition('topic_no_partitions', 0)
+            client._get_leader_for_partition(b'topic_no_partitions', 0)

        with self.assertRaises(UnknownTopicOrPartitionError):
-            client._get_leader_for_partition('topic_unknown', 0)
+            client._get_leader_for_partition(b'topic_unknown', 0)

    @patch('kafka.client.KafkaConnection')
    @patch('kafka.client.KafkaProtocol')
@@ -401,3 +403,11 @@ class TestKafkaClient(unittest.TestCase):
                with self.assertRaises(ConnectionError):
                    KafkaConnection("nowhere", 1234, 1.0)
            self.assertGreaterEqual(t.interval, 1.0)
+
+    def test_correlation_rollover(self):
+        with patch.object(KafkaClient, 'load_metadata_for_topics'):
+            big_num = 2**31 - 3
+            client = KafkaClient(hosts=[], correlation_id=big_num)
+            self.assertEqual(big_num + 1, client._next_id())
+            self.assertEqual(big_num + 2, client._next_id())
+            self.assertEqual(0, client._next_id())
--- a/test/test_client_integration.py
+++ b/test/test_client_integration.py
@@ -2,13 +2,13 @@ import os

 from kafka.common import (
    FetchRequest, OffsetCommitRequest, OffsetFetchRequest,
-    KafkaTimeoutError
+    KafkaTimeoutError, ProduceRequest
 )
+from kafka.protocol import create_message

 from test.fixtures import ZookeeperFixture, KafkaFixture
-from test.testutil import (
-    KafkaIntegrationTestCase, kafka_versions
-)
+from test.testutil import KafkaIntegrationTestCase, kafka_versions
+

 class TestKafkaClientIntegration(KafkaIntegrationTestCase):
    @classmethod
@@ -29,11 +29,11 @@ class TestKafkaClientIntegration(KafkaIntegrationTestCase):

    @kafka_versions("all")
    def test_consume_none(self):
-        fetch = FetchRequest(self.topic, 0, 0, 1024)
+        fetch = FetchRequest(self.bytes_topic, 0, 0, 1024)

        fetch_resp, = self.client.send_fetch_request([fetch])
        self.assertEqual(fetch_resp.error, 0)
-        self.assertEqual(fetch_resp.topic, self.topic)
+        self.assertEqual(fetch_resp.topic, self.bytes_topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
@@ -50,17 +50,46 @@ class TestKafkaClientIntegration(KafkaIntegrationTestCase):
        with self.assertRaises(KafkaTimeoutError):
            self.client.ensure_topic_exists(b"this_topic_doesnt_exist", timeout=0)

+    @kafka_versions('all')
+    def test_send_produce_request_maintains_request_response_order(self):
+
+        self.client.ensure_topic_exists(b'foo')
+        self.client.ensure_topic_exists(b'bar')
+
+        requests = [
+            ProduceRequest(
+                b'foo', 0,
+                [create_message(b'a'), create_message(b'b')]),
+            ProduceRequest(
+                b'bar', 1,
+                [create_message(b'a'), create_message(b'b')]),
+            ProduceRequest(
+                b'foo', 1,
+                [create_message(b'a'), create_message(b'b')]),
+            ProduceRequest(
+                b'bar', 0,
+                [create_message(b'a'), create_message(b'b')]),
+        ]
+
+        responses = self.client.send_produce_request(requests)
+        while len(responses):
+            request = requests.pop()
+            response = responses.pop()
+            self.assertEqual(request.topic, response.topic)
+            self.assertEqual(request.partition, response.partition)
+
+
    ####################
    #   Offset Tests   #
    ####################

-    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
    def test_commit_fetch_offsets(self):
-        req = OffsetCommitRequest(self.topic, 0, 42, b"metadata")
+        req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
        (resp,) = self.client.send_offset_commit_request(b"group", [req])
        self.assertEqual(resp.error, 0)

-        req = OffsetFetchRequest(self.topic, 0)
+        req = OffsetFetchRequest(self.bytes_topic, 0)
        (resp,) = self.client.send_offset_fetch_request(b"group", [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
--- a/test/test_codec.py
+++ b/test/test_codec.py
@@ -13,16 +13,16 @@ from test.testutil import random_string
 class TestCodec(unittest.TestCase):
    def test_gzip(self):
        for i in xrange(1000):
-            s1 = random_string(100)
-            s2 = gzip_decode(gzip_encode(s1))
-            self.assertEqual(s1, s2)
+            b1 = random_string(100).encode('utf-8')
+            b2 = gzip_decode(gzip_encode(b1))
+            self.assertEqual(b1, b2)

    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy(self):
        for i in xrange(1000):
-            s1 = random_string(100)
-            s2 = snappy_decode(snappy_encode(s1))
-            self.assertEqual(s1, s2)
+            b1 = random_string(100).encode('utf-8')
+            b2 = snappy_decode(snappy_encode(b1))
+            self.assertEqual(b1, b2)

    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_detect_xerial(self):
--- a/test/test_conn.py
+++ b/test/test_conn.py
@@ -1,5 +1,7 @@
+import logging
 import socket
 import struct
+from threading import Thread

 import mock
 from . import unittest
@@ -9,6 +11,10 @@ from kafka.conn import KafkaConnection, collect_hosts, DEFAULT_SOCKET_TIMEOUT_SE

 class ConnTest(unittest.TestCase):
    def setUp(self):
+
+        # kafka.conn debug logging is verbose, so only enable in conn tests
+        logging.getLogger('kafka.conn').setLevel(logging.DEBUG)
+
        self.config = {
            'host': 'localhost',
            'port': 9090,
@@ -44,6 +50,11 @@ class ConnTest(unittest.TestCase):
        # Reset any mock counts caused by __init__
        self.MockCreateConn.reset_mock()

+    def tearDown(self):
+        # Return connection logging to INFO
+        logging.getLogger('kafka.conn').setLevel(logging.INFO)
+
+
    def test_collect_hosts__happy_path(self):
        hosts = "localhost:1234,localhost"
        results = collect_hosts(hosts)
@@ -154,6 +165,23 @@ class ConnTest(unittest.TestCase):
        self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload'])
        self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload2'])

+    def test_get_connected_socket(self):
+        s = self.conn.get_connected_socket()
+
+        self.assertEqual(s, self.MockCreateConn())
+
+    def test_get_connected_socket_on_dirty_conn(self):
+        # Dirty the connection
+        try:
+            self.conn._raise_connection_error()
+        except ConnectionError:
+            pass
+
+        # Test that get_connected_socket tries to connect
+        self.assertEqual(self.MockCreateConn.call_count, 0)
+        self.conn.get_connected_socket()
+        self.assertEqual(self.MockCreateConn.call_count, 1)
+
    def test_close__object_is_reusable(self):

        # test that sending to a closed connection
@@ -162,3 +190,54 @@ class ConnTest(unittest.TestCase):
        self.conn.send(self.config['request_id'], self.config['payload'])
        self.assertEqual(self.MockCreateConn.call_count, 1)
        self.conn._sock.sendall.assert_called_with(self.config['payload'])
+
+
+class TestKafkaConnection(unittest.TestCase):
+
+    def setUp(self):
+        # kafka.conn debug logging is verbose, so only enable in conn tests
+        logging.getLogger('kafka.conn').setLevel(logging.DEBUG)
+
+    def tearDown(self):
+        # Return connection logging to INFO
+        logging.getLogger('kafka.conn').setLevel(logging.INFO)
+
+    @mock.patch('socket.create_connection')
+    def test_copy(self, socket):
+        """KafkaConnection copies work as expected"""
+
+        conn = KafkaConnection('kafka', 9092)
+        self.assertEqual(socket.call_count, 1)
+
+        copy = conn.copy()
+        self.assertEqual(socket.call_count, 1)
+        self.assertEqual(copy.host, 'kafka')
+        self.assertEqual(copy.port, 9092)
+        self.assertEqual(copy._sock, None)
+
+        copy.reinit()
+        self.assertEqual(socket.call_count, 2)
+        self.assertNotEqual(copy._sock, None)
+
+    @mock.patch('socket.create_connection')
+    def test_copy_thread(self, socket):
+        """KafkaConnection copies work in other threads"""
+
+        err = []
+        copy = KafkaConnection('kafka', 9092).copy()
+
+        def thread_func(err, copy):
+            try:
+                self.assertEqual(copy.host, 'kafka')
+                self.assertEqual(copy.port, 9092)
+                self.assertNotEqual(copy._sock, None)
+            except Exception as e:
+                err.append(e)
+            else:
+                err.append(None)
+        thread = Thread(target=thread_func, args=(err, copy))
+        thread.start()
+        thread.join()
+
+        self.assertEqual(err, [None])
+        self.assertEqual(socket.call_count, 2)
--- a/test/test_consumer.py
+++ b/test/test_consumer.py
@@ -1,9 +1,14 @@

-from mock import MagicMock
+from mock import MagicMock, patch
 from . import unittest

-from kafka import SimpleConsumer, KafkaConsumer
-from kafka.common import KafkaConfigurationError
+from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer
+from kafka.common import (
+    KafkaConfigurationError, FetchResponse, OffsetFetchResponse,
+    FailedPayloadsError, OffsetAndMessage,
+    NotLeaderForPartitionError, UnknownTopicOrPartitionError
+)
+

 class TestKafkaConsumer(unittest.TestCase):
    def test_non_integer_partitions(self):
@@ -13,3 +18,120 @@ class TestKafkaConsumer(unittest.TestCase):
    def test_broker_list_required(self):
        with self.assertRaises(KafkaConfigurationError):
            KafkaConsumer()
+
+
+class TestMultiProcessConsumer(unittest.TestCase):
+    def test_partition_list(self):
+        client = MagicMock()
+        partitions = (0,)
+        with patch.object(MultiProcessConsumer, 'fetch_last_known_offsets') as fetch_last_known_offsets:
+            MultiProcessConsumer(client, 'testing-group', 'testing-topic', partitions=partitions)
+            self.assertEqual(fetch_last_known_offsets.call_args[0], (partitions,) )
+        self.assertEqual(client.get_partition_ids_for_topic.call_count, 0) # pylint: disable=no-member
+
+class TestSimpleConsumer(unittest.TestCase):
+    def test_simple_consumer_failed_payloads(self):
+        client = MagicMock()
+        consumer = SimpleConsumer(client, group=None,
+                                  topic='topic', partitions=[0, 1],
+                                  auto_commit=False)
+
+        def failed_payloads(payload):
+            return FailedPayloadsError(payload)
+
+        client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)
+
+        # This should not raise an exception
+        consumer.get_messages(5)
+
+    def test_simple_consumer_leader_change(self):
+        client = MagicMock()
+        consumer = SimpleConsumer(client, group=None,
+                                  topic='topic', partitions=[0, 1],
+                                  auto_commit=False)
+
+        # Mock so that only the first request gets a valid response
+        def not_leader(request):
+            return FetchResponse(request.topic, request.partition,
+                                 NotLeaderForPartitionError.errno, -1, ())
+
+        client.send_fetch_request.side_effect = self.fail_requests_factory(not_leader)
+
+        # This should not raise an exception
+        consumer.get_messages(20)
+
+        # client should have updated metadata
+        self.assertGreaterEqual(client.reset_topic_metadata.call_count, 1)
+        self.assertGreaterEqual(client.load_metadata_for_topics.call_count, 1)
+
+    def test_simple_consumer_unknown_topic_partition(self):
+        client = MagicMock()
+        consumer = SimpleConsumer(client, group=None,
+                                  topic='topic', partitions=[0, 1],
+                                  auto_commit=False)
+
+        # Mock so that only the first request gets a valid response
+        def unknown_topic_partition(request):
+            return FetchResponse(request.topic, request.partition,
+                                 UnknownTopicOrPartitionError.errno, -1, ())
+
+        client.send_fetch_request.side_effect = self.fail_requests_factory(unknown_topic_partition)
+
+        # This should raise UnknownTopicOrPartitionError
+        with self.assertRaises(UnknownTopicOrPartitionError):
+            consumer.get_messages(20)
+
+    def test_simple_consumer_commit_does_not_raise(self):
+        client = MagicMock()
+        client.get_partition_ids_for_topic.return_value = [0, 1]
+
+        def mock_offset_fetch_request(group, payloads, **kwargs):
+            return [OffsetFetchResponse(p.topic, p.partition, 0, b'', 0) for p in payloads]
+
+        client.send_offset_fetch_request.side_effect = mock_offset_fetch_request
+
+        def mock_offset_commit_request(group, payloads, **kwargs):
+            raise FailedPayloadsError(payloads[0])
+
+        client.send_offset_commit_request.side_effect = mock_offset_commit_request
+
+        consumer = SimpleConsumer(client, group='foobar',
+                                  topic='topic', partitions=[0, 1],
+                                  auto_commit=False)
+
+        # Mock internal commit check
+        consumer.count_since_commit = 10
+
+        # This should not raise an exception
+        self.assertFalse(consumer.commit(partitions=[0, 1]))
+
+    def test_simple_consumer_reset_partition_offset(self):
+        client = MagicMock()
+
+        def mock_offset_request(payloads, **kwargs):
+            raise FailedPayloadsError(payloads[0])
+
+        client.send_offset_request.side_effect = mock_offset_request
+
+        consumer = SimpleConsumer(client, group='foobar',
+                                  topic='topic', partitions=[0, 1],
+                                  auto_commit=False)
+
+        # This should not raise an exception
+        self.assertEqual(consumer.reset_partition_offset(0), None)
+
+    @staticmethod
+    def fail_requests_factory(error_factory):
+        # Mock so that only the first request gets a valid response
+        def fail_requests(payloads, **kwargs):
+            responses = [
+                FetchResponse(payloads[0].topic, payloads[0].partition, 0, 0,
+                              (OffsetAndMessage(
+                                  payloads[0].offset + i,
+                                  "msg %d" % (payloads[0].offset + i))
+                               for i in range(10))),
+            ]
+            for failure in payloads[1:]:
+                responses.append(error_factory(failure))
+            return responses
+        return fail_requests
--- a/test/test_consumer_integration.py
+++ b/test/test_consumer_integration.py
@@ -3,9 +3,12 @@ import os

 from six.moves import xrange

-from kafka import SimpleConsumer, MultiProcessConsumer, KafkaConsumer, create_message
+from kafka import (
+    KafkaConsumer, MultiProcessConsumer, SimpleConsumer, create_message
+)
 from kafka.common import (
-    ProduceRequest, ConsumerFetchSizeTooSmall, ConsumerTimeout
+    ProduceRequest, ConsumerFetchSizeTooSmall, ConsumerTimeout,
+    OffsetOutOfRangeError
 )
 from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES

@@ -14,6 +17,7 @@ from test.testutil import (
    KafkaIntegrationTestCase, kafka_versions, random_string, Timer
 )

+
 class TestConsumerIntegration(KafkaIntegrationTestCase):
    @classmethod
    def setUpClass(cls):
@@ -37,7 +41,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):

    def send_messages(self, partition, messages):
        messages = [ create_message(self.msg(str(msg))) for msg in messages ]
-        produce = ProduceRequest(self.topic, partition, messages = messages)
+        produce = ProduceRequest(self.bytes_topic, partition, messages = messages)
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

@@ -53,6 +57,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
    def consumer(self, **kwargs):
        if os.environ['KAFKA_VERSION'] == "0.8.0":
            # Kafka 0.8.0 simply doesn't support offset requests, so hard code it being off
+            kwargs['group'] = None
            kwargs['auto_commit'] = False
        else:
            kwargs.setdefault('auto_commit', True)
@@ -61,7 +66,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        group = kwargs.pop('group', self.id().encode('utf-8'))
        topic = kwargs.pop('topic', self.topic)

-        if consumer_class == SimpleConsumer:
+        if consumer_class in [SimpleConsumer, MultiProcessConsumer]:
            kwargs.setdefault('iter_timeout', 0)

        return consumer_class(self.client, group, topic, **kwargs)
@@ -69,7 +74,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
    def kafka_consumer(self, **configs):
        brokers = '%s:%d' % (self.server.host, self.server.port)
        consumer = KafkaConsumer(self.topic,
-                                 metadata_broker_list=brokers,
+                                 bootstrap_servers=brokers,
                                 **configs)
        return consumer

@@ -85,6 +90,65 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):

        consumer.stop()

+    @kafka_versions('all')
+    def test_simple_consumer_smallest_offset_reset(self):
+        self.send_messages(0, range(0, 100))
+        self.send_messages(1, range(100, 200))
+
+        consumer = self.consumer(auto_offset_reset='smallest')
+        # Move fetch offset 300 messages past the tail (out of range)
+        consumer.seek(300, 2)
+        # Since auto_offset_reset is set to smallest we should read all 200
+        # messages from beginning.
+        self.assert_message_count([message for message in consumer], 200)
+
+    @kafka_versions('all')
+    def test_simple_consumer_largest_offset_reset(self):
+        self.send_messages(0, range(0, 100))
+        self.send_messages(1, range(100, 200))
+
+        # Default largest
+        consumer = self.consumer()
+        # Move fetch offset 300 messages past the tail (out of range)
+        consumer.seek(300, 2)
+        # Since auto_offset_reset is set to largest we should not read any
+        # messages.
+        self.assert_message_count([message for message in consumer], 0)
+        # Send 200 new messages to the queue
+        self.send_messages(0, range(200, 300))
+        self.send_messages(1, range(300, 400))
+        # Since the offset is set to largest we should read all the new messages.
+        self.assert_message_count([message for message in consumer], 200)
+
+    @kafka_versions('all')
+    def test_simple_consumer_no_reset(self):
+        self.send_messages(0, range(0, 100))
+        self.send_messages(1, range(100, 200))
+
+        # Disable automatic offset reset
+        consumer = self.consumer(auto_offset_reset=None)
+        # Move fetch offset 300 messages past the tail (out of range)
+        consumer.seek(300, 2)
+        with self.assertRaises(OffsetOutOfRangeError):
+            consumer.get_message()
+
+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
+    def test_simple_consumer_load_initial_offsets(self):
+        self.send_messages(0, range(0, 100))
+        self.send_messages(1, range(100, 200))
+
+        # Create 1st consumer and change offsets
+        consumer = self.consumer()
+        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
+        consumer.offsets.update({0:51, 1:101})
+        # Update counter after manual offsets update
+        consumer.count_since_commit += 1
+        consumer.commit()
+
+        # Create 2nd consumer and check initial offsets
+        consumer = self.consumer(auto_commit=False)
+        self.assertEqual(consumer.offsets, {0: 51, 1: 101})
+
    @kafka_versions("all")
    def test_simple_consumer__seek(self):
        self.send_messages(0, range(0, 100))
@@ -100,17 +164,31 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        consumer.seek(-13, 2)
        self.assert_message_count([ message for message in consumer ], 13)

+        # Set absolute offset
+        consumer.seek(100)
+        self.assert_message_count([ message for message in consumer ], 0)
+        consumer.seek(100, partition=0)
+        self.assert_message_count([ message for message in consumer ], 0)
+        consumer.seek(101, partition=1)
+        self.assert_message_count([ message for message in consumer ], 0)
+        consumer.seek(90, partition=0)
+        self.assert_message_count([ message for message in consumer ], 10)
+        consumer.seek(20, partition=1)
+        self.assert_message_count([ message for message in consumer ], 80)
+        consumer.seek(0, partition=1)
+        self.assert_message_count([ message for message in consumer ], 100)
+
        consumer.stop()

    @kafka_versions("all")
    def test_simple_consumer_blocking(self):
        consumer = self.consumer()

-        # Ask for 5 messages, nothing in queue, block 5 seconds
+        # Ask for 5 messages, nothing in queue, block 1 second
        with Timer() as t:
-            messages = consumer.get_messages(block=True, timeout=5)
+            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)
-        self.assertGreaterEqual(t.interval, 5)
+        self.assertGreaterEqual(t.interval, 1)

        self.send_messages(0, range(0, 10))

@@ -120,11 +198,19 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

-        # Ask for 10 messages, get 5 back, block 5 seconds
+        # Ask for 10 messages, get 5 back, block 1 second
        with Timer() as t:
-            messages = consumer.get_messages(count=10, block=True, timeout=5)
+            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
-        self.assertGreaterEqual(t.interval, 5)
+        self.assertGreaterEqual(t.interval, 1)
+
+        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
+        # second, get 5 back, no blocking
+        self.send_messages(0, range(0, 5))
+        with Timer() as t:
+            messages = consumer.get_messages(count=10, block=1, timeout=1)
+            self.assert_message_count(messages, 5)
+        self.assertLessEqual(t.interval, 1)

        consumer.stop()

@@ -172,12 +258,12 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
    def test_multi_process_consumer_blocking(self):
        consumer = self.consumer(consumer = MultiProcessConsumer)

-        # Ask for 5 messages, No messages in queue, block 5 seconds
+        # Ask for 5 messages, No messages in queue, block 1 second
        with Timer() as t:
-            messages = consumer.get_messages(block=True, timeout=5)
+            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)

-        self.assertGreaterEqual(t.interval, 5)
+        self.assertGreaterEqual(t.interval, 1)

        # Send 10 messages
        self.send_messages(0, range(0, 10))
@@ -188,11 +274,21 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

-        # Ask for 10 messages, 5 in queue, block 5 seconds
+        # Ask for 10 messages, 5 in queue, block 1 second
        with Timer() as t:
-            messages = consumer.get_messages(count=10, block=True, timeout=5)
+            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
-        self.assertGreaterEqual(t.interval, 4.95)
+        self.assertGreaterEqual(t.interval, 1)
+
+        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
+        # second, get at least one back, no blocking
+        self.send_messages(0, range(0, 5))
+        with Timer() as t:
+            messages = consumer.get_messages(count=10, block=1, timeout=1)
+            received_message_count = len(messages)
+            self.assertGreaterEqual(received_message_count, 1)
+            self.assert_message_count(messages, received_message_count)
+        self.assertLessEqual(t.interval, 1)

        consumer.stop()

@@ -201,7 +297,10 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

-        consumer = MultiProcessConsumer(self.client, "group1", self.topic, auto_commit=False)
+        # set group to None and auto_commit to False to avoid interactions w/
+        # offset commit/fetch apis
+        consumer = MultiProcessConsumer(self.client, None, self.topic,
+                                        auto_commit=False, iter_timeout=0)

        self.assertEqual(consumer.pending(), 20)
        self.assertEqual(consumer.pending(partitions=[0]), 10)
@@ -209,6 +308,24 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):

        consumer.stop()

+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
+    def test_multi_process_consumer_load_initial_offsets(self):
+        self.send_messages(0, range(0, 10))
+        self.send_messages(1, range(10, 20))
+
+        # Create 1st consumer and change offsets
+        consumer = self.consumer()
+        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
+        consumer.offsets.update({0:5, 1:15})
+        # Update counter after manual offsets update
+        consumer.count_since_commit += 1
+        consumer.commit()
+
+        # Create 2nd consumer and check initial offsets
+        consumer = self.consumer(consumer = MultiProcessConsumer,
+                                 auto_commit=False)
+        self.assertEqual(consumer.offsets, {0: 5, 1: 15})
+
    @kafka_versions("all")
    def test_large_messages(self):
        # Produce 10 "normal" size messages
@@ -257,7 +374,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):

        big_consumer.stop()

-    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
    def test_offset_behavior__resuming_behavior(self):
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))
@@ -284,6 +401,41 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        consumer1.stop()
        consumer2.stop()

+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
+    def test_multi_process_offset_behavior__resuming_behavior(self):
+        self.send_messages(0, range(0, 100))
+        self.send_messages(1, range(100, 200))
+
+        # Start a consumer
+        consumer1 = self.consumer(
+            consumer=MultiProcessConsumer,
+            auto_commit_every_t = None,
+            auto_commit_every_n = 20,
+            )
+
+        # Grab the first 195 messages
+        output_msgs1 = []
+        idx = 0
+        for message in consumer1:
+            output_msgs1.append(message.message.value)
+            idx += 1
+            if idx >= 195:
+                break
+        self.assert_message_count(output_msgs1, 195)
+
+        # The total offset across both partitions should be at 180
+        consumer2 = self.consumer(
+            consumer=MultiProcessConsumer,
+            auto_commit_every_t = None,
+            auto_commit_every_n = 20,
+            )
+
+        # 181-200
+        self.assert_message_count([ message for message in consumer2 ], 20)
+
+        consumer1.stop()
+        consumer2.stop()
+
    # TODO: Make this a unit test -- should not require integration
    @kafka_versions("all")
    def test_fetch_buffer_size(self):
@@ -330,7 +482,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        consumer = self.kafka_consumer(auto_offset_reset='smallest',
                                       consumer_timeout_ms=TIMEOUT_MS)

-        # Ask for 5 messages, nothing in queue, block 5 seconds
+        # Ask for 5 messages, nothing in queue, block 500ms
        with Timer() as t:
            with self.assertRaises(ConsumerTimeout):
                msg = consumer.next()
@@ -347,7 +499,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(len(messages), 5)
        self.assertLess(t.interval, TIMEOUT_MS / 1000.0 )

-        # Ask for 10 messages, get 5 back, block 5 seconds
+        # Ask for 10 messages, get 5 back, block 500ms
        messages = set()
        with Timer() as t:
            with self.assertRaises(ConsumerTimeout):
@@ -357,9 +509,9 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(len(messages), 5)
        self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 )

-    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
    def test_kafka_consumer__offset_commit_resume(self):
-        GROUP_ID = random_string(10)
+        GROUP_ID = random_string(10).encode('utf-8')

        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))
--- a/test/test_failover_integration.py
+++ b/test/test_failover_integration.py
@@ -2,11 +2,10 @@ import logging
 import os
 import time

-from . import unittest
-
-from kafka import KafkaClient, SimpleConsumer
+from kafka import KafkaClient, SimpleConsumer, KeyedProducer
 from kafka.common import TopicAndPartition, FailedPayloadsError, ConnectionError
 from kafka.producer.base import Producer
+from kafka.util import kafka_bytestring

 from test.fixtures import ZookeeperFixture, KafkaFixture
 from test.testutil import (
@@ -14,46 +13,56 @@ from test.testutil import (
 )


+log = logging.getLogger(__name__)
+
+
 class TestFailover(KafkaIntegrationTestCase):
    create_client = False

-    @classmethod
-    def setUpClass(cls):  # noqa
+    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            return

        zk_chroot = random_string(10)
-        replicas = 2
-        partitions = 2
+        replicas = 3
+        partitions = 3

-        # mini zookeeper, 2 kafka brokers
-        cls.zk = ZookeeperFixture.instance()
-        kk_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
-        cls.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]
+        # mini zookeeper, 3 kafka brokers
+        self.zk = ZookeeperFixture.instance()
+        kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
+        self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

-        hosts = ['%s:%d' % (b.host, b.port) for b in cls.brokers]
-        cls.client = KafkaClient(hosts)
+        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
+        self.client = KafkaClient(hosts)
+        super(TestFailover, self).setUp()

-    @classmethod
-    def tearDownClass(cls):
+    def tearDown(self):
+        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

-        cls.client.close()
-        for broker in cls.brokers:
+        self.client.close()
+        for broker in self.brokers:
            broker.close()
-        cls.zk.close()
+        self.zk.close()

    @kafka_versions("all")
    def test_switch_leader(self):
        topic = self.topic
        partition = 0

-        # Test the base class Producer -- send_messages to a specific partition
+        # Testing the base Producer class here so that we can easily send
+        # messages to a specific partition, kill the leader for that partition
+        # and check that after another broker takes leadership the producer
+        # is able to resume sending messages
+
+        # require that the server commit messages to all in-sync replicas
+        # so that failover doesn't lose any messages on server-side
+        # and we can assert that server-side message count equals client-side
        producer = Producer(self.client, async=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

-        # Send 10 random messages
+        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
@@ -65,12 +74,12 @@ class TestFailover(KafkaIntegrationTestCase):
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
-                logging.debug("attempting to send 'success' message after leader killed")
+                log.debug("attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
-                logging.debug("success!")
+                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, ConnectionError):
-                logging.debug("caught exception sending message -- will retry")
+                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
@@ -80,63 +89,132 @@ class TestFailover(KafkaIntegrationTestCase):
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
-        # Should be equal to 10 before + 1 recovery + 10 after
-        self.assert_message_count(topic, 201, partitions=(partition,))
+        # Should be equal to 100 before + 1 recovery + 100 after
+        # at_least=True because exactly once delivery isn't really a thing
+        self.assert_message_count(topic, 201, partitions=(partition,),
+                                  at_least=True)

-
-    #@kafka_versions("all")
-    @unittest.skip("async producer does not support reliable failover yet")
+    @kafka_versions("all")
    def test_switch_leader_async(self):
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
-        producer = Producer(self.client, async=True)
+        producer = Producer(self.client, async=True,
+                            batch_send_every_n=15,
+                            batch_send_every_t=3,
+                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
+                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
+        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

-        logging.debug("attempting to send 'success' message after leader killed")
+        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
-        producer.send_messages(topic, partition, 'success')
+        producer.send_messages(topic, partition, b'success')
+        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
+        self._send_random_messages(producer, topic, partition + 1, 10)

-        # wait until producer queue is empty
-        while not producer.queue.empty():
-            time.sleep(0.1)
+        # Stop the producer and wait for it to shut down
        producer.stop()
+        started = time.time()
+        timeout = 60
+        while (time.time() - started) < timeout:
+            if not producer.thread.is_alive():
+                break
+            time.sleep(0.1)
+        else:
+            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
-        self.assert_message_count(topic, 21, partitions=(partition,))
+        # at_least=True because exactly once delivery isn't really a thing
+        self.assert_message_count(topic, 21, partitions=(partition,),
+                                  at_least=True)
+        self.assert_message_count(topic, 21, partitions=(partition + 1,),
+                                  at_least=True)
+
+    @kafka_versions("all")
+    def test_switch_leader_keyed_producer(self):
+        topic = self.topic
+
+        producer = KeyedProducer(self.client, async=False)
+
+        # Send 10 random messages
+        for _ in range(10):
+            key = random_string(3).encode('utf-8')
+            msg = random_string(10).encode('utf-8')
+            producer.send_messages(topic, key, msg)
+
+        # kill leader for partition 0
+        self._kill_leader(topic, 0)
+
+        recovered = False
+        started = time.time()
+        timeout = 60
+        while not recovered and (time.time() - started) < timeout:
+            try:
+                key = random_string(3).encode('utf-8')
+                msg = random_string(10).encode('utf-8')
+                producer.send_messages(topic, key, msg)
+                if producer.partitioners[kafka_bytestring(topic)].partition(key) == 0:
+                    recovered = True
+            except (FailedPayloadsError, ConnectionError):
+                log.debug("caught exception sending message -- will retry")
+                continue
+
+        # Verify we successfully sent the message
+        self.assertTrue(recovered)
+
+        # send some more messages just to make sure no more exceptions
+        for _ in range(10):
+            key = random_string(3).encode('utf-8')
+            msg = random_string(10).encode('utf-8')
+            producer.send_messages(topic, key, msg)
+
+    @kafka_versions("all")
+    def test_switch_leader_simple_consumer(self):
+        producer = Producer(self.client, async=False)
+        consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
+        self._send_random_messages(producer, self.topic, 0, 2)
+        consumer.get_messages()
+        self._kill_leader(self.topic, 0)
+        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
-            logging.debug('_send_random_message to %s:%d -- try %d', topic, partition, j)
-            resp = producer.send_messages(topic, partition, random_string(10))
-            if len(resp) > 0:
-                self.assertEqual(resp[0].error, 0)
-            logging.debug('_send_random_message to %s:%d -- try %d success', topic, partition, j)
+            msg = 'msg {0}: {1}'.format(j, random_string(10))
+            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
+            while True:
+                try:
+                    producer.send_messages(topic, partition, msg.encode('utf-8'))
+                except:
+                    log.exception('failure in _send_random_messages - retrying')
+                    continue
+                else:
+                    break

    def _kill_leader(self, topic, partition):
-        leader = self.client.topics_to_brokers[TopicAndPartition(topic, partition)]
+        leader = self.client.topics_to_brokers[TopicAndPartition(kafka_bytestring(topic), partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

-    def assert_message_count(self, topic, check_count, timeout=10, partitions=None):
+    def assert_message_count(self, topic, check_count, timeout=10,
+                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = KafkaClient(hosts)
-        group = random_string(10)
-        consumer = SimpleConsumer(client, group, topic,
+        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)
@@ -145,10 +223,17 @@ class TestFailover(KafkaIntegrationTestCase):
        pending = consumer.pending(partitions)

        # Keep checking if it isn't immediately correct, subject to timeout
-        while pending != check_count and (time.time() - started_at < timeout):
+        while pending < check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)
+            time.sleep(0.5)

        consumer.stop()
        client.close()

-        self.assertEqual(pending, check_count)
+        if pending < check_count:
+            self.fail('Too few pending messages: found %d, expected %d' %
+                      (pending, check_count))
+        elif pending > check_count and not at_least:
+            self.fail('Too many pending messages: found %d, expected %d' %
+                      (pending, check_count))
+        return True
--- a/test/test_partitioner.py
+++ b/test/test_partitioner.py
@@ -0,0 +1,23 @@
+import six
+from . import unittest
+
+from kafka.partitioner import (Murmur2Partitioner)
+
+class TestMurmurPartitioner(unittest.TestCase):
+    def test_hash_bytes(self):
+        p = Murmur2Partitioner(range(1000))
+        self.assertEqual(p.partition(bytearray(b'test')), p.partition(b'test'))
+
+    def test_hash_encoding(self):
+        p = Murmur2Partitioner(range(1000))
+        self.assertEqual(p.partition('test'), p.partition(u'test'))
+
+    def test_murmur2_java_compatibility(self):
+        p = Murmur2Partitioner(range(1000))
+        # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
+        self.assertEqual(681, p.partition(b''))
+        self.assertEqual(524, p.partition(b'a'))
+        self.assertEqual(434, p.partition(b'ab'))
+        self.assertEqual(107, p.partition(b'abc'))
+        self.assertEqual(566, p.partition(b'123456789'))
+        self.assertEqual(742, p.partition(b'\x00 '))
--- a/test/test_producer.py
+++ b/test/test_producer.py
@@ -1,11 +1,29 @@
 # -*- coding: utf-8 -*-

+import collections
 import logging
+import time

-from mock import MagicMock
+from mock import MagicMock, patch
 from . import unittest

-from kafka.producer.base import Producer
+from kafka import KafkaClient, SimpleProducer, KeyedProducer
+from kafka.common import (
+    AsyncProducerQueueFull, FailedPayloadsError, NotLeaderForPartitionError,
+    ProduceResponse, RetryOptions, TopicAndPartition
+)
+from kafka.producer.base import Producer, _send_upstream
+from kafka.protocol import CODEC_NONE
+
+import threading
+try:
+    from queue import Empty, Queue
+except ImportError:
+    from Queue import Empty, Queue
+try:
+    xrange
+except NameError:
+    xrange = range


 class TestKafkaProducer(unittest.TestCase):
@@ -15,7 +33,8 @@ class TestKafkaProducer(unittest.TestCase):
        topic = b"test-topic"
        partition = 0

-        bad_data_types = (u'你怎么样?', 12, ['a', 'list'], ('a', 'tuple'), {'a': 'dict'})
+        bad_data_types = (u'你怎么样?', 12, ['a', 'list'],
+                          ('a', 'tuple'), {'a': 'dict'}, None,)
        for m in bad_data_types:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s", type(m))
@@ -26,9 +45,26 @@ class TestKafkaProducer(unittest.TestCase):
            # This should not raise an exception
            producer.send_messages(topic, partition, m)

-    def test_topic_message_types(self):
-        from kafka.producer.simple import SimpleProducer
+    def test_keyedproducer_message_types(self):
+        client = MagicMock()
+        client.get_partition_ids_for_topic.return_value = [0, 1]
+        producer = KeyedProducer(client)
+        topic = b"test-topic"
+        key = b"testkey"

+        bad_data_types = (u'你怎么样?', 12, ['a', 'list'],
+                          ('a', 'tuple'), {'a': 'dict'},)
+        for m in bad_data_types:
+            with self.assertRaises(TypeError):
+                logging.debug("attempting to send message of type %s", type(m))
+                producer.send_messages(topic, key, m)
+
+        good_data_types = (b'a string!', None,)
+        for m in good_data_types:
+            # This should not raise an exception
+            producer.send_messages(topic, key, m)
+
+    def test_topic_message_types(self):
        client = MagicMock()

        def partitions(topic):
@@ -40,3 +76,188 @@ class TestKafkaProducer(unittest.TestCase):
        topic = b"test-topic"
        producer.send_messages(topic, b'hi')
        assert client.send_produce_request.called
+
+    @patch('kafka.producer.base._send_upstream')
+    def test_producer_async_queue_overfilled(self, mock):
+        queue_size = 2
+        producer = Producer(MagicMock(), async=True,
+                            async_queue_maxsize=queue_size)
+
+        topic = b'test-topic'
+        partition = 0
+        message = b'test-message'
+
+        with self.assertRaises(AsyncProducerQueueFull):
+            message_list = [message] * (queue_size + 1)
+            producer.send_messages(topic, partition, *message_list)
+        self.assertEqual(producer.queue.qsize(), queue_size)
+        for _ in xrange(producer.queue.qsize()):
+            producer.queue.get()
+
+    def test_producer_sync_fail_on_error(self):
+        error = FailedPayloadsError('failure')
+        with patch.object(KafkaClient, 'load_metadata_for_topics'):
+            with patch.object(KafkaClient, 'get_partition_ids_for_topic', return_value=[0, 1]):
+                with patch.object(KafkaClient, '_send_broker_aware_request', return_value = [error]):
+
+                    client = KafkaClient(MagicMock())
+                    producer = SimpleProducer(client, async=False, sync_fail_on_error=False)
+
+                    # This should not raise
+                    (response,) = producer.send_messages('foobar', b'test message')
+                    self.assertEqual(response, error)
+
+                    producer = SimpleProducer(client, async=False, sync_fail_on_error=True)
+                    with self.assertRaises(FailedPayloadsError):
+                        producer.send_messages('foobar', b'test message')
+
+    def test_cleanup_is_not_called_on_stopped_producer(self):
+        producer = Producer(MagicMock(), async=True)
+        producer.stopped = True
+        with patch.object(producer, 'stop') as mocked_stop:
+            producer._cleanup_func(producer)
+            self.assertEqual(mocked_stop.call_count, 0)
+
+    def test_cleanup_is_called_on_running_producer(self):
+        producer = Producer(MagicMock(), async=True)
+        producer.stopped = False
+        with patch.object(producer, 'stop') as mocked_stop:
+            producer._cleanup_func(producer)
+            self.assertEqual(mocked_stop.call_count, 1)
+
+
+class TestKafkaProducerSendUpstream(unittest.TestCase):
+
+    def setUp(self):
+        self.client = MagicMock()
+        self.queue = Queue()
+
+    def _run_process(self, retries_limit=3, sleep_timeout=1):
+        # run _send_upstream process with the queue
+        stop_event = threading.Event()
+        retry_options = RetryOptions(limit=retries_limit,
+                                     backoff_ms=50,
+                                     retry_on_timeouts=False)
+        self.thread = threading.Thread(
+            target=_send_upstream,
+            args=(self.queue, self.client, CODEC_NONE,
+                  0.3, # batch time (seconds)
+                  3, # batch length
+                  Producer.ACK_AFTER_LOCAL_WRITE,
+                  Producer.DEFAULT_ACK_TIMEOUT,
+                  retry_options,
+                  stop_event))
+        self.thread.daemon = True
+        self.thread.start()
+        time.sleep(sleep_timeout)
+        stop_event.set()
+
+    def test_wo_retries(self):
+        """With a plain MagicMock client, expect exactly one produce call per batch."""
+        # let's fill the queue with 10 messages for 1 partition
+        for i in range(10):
+            self.queue.put((TopicAndPartition("test", 0), "msg %i" % i, "key %i" % i))
+
+        self._run_process()
+
+        # the queue should be empty at the end of the test
+        self.assertEqual(self.queue.empty(), True)
+
+        # there should be 4 non-empty calls:
+        # 3 batches of 3 msgs each + 1 batch of 1 message
+        self.assertEqual(self.client.send_produce_request.call_count, 4)
+
+    def test_first_send_failed(self):
+        """The first produce call fails every request; all later calls succeed."""
+        # let's fill the queue with 10 messages for 10 different partitions
+        # to show how retries should work ideally
+        for i in range(10):
+            self.queue.put((TopicAndPartition("test", i), "msg %i" % i, "key %i" % i))
+
+        # Mock offsets counter for closure
+        offsets = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
+        self.client.is_first_time = True
+        def send_side_effect(reqs, *args, **kwargs):
+            if self.client.is_first_time:
+                self.client.is_first_time = False
+                return [FailedPayloadsError(req) for req in reqs]
+            responses = []
+            for req in reqs:
+                offset = offsets[req.topic][req.partition]
+                offsets[req.topic][req.partition] += len(req.messages)
+                responses.append(
+                    ProduceResponse(req.topic, req.partition, 0, offset)
+                )
+            return responses
+
+        self.client.send_produce_request.side_effect = send_side_effect
+
+        self._run_process(2)
+
+        # the queue should be empty at the end of the test
+        self.assertEqual(self.queue.empty(), True)
+
+        # there should be 5 non-empty calls: 1st failed batch of 3 msgs
+        # plus 3 batches of 3 msgs each + 1 batch of 1 message
+        self.assertEqual(self.client.send_produce_request.call_count, 5)
+
+    def test_with_limited_retries(self):
+        """Every produce call fails, so each batch is sent once plus 3 retries."""
+        # let's create a queue and add 10 messages for 10 different partitions
+        # to show how retries should work ideally
+        for i in range(10):
+            self.queue.put((TopicAndPartition("test", i), "msg %i" % i, "key %i" % i))
+
+        def send_side_effect(reqs, *args, **kwargs):
+            return [FailedPayloadsError(req) for req in reqs]
+
+        self.client.send_produce_request.side_effect = send_side_effect
+
+        self._run_process(3, 3)
+
+        # the queue should be empty at the end of the test
+        self.assertEqual(self.queue.empty(), True)
+
+        # there should be 16 non-empty calls:
+        # 3 initial batches of 3 msgs each + 1 initial batch of 1 msg +
+        # 3 retries of the batches above = (1 + 3 retries) * 4 batches = 16
+        self.assertEqual(self.client.send_produce_request.call_count, 16)
+
+    def test_async_producer_not_leader(self):
+        """First call answers NotLeaderForPartition for every request; later calls succeed."""
+        for i in range(10):
+            self.queue.put((TopicAndPartition("test", i), "msg %i" % i, "key %i" % i))
+
+        # Mock offsets counter for closure
+        offsets = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
+        self.client.is_first_time = True
+        def send_side_effect(reqs, *args, **kwargs):
+            if self.client.is_first_time:
+                self.client.is_first_time = False
+                return [ProduceResponse(req.topic, req.partition,
+                                        NotLeaderForPartitionError.errno, -1)
+                        for req in reqs]
+
+            responses = []
+            for req in reqs:
+                offset = offsets[req.topic][req.partition]
+                offsets[req.topic][req.partition] += len(req.messages)
+                responses.append(
+                    ProduceResponse(req.topic, req.partition, 0, offset)
+                )
+            return responses
+
+        self.client.send_produce_request.side_effect = send_side_effect
+
+        self._run_process(2)
+
+        # the queue should be empty at the end of the test
+        self.assertEqual(self.queue.empty(), True)
+
+        # there should be 5 non-empty calls: 1st failed batch of 3 msgs
+        # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
+        self.assertEqual(self.client.send_produce_request.call_count, 5)
+
+    def tearDown(self):
+        for _ in xrange(self.queue.qsize()):
+            self.queue.get()
--- a/test/test_producer_integration.py
+++ b/test/test_producer_integration.py
@@ -14,12 +14,13 @@ from kafka.common import (
    FetchRequest, ProduceRequest,
    UnknownTopicOrPartitionError, LeaderNotAvailableError
 )
+from kafka.producer.base import Producer

 from test.fixtures import ZookeeperFixture, KafkaFixture
 from test.testutil import KafkaIntegrationTestCase, kafka_versions

+
 class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
-    topic = b'produce_topic'

    @classmethod
    def setUpClass(cls):  # noqa
@@ -71,9 +72,9 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        start_offset = self.current_offset(self.topic, 0)

        message1 = create_gzip_message([
-            ("Gzipped 1 %d" % i).encode('utf-8') for i in range(100)])
+            (("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)])
        message2 = create_gzip_message([
-            ("Gzipped 2 %d" % i).encode('utf-8') for i in range(100)])
+            (("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)])

        self.assert_produce_request(
            [ message1, message2 ],
@@ -87,8 +88,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request([
-                create_snappy_message(["Snappy 1 %d" % i for i in range(100)]),
-                create_snappy_message(["Snappy 2 %d" % i for i in range(100)]),
+                create_snappy_message([("Snappy 1 %d" % i, None) for i in range(100)]),
+                create_snappy_message([("Snappy 2 %d" % i, None) for i in range(100)]),
            ],
            start_offset,
            200,
@@ -102,13 +103,13 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        messages = [
            create_message(b"Just a plain message"),
            create_gzip_message([
-                ("Gzipped %d" % i).encode('utf-8') for i in range(100)]),
+                (("Gzipped %d" % i).encode('utf-8'), None) for i in range(100)]),
        ]

        # All snappy integration tests fail with nosnappyjava
        if False and has_snappy():
            msg_count += 100
-            messages.append(create_snappy_message(["Snappy %d" % i for i in range(100)]))
+            messages.append(create_snappy_message([("Snappy %d" % i, None) for i in range(100)]))

        self.assert_produce_request(messages, start_offset, msg_count)

@@ -118,7 +119,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):

        self.assert_produce_request([
            create_gzip_message([
-                ("Gzipped batch 1, message %d" % i).encode('utf-8')
+                (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                for i in range(50000)])
            ],
            start_offset,
@@ -127,7 +128,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):

        self.assert_produce_request([
            create_gzip_message([
-                ("Gzipped batch 1, message %d" % i).encode('utf-8')
+                (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                for i in range(50000)])
            ],
            start_offset+50000,
@@ -140,25 +141,26 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):

    @kafka_versions("all")
    def test_simple_producer(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-        start_offset1 = self.current_offset(self.topic, 1)
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]
+
        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
-        self.assert_produce_response(resp, start_offset0)
+        self.assert_produce_response(resp, start_offsets[0])

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
-        self.assert_produce_response(resp, start_offset1)
+        self.assert_produce_response(resp, start_offsets[1])

-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two") ])
-        self.assert_fetch_offset(1, start_offset1, [ self.msg("three") ])
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two") ])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("three") ])

        # Goes back to the first partition because there's only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
-        self.assert_produce_response(resp, start_offset0+2)
-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])
+        self.assert_produce_response(resp, start_offsets[0]+2)
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])

        producer.stop()

@@ -194,111 +196,38 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(resp3[0].partition, 0)

    @kafka_versions("all")
-    def test_round_robin_partitioner(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-        start_offset1 = self.current_offset(self.topic, 1)
+    def test_async_simple_producer(self):
+        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
+        start_offset = self.current_offset(self.topic, partition)

-        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
-        resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
-        resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
-        resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
-        resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))
-
-        self.assert_produce_response(resp1, start_offset0+0)
-        self.assert_produce_response(resp2, start_offset1+0)
-        self.assert_produce_response(resp3, start_offset0+1)
-        self.assert_produce_response(resp4, start_offset1+1)
-
-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("three") ])
-        self.assert_fetch_offset(1, start_offset1, [ self.msg("two"), self.msg("four")  ])
-
-        producer.stop()
-
-    @kafka_versions("all")
-    def test_hashed_partitioner(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-        start_offset1 = self.current_offset(self.topic, 1)
-
-        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
-        resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
-        resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
-        resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
-        resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
-        resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))
-
-        offsets = {0: start_offset0, 1: start_offset1}
-        messages = {0: [], 1: []}
-
-        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
-        resps = [resp1, resp2, resp3, resp4, resp5]
-        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]
-
-        for key, resp, msg in zip(keys, resps, msgs):
-            k = hash(key) % 2
-            offset = offsets[k]
-            self.assert_produce_response(resp, offset)
-            offsets[k] += 1
-            messages[k].append(msg)
-
-        self.assert_fetch_offset(0, start_offset0, messages[0])
-        self.assert_fetch_offset(1, start_offset1, messages[1])
-
-        producer.stop()
-
-    @kafka_versions("all")
-    def test_acks_none(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-
-        producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_NOT_REQUIRED,
-            random_start=False)
+        producer = SimpleProducer(self.client, async=True, random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
+        # flush messages
        producer.stop()

-    @kafka_versions("all")
-    def test_acks_local_write(self):
-        start_offset0 = self.current_offset(self.topic, 0)
+        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

-        producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
-            random_start=False)
-        resp = producer.send_messages(self.topic, self.msg("one"))
-
-        self.assert_produce_response(resp, start_offset0)
-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
-
-        producer.stop()
-
-    @kafka_versions("all")
-    def test_acks_cluster_commit(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-
-        producer = SimpleProducer(
-            self.client,
-            req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT,
-            random_start=False)
-
-        resp = producer.send_messages(self.topic, self.msg("one"))
-        self.assert_produce_response(resp, start_offset0)
-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
-
-        producer.stop()

    @kafka_versions("all")
    def test_batched_simple_producer__triggers_by_message(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-        start_offset1 = self.current_offset(self.topic, 1)
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

+        # Configure batch producer
+        batch_messages = 5
+        batch_interval = 5
        producer = SimpleProducer(
            self.client,
-            batch_send=True,
-            batch_send_every_n=5,
-            batch_send_every_t=20,
+            async=True,
+            batch_send_every_n=batch_messages,
+            batch_send_every_t=batch_interval,
            random_start=False)

-        # Send 5 messages and do a fetch
-        resp = producer.send_messages(self.topic,
+        # Send 4 messages -- should not trigger a batch
+        resp = producer.send_messages(
+            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
@@ -309,10 +238,12 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
-        self.assert_fetch_offset(0, start_offset0, [])
-        self.assert_fetch_offset(1, start_offset1, [])
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

-        resp = producer.send_messages(self.topic,
+        # send 3 more messages -- should trigger batch on first 5
+        resp = producer.send_messages(
+            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
@@ -321,34 +252,48 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

-        self.assert_fetch_offset(0, start_offset0, [
+        # Wait until producer has pulled all messages from internal queue
+        # this should signal that the first batch was sent, and the producer
+        # is now waiting for enough messages to batch again (or a timeout)
+        timeout = 5
+        start = time.time()
+        while not producer.queue.empty():
+            if time.time() - start > timeout:
+                self.fail('timeout waiting for producer queue to empty')
+            time.sleep(0.1)
+
+        # send messages groups all *msgs in a single call to the same partition
+        # so we should see all messages from the first call in one partition
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

-        self.assert_fetch_offset(1, start_offset1, [
+        # Because we are batching every 5 messages, we should only see one
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
-        #    self.msg("six"),
-        #    self.msg("seven"),
        ])

        producer.stop()

    @kafka_versions("all")
    def test_batched_simple_producer__triggers_by_time(self):
-        start_offset0 = self.current_offset(self.topic, 0)
-        start_offset1 = self.current_offset(self.topic, 1)
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

-        producer = SimpleProducer(self.client,
-            batch_send=True,
+        batch_interval = 5
+        producer = SimpleProducer(
+            self.client,
+            async=True,
            batch_send_every_n=100,
-            batch_send_every_t=5,
+            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 5 messages and do a fetch
-        resp = producer.send_messages(self.topic,
+        resp = producer.send_messages(
+            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
@@ -359,8 +304,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
-        self.assert_fetch_offset(0, start_offset0, [])
-        self.assert_fetch_offset(1, start_offset1, [])
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(self.topic,
            self.msg("five"),
@@ -372,16 +317,16 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
-        time.sleep(5)
+        time.sleep(batch_interval)

-        self.assert_fetch_offset(0, start_offset0, [
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

-        self.assert_fetch_offset(1, start_offset1, [
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
@@ -389,40 +334,168 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):

        producer.stop()

+
+    ############################
+    #   KeyedProducer Tests    #
+    ############################
+
+    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
+    def test_keyedproducer_null_payload(self):
+        """None payloads sent through KeyedProducer are fetched back as None."""
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]
+
+        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
+
+        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
+        self.assert_produce_response(resp, start_offsets[0])
+        resp = producer.send_messages(self.topic, self.key("key2"), None)
+        self.assert_produce_response(resp, start_offsets[1])
+        resp = producer.send_messages(self.topic, self.key("key3"), None)
+        self.assert_produce_response(resp, start_offsets[0]+1)
+        resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
+        self.assert_produce_response(resp, start_offsets[1]+1)
+
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), None ])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [ None, self.msg("four") ])
+
+        producer.stop()
+
    @kafka_versions("all")
-    def test_async_simple_producer(self):
-        start_offset0 = self.current_offset(self.topic, 0)
+    def test_round_robin_partitioner(self):
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

-        producer = SimpleProducer(self.client, async=True, random_start=False)
-        resp = producer.send_messages(self.topic, self.msg("one"))
-        self.assertEqual(len(resp), 0)
+        producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
+        resp1 = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
+        resp2 = producer.send_messages(self.topic, self.key("key2"), self.msg("two"))
+        resp3 = producer.send_messages(self.topic, self.key("key3"), self.msg("three"))
+        resp4 = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))

-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
+        self.assert_produce_response(resp1, start_offsets[0]+0)
+        self.assert_produce_response(resp2, start_offsets[1]+0)
+        self.assert_produce_response(resp3, start_offsets[0]+1)
+        self.assert_produce_response(resp4, start_offsets[1]+1)
+
+        self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("three") ])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("two"), self.msg("four")  ])
+
+        producer.stop()
+
+    @kafka_versions("all")
+    def test_hashed_partitioner(self):
+        partitions = self.client.get_partition_ids_for_topic(self.topic)
+        start_offsets = [self.current_offset(self.topic, p) for p in partitions]
+
+        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
+        resp1 = producer.send_messages(self.topic, self.key("1"), self.msg("one"))
+        resp2 = producer.send_messages(self.topic, self.key("2"), self.msg("two"))
+        resp3 = producer.send_messages(self.topic, self.key("3"), self.msg("three"))
+        resp4 = producer.send_messages(self.topic, self.key("3"), self.msg("four"))
+        resp5 = producer.send_messages(self.topic, self.key("4"), self.msg("five"))
+
+        offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
+        messages = {partitions[0]: [], partitions[1]: []}
+
+        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
+        resps = [resp1, resp2, resp3, resp4, resp5]
+        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]
+
+        for key, resp, msg in zip(keys, resps, msgs):
+            k = hash(key) % 2
+            partition = partitions[k]
+            offset = offsets[partition]
+            self.assert_produce_response(resp, offset)
+            offsets[partition] += 1
+            messages[partition].append(msg)
+
+        self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
+        self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])

        producer.stop()

    @kafka_versions("all")
    def test_async_keyed_producer(self):
-        start_offset0 = self.current_offset(self.topic, 0)
+        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
+        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)

-        resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
+        resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
        self.assertEqual(len(resp), 0)

-        self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
+        # wait up to 10s for a new highwatermark; never hang if the send is lost
+        deadline = time.time() + 10
+        while time.time() < deadline and self.current_offset(self.topic, partition) == start_offset:
+            time.sleep(0.1)
+        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])

        producer.stop()

-    def assert_produce_request(self, messages, initial_offset, message_ct):
-        produce = ProduceRequest(self.topic, 0, messages=messages)
+    ############################
+    #   Producer ACK Tests     #
+    ############################
+
+    @kafka_versions("all")
+    def test_acks_none(self):
+        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
+        start_offset = self.current_offset(self.topic, partition)
+
+        producer = Producer(
+            self.client,
+            req_acks=Producer.ACK_NOT_REQUIRED,
+        )
+        resp = producer.send_messages(self.topic, partition, self.msg("one"))
+
+        # No response from produce request with no acks required
+        self.assertEqual(len(resp), 0)
+
+        # But the message should still have been delivered
+        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
+        producer.stop()
+
+    @kafka_versions("all")
+    def test_acks_local_write(self):
+        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
+        start_offset = self.current_offset(self.topic, partition)
+
+        producer = Producer(
+            self.client,
+            req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
+        )
+        resp = producer.send_messages(self.topic, partition, self.msg("one"))
+
+        self.assert_produce_response(resp, start_offset)
+        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
+
+        producer.stop()
+
+    @kafka_versions("all")
+    def test_acks_cluster_commit(self):
+        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
+        start_offset = self.current_offset(self.topic, partition)
+
+        producer = Producer(
+            self.client,
+            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
+        )
+
+        resp = producer.send_messages(self.topic, partition, self.msg("one"))
+        self.assert_produce_response(resp, start_offset)
+        self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
+
+        producer.stop()
+
+    def assert_produce_request(self, messages, initial_offset, message_ct,
+                               partition=0):
+        produce = ProduceRequest(self.bytes_topic, partition, messages=messages)

        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp = self.client.send_produce_request([ produce ])
        self.assert_produce_response(resp, initial_offset)

-        self.assertEqual(self.current_offset(self.topic, 0), initial_offset + message_ct)
+        self.assertEqual(self.current_offset(self.topic, partition), initial_offset + message_ct)

    def assert_produce_response(self, resp, initial_offset):
        self.assertEqual(len(resp), 1)
@@ -433,7 +506,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.

-        resp, = self.client.send_fetch_request([ FetchRequest(self.topic, partition, start_offset, 1024) ])
+        resp, = self.client.send_fetch_request([ FetchRequest(self.bytes_topic, partition, start_offset, 1024) ])

        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.partition, partition)
--- a/test/test_protocol.py
+++ b/test/test_protocol.py
@@ -13,7 +13,7 @@ from kafka.common import (
    ProduceResponse, FetchResponse, OffsetAndMessage,
    BrokerMetadata, TopicMetadata, PartitionMetadata, TopicAndPartition,
    KafkaUnavailableError, UnsupportedCodecError, ConsumerFetchSizeTooSmall,
-    ProtocolError
+    ProtocolError, ConsumerMetadataResponse
 )
 from kafka.protocol import (
    ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, KafkaProtocol,
@@ -32,7 +32,7 @@ class TestProtocol(unittest.TestCase):
        self.assertEqual(msg.value, payload)

    def test_create_gzip(self):
-        payloads = [b"v1", b"v2"]
+        payloads = [(b"v1", None), (b"v2", None)]
        msg = create_gzip_message(payloads)
        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
@@ -59,9 +59,39 @@ class TestProtocol(unittest.TestCase):

        self.assertEqual(decoded, expect)

+    def test_create_gzip_keyed(self):
+        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
+        msg = create_gzip_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
+        self.assertEqual(msg.key, None)
+        # Need to decode to check since gzipped payload is non-deterministic
+        decoded = gzip_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),          # MsgSet Offset
+            struct.pack(">i", 18),         # Msg Size
+            struct.pack(">i", 1474775406), # CRC
+            struct.pack(">bb", 0, 0),      # Magic, flags
+            struct.pack(">i", 2),          # Length of key
+            b"k1",                         # Key
+            struct.pack(">i", 2),          # Length of value
+            b"v1",                         # Value
+
+            struct.pack(">q", 0),          # MsgSet Offset
+            struct.pack(">i", 18),         # Msg Size
+            struct.pack(">i", -16383415),  # CRC
+            struct.pack(">bb", 0, 0),      # Magic, flags
+            struct.pack(">i", 2),          # Length of key
+            b"k2",                         # Key
+            struct.pack(">i", 2),          # Length of value
+            b"v2",                         # Value
+        ])
+
+        self.assertEqual(decoded, expect)
+
    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_create_snappy(self):
-        payloads = [b"v1", b"v2"]
+        payloads = [(b"v1", None), (b"v2", None)]
        msg = create_snappy_message(payloads)
        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
@@ -87,6 +117,36 @@ class TestProtocol(unittest.TestCase):

        self.assertEqual(decoded, expect)

+    @unittest.skipUnless(has_snappy(), "Snappy not available")
+    def test_create_snappy_keyed(self):
+        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
+        msg = create_snappy_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
+        self.assertEqual(msg.key, None)
+        decoded = snappy_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),          # MsgSet Offset
+            struct.pack(">i", 18),         # Msg Size
+            struct.pack(">i", 1474775406), # CRC
+            struct.pack(">bb", 0, 0),      # Magic, flags
+            struct.pack(">i", 2),          # Length of key
+            b"k1",                         # Key
+            struct.pack(">i", 2),          # Length of value
+            b"v1",                         # Value
+
+            struct.pack(">q", 0),          # MsgSet Offset
+            struct.pack(">i", 18),         # Msg Size
+            struct.pack(">i", -16383415),  # CRC
+            struct.pack(">bb", 0, 0),      # Magic, flags
+            struct.pack(">i", 2),          # Length of key
+            b"k2",                         # Key
+            struct.pack(">i", 2),          # Length of value
+            b"v2",                         # Value
+        ])
+
+        self.assertEqual(decoded, expect)
+
    def test_encode_message_header(self):
        expect = b"".join([
            struct.pack(">h", 10),             # API Key
@@ -500,6 +560,34 @@ class TestProtocol(unittest.TestCase):
        decoded = KafkaProtocol.decode_metadata_response(encoded)
        self.assertEqual(decoded, (node_brokers, topic_partitions))

+    def test_encode_consumer_metadata_request(self):
+        client_id, correlation_id, group = b"cid", 4, b"g1"
+        expected = b"".join([
+            struct.pack(">i", 17),                           # Total length of the request
+            struct.pack(">h", 10),                           # API key: consumer metadata
+            struct.pack(">h", 0),                            # API version
+            struct.pack(">i", correlation_id),               # Correlation ID
+            struct.pack(">h3s", len(client_id), client_id),  # Client ID "cid"
+            struct.pack(">h2s", len(group), group),          # Consumer group "g1"
+        ])
+
+        encoded = KafkaProtocol.encode_consumer_metadata_request(client_id, correlation_id, group)
+        self.assertEqual(encoded, expected)
+
+    def test_decode_consumer_metadata_response(self):
+        host = b"brokers1.kafka.rdio.com"
+        encoded = b"".join([
+            struct.pack(">i", 42),                  # Correlation ID
+            struct.pack(">h", 0),                   # No Error
+            struct.pack(">i", 1),                   # Broker ID
+            struct.pack(">h23s", len(host), host),  # Broker Host (length-prefixed)
+            struct.pack(">i", 1000),                # Broker Port
+        ])
+
+        expected = ConsumerMetadataResponse(error=0, nodeId=1, host=host, port=1000)
+        results = KafkaProtocol.decode_consumer_metadata_response(encoded)
+        self.assertEqual(results, expected)
+
    def test_encode_offset_request(self):
        expected = b"".join([
            struct.pack(">i", 21),         # Total length of the request
@@ -701,7 +789,7 @@ class TestProtocol(unittest.TestCase):
                    yield

    def test_create_message_set(self):
-        messages = [1, 2, 3]
+        messages = [(1, "k1"), (2, "k2"), (3, "k3")]

        # Default codec is CODEC_NONE. Expect list of regular messages.
        expect = [sentinel.message] * len(messages)
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -107,7 +107,6 @@ class UtilTest(unittest.TestCase):
        t = kafka.common.TopicAndPartition

        l = [
-            t("a", 1),
            t("a", 1),
            t("a", 2),
            t("a", 3),
@@ -124,3 +123,8 @@ class UtilTest(unittest.TestCase):
                3: t("b", 3),
            }
        })
+
+        # should not be able to group duplicate topic-partitions
+        t1 = t("a", 1)
+        with self.assertRaises(AssertionError):
+            kafka.util.group_by_topic_and_partition([t1, t1])
--- a/test/testutil.py
+++ b/test/testutil.py
@@ -12,6 +12,7 @@ from . import unittest

 from kafka import KafkaClient
 from kafka.common import OffsetRequest
+from kafka.util import kafka_bytestring

 __all__ = [
    'random_string',
@@ -22,8 +23,7 @@ __all__ = [
 ]

 def random_string(l):
-    s = "".join(random.choice(string.ascii_letters) for i in xrange(l))
-    return s.encode('utf-8')
+    return "".join(random.choice(string.ascii_letters) for i in xrange(l))

 def kafka_versions(*versions):
    def kafka_versions(func):
@@ -50,6 +50,8 @@ def get_open_port():
 class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
+    bytes_topic = None
+    zk = None
    server = None

    def setUp(self):
@@ -58,8 +60,9 @@ class KafkaIntegrationTestCase(unittest.TestCase):
            return

        if not self.topic:
-            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10).decode('utf-8'))
-            self.topic = topic.encode('utf-8')
+            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
+            self.topic = topic
+            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))
@@ -77,8 +80,15 @@ class KafkaIntegrationTestCase(unittest.TestCase):
            self.client.close()

    def current_offset(self, topic, partition):
-        offsets, = self.client.send_offset_request([ OffsetRequest(topic, partition, -1, 1) ])
-        return offsets.offsets[0]
+        try:
+            offsets, = self.client.send_offset_request([ OffsetRequest(kafka_bytestring(topic), partition, -1, 1) ])
+        except:
+            # XXX: We've seen UnknownErrors here and can't debug without server logs; skip fixtures left as None
+            for fixture in (self.zk, self.server):
+                if fixture is not None:
+                    fixture.child.dump_logs()
+            raise
+        return offsets.offsets[0]

    def msgs(self, iterable):
        return [ self.msg(x) for x in iterable ]
@@ -103,3 +113,8 @@ class Timer(object):
        self.interval = self.end - self.start

 logging.basicConfig(level=logging.DEBUG)
+logging.getLogger('test.fixtures').setLevel(logging.ERROR)
+logging.getLogger('test.service').setLevel(logging.ERROR)
+
+# kafka.conn debug logging is verbose, disable in tests by default
+logging.getLogger('kafka.conn').setLevel(logging.INFO)
--- a/tox.ini
+++ b/tox.ini
@@ -1,6 +1,21 @@
 [tox]
-envlist = lint, py26, py27, pypy, py33, py34
+envlist = lint, py26, py27, pypy, py33, py34, py35, docs
+
 [testenv]
+deps =
+    nose
+    nose-timer
+    coverage
+    mock
+    python-snappy
+commands =
+    nosetests {posargs:-v -x --with-id --id-file={envdir}/.noseids --with-timer --timer-top-n 10 --with-coverage --cover-erase --cover-package kafka}
+setenv =
+    NOSE_LOGFORMAT = %(asctime)s - %(thread)d - %(name)s - %(levelname)s - %(message)s
+    PROJECT_ROOT = {toxinidir}
+passenv = KAFKA_VERSION
+
+[testenv:py26]
 deps =
    six
    unittest2
@@ -9,21 +24,11 @@ deps =
    coverage
    mock
    python-snappy
-commands =
-    nosetests {posargs:-v --with-id --id-file={envdir}/.noseids --with-timer --timer-top-n 10 --with-coverage --cover-erase --cover-package kafka}
-setenv =
-    PROJECT_ROOT = {toxinidir}

-[testenv:py33]
-deps =
-    nose
-    nose-timer
-    coverage
-    mock
-    python-snappy
-
-[testenv:py34]
+[testenv:py27]
 deps =
+    six
+    unittest2
    nose
    nose-timer
    coverage
@@ -36,4 +41,14 @@ deps =
    unittest2
    mock
    pylint
-commands = pylint {posargs: -E --ignore=queue.py kafka test}
+commands = pylint --rcfile=pylint.rc {posargs: -E kafka test}
+
+[testenv:docs]
+deps =
+    sphinxcontrib-napoleon
+    sphinx_rtd_theme
+    sphinx
+
+commands =
+    sphinx-apidoc -o docs/apidoc/ kafka/
+    sphinx-build -b html docs/ docs/_build
--- a/travis_selector.sh
+++ b/travis_selector.sh
@@ -3,6 +3,8 @@

 if [ $1 == "pypy" ]; then
    echo "pypy"
+elif [ $1 == "3.5" ]; then
+    echo "py35"
 elif [ $1 == "3.4" ]; then
    echo "py34"
 elif [ $1 == "3.3" ]; then