Merge tag '0.9.5' into debian/unstable
.travis.yml (20 lines changed)
@@ -5,6 +5,7 @@ python:
 - 2.7
 - 3.3
 - 3.4
+- 3.5
 - pypy

 env:
@@ -12,18 +13,27 @@ env:
 - KAFKA_VERSION=0.8.0
 - KAFKA_VERSION=0.8.1
 - KAFKA_VERSION=0.8.1.1
-- KAFKA_VERSION=0.8.2.0
+- KAFKA_VERSION=0.8.2.2
+- KAFKA_VERSION=0.9.0.0
+
+sudo: false
+
+addons:
+  apt:
+    packages:
+    - libsnappy-dev
+
+cache:
+  directories:
+  - $HOME/.cache/pip
+  - servers/

 before_install:
-- sudo apt-get install libsnappy-dev
 - ./build_integration.sh

 install:
 - pip install tox coveralls
 - pip install .
-# Deal with issue on Travis builders re: multiprocessing.Queue :(
-# See https://github.com/travis-ci/travis-cookbooks/issues/155
-- sudo rm -rf /dev/shm && sudo ln -s /run/shm /dev/shm

 deploy:
   provider: pypi
AUTHORS.md (57 lines changed)
@@ -1,16 +1,49 @@
-# Contributors
-
-Top 10 contributors, listed by contribution. See https://github.com/mumrah/kafka-python/graphs/contributors for the full list
-
-* David Arthur, [@mumrah](https://github.com/mumrah)
+# Current Maintainer
 * Dana Powers, [@dpkp](https://github.com/dpkp)
-* Mahendra M, [@mahendra](https://github.com/mahendra)
-* Mark Roberts, [@wizzat](https://github.com/wizzat)
-* Omar, [@rdiomar](https://github.com/rdiomar) - RIP, Omar. 2014
-* Bruno Renié, [@brutasse](https://github.com/brutasse)
-* Marc Labbé, [@mrtheb](https://github.com/mrtheb)
-* Ivan Pouzyrevsky, [@sandello](https://github.com/sandello)
+
+# Original Author and First Commit
+* David Arthur, [@mumrah](https://github.com/mumrah)
+
+# Contributors - 2015 (alpha by username)
+* Alex Couture-Beil, [@alexcb](https://github.com/alexcb)
+* Ali-Akber Saifee, [@alisaifee](https://github.com/alisaifee)
+* Christophe-Marie Duquesne, [@chmduquesne](https://github.com/chmduquesne)
 * Thomas Dimson, [@cosbynator](https://github.com/cosbynator)
-* Zack Dever, [@zever](https://github.com/zever)
+* Kasper Jacobsen, [@Dinoshauer](https://github.com/Dinoshauer)
+* Ross Duggan, [@duggan](https://github.com/duggan)
+* Enrico Canzonieri, [@ecanzonieri](https://github.com/ecanzonieri)
+* haosdent, [@haosdent](https://github.com/haosdent)
+* Arturo Filastò, [@hellais](https://github.com/hellais)
+* Job Evers‐Meltzer, [@jobevers](https://github.com/jobevers)
+* Martin Olveyra, [@kalessin](https://github.com/kalessin)
+* Kubilay Kocak, [@koobs](https://github.com/koobs)
+* Matthew L Daniel <mdaniel@gmail.com>
+* Eric Hewitt, [@meandthewallaby](https://github.com/meandthewallaby)
+* Oliver Jowett [@mutability](https://github.com/mutability)
+* Shaolei Zhou, [@reAsOn2010](https://github.com/reAsOn2010)
+* Oskari Saarenmaa, [@saaros](https://github.com/saaros)
+* John Anderson, [@sontek](https://github.com/sontek)
+* Eduard Iskandarov, [@toidi](https://github.com/toidi)
+* Todd Palino, [@toddpalino](https://github.com/toddpalino)
+* trbs, [@trbs](https://github.com/trbs)
+* Viktor Shlapakov, [@vshlapakov](https://github.com/vshlapakov)
+* Will Daly, [@wedaly](https://github.com/wedaly)
+* Warren Kiser, [@wkiser](https://github.com/wkiser)
+* William Ting, [@wting](https://github.com/wting)
+* Zack Dever, [@zackdever](https://github.com/zackdever)
+
+# More Contributors
+* Bruno Renié, [@brutasse](https://github.com/brutasse)
+* Thomas Dimson, [@cosbynator](https://github.com/cosbynator)
+* Jesse Myers, [@jessemyers](https://github.com/jessemyers)
+* Mahendra M, [@mahendra](https://github.com/mahendra)
+* Miguel Eduardo Gil Biraud, [@mgilbir](https://github.com/mgilbir)
+* Marc Labbé, [@mrtheb](https://github.com/mrtheb)
+* Patrick Lucas, [@patricklucas](https://github.com/patricklucas)
+* Omar Ghishan, [@rdiomar](https://github.com/rdiomar) - RIP, Omar. 2014
+* Ivan Pouzyrevsky, [@sandello](https://github.com/sandello)
+* Lou Marvin Caraig, [@se7entyse7en](https://github.com/se7entyse7en)
+* waliaashish85, [@waliaashish85](https://github.com/waliaashish85)
+* Mark Roberts, [@wizzat](https://github.com/wizzat)

 Thanks to all who have contributed!
CHANGES.md (107 lines changed)
@@ -1,3 +1,110 @@
+# 0.9.5 (Dec 6, 2015)
+
+Consumers
+* Initial support for consumer coordinator [offsets only] (toddpalino PR 420)
+* Allow blocking until some messages are received in SimpleConsumer (saaros PR 457)
+* Support subclass config changes in KafkaConsumer (zackdever PR 446)
+* Support retry semantics in MultiProcessConsumer (barricadeio PR 456)
+* Support partition_info in MultiProcessConsumer (scrapinghub PR 418)
+* Enable seek() to an absolute offset in SimpleConsumer (haosdent PR 412)
+* Add KafkaConsumer.close() (ucarion PR 426)
+
+Producers
+* Catch client.reinit() exceptions in async producer (dpkp)
+* Producer.stop() now blocks until async thread completes (dpkp PR 485)
+* Catch errors during load_metadata_for_topics in async producer (bschopman PR 467)
+* Add compression-level support for codecs that support it (trbs PR 454)
+* Fix translation of Java murmur2 code, fix byte encoding for Python 3 (chrischamberlin PR 439)
+* Only call stop() on not-stopped producer objects (docker-hub PR 435)
+* Allow null payload for deletion feature (scrapinghub PR 409)
+
+Clients
+* Use non-blocking io for broker aware requests (ecanzonieri PR 473)
+* Use debug logging level for metadata request (ecanzonieri PR 415)
+* Catch KafkaUnavailableError in _send_broker_aware_request (mutability PR 436)
+* Lower logging level on replica not available and commit (ecanzonieri PR 415)
+
+Documentation
+* Update docs and links wrt maintainer change (mumrah -> dpkp)
+
+Internals
+* Add py35 to tox testing
+* Update travis config to use container infrastructure
+* Add 0.8.2.2 and 0.9.0.0 resources for integration tests; update default official releases
+* new pylint disables for pylint 1.5.1 (zackdever PR 481)
+* Fix python3 / python2 comments re queue/Queue (dpkp)
+* Add Murmur2Partitioner to kafka __all__ imports (dpkp Issue 471)
+* Include LICENSE in PyPI sdist (koobs PR 441)
+
+# 0.9.4 (June 11, 2015)
+
+Consumers
+* Refactor SimpleConsumer internal fetch handling (dpkp PR 399)
+* Handle exceptions in SimpleConsumer commit() and reset_partition_offset() (dpkp PR 404)
+* Improve FailedPayloadsError handling in KafkaConsumer (dpkp PR 398)
+* KafkaConsumer: avoid raising KeyError in task_done (dpkp PR 389)
+* MultiProcessConsumer -- support configured partitions list (dpkp PR 380)
+* Fix SimpleConsumer leadership change handling (dpkp PR 393)
+* Fix SimpleConsumer connection error handling (reAsOn2010 PR 392)
+* Improve Consumer handling of 'falsy' partition values (wting PR 342)
+* Fix _offsets call error in KafkaConsumer (hellais PR 376)
+* Fix str/bytes bug in KafkaConsumer (dpkp PR 365)
+* Register atexit handlers for consumer and producer thread/multiprocess cleanup (dpkp PR 360)
+* Always fetch commit offsets in base consumer unless group is None (dpkp PR 356)
+* Stop consumer threads on delete (dpkp PR 357)
+* Deprecate metadata_broker_list in favor of bootstrap_servers in KafkaConsumer (dpkp PR 340)
+* Support pass-through parameters in multiprocess consumer (scrapinghub PR 336)
+* Enable offset commit on SimpleConsumer.seek (ecanzonieri PR 350)
+* Improve multiprocess consumer partition distribution (scrapinghub PR 335)
+* Ignore messages with offset less than requested (wkiser PR 328)
+* Handle OffsetOutOfRange in SimpleConsumer (ecanzonieri PR 296)
+
+Producers
+* Add Murmur2Partitioner (dpkp PR 378)
+* Log error types in SimpleProducer and SimpleConsumer (dpkp PR 405)
+* SimpleProducer support configuration of fail_on_error (dpkp PR 396)
+* Deprecate KeyedProducer.send() (dpkp PR 379)
+* Further improvements to async producer code (dpkp PR 388)
+* Add more configuration parameters for async producer (dpkp)
+* Deprecate SimpleProducer batch_send=True in favor of async (dpkp)
+* Improve async producer error handling and retry logic (vshlapakov PR 331)
+* Support message keys in async producer (vshlapakov PR 329)
+* Use threading instead of multiprocessing for Async Producer (vshlapakov PR 330)
+* Stop threads on __del__ (chmduquesne PR 324)
+* Fix leadership failover handling in KeyedProducer (dpkp PR 314)
+
+KafkaClient
+* Add .topics property for list of known topics (dpkp)
+* Fix request / response order guarantee bug in KafkaClient (dpkp PR 403)
+* Improve KafkaClient handling of connection failures in _get_conn (dpkp)
+* Client clears local metadata cache before updating from server (dpkp PR 367)
+* KafkaClient should return a response or error for each request - enable better retry handling (dpkp PR 366)
+* Improve str/bytes conversion in KafkaClient and KafkaConsumer (dpkp PR 332)
+* Always return sorted partition ids in client.get_partition_ids_for_topic() (dpkp PR 315)
+
+Documentation
+* Cleanup Usage Documentation
+* Improve KafkaConsumer documentation (dpkp PR 341)
+* Update consumer documentation (sontek PR 317)
+* Add doc configuration for tox (sontek PR 316)
+* Switch to .rst doc format (sontek PR 321)
+* Fixup google groups link in README (sontek PR 320)
+* Automate documentation at kafka-python.readthedocs.org
+
+Internals
+* Switch integration testing from 0.8.2.0 to 0.8.2.1 (dpkp PR 402)
+* Fix most flaky tests, improve debug logging, improve fixture handling (dpkp)
+* General style cleanups (dpkp PR 394)
+* Raise error on duplicate topic-partition payloads in protocol grouping (dpkp)
+* Use module-level loggers instead of simply 'kafka' (dpkp)
+* Remove pkg_resources check for __version__ at runtime (dpkp PR 387)
+* Make external API consistently support python3 strings for topic (kecaps PR 361)
+* Fix correlation id overflow (dpkp PR 355)
+* Cleanup kafka/common structs (dpkp PR 338)
+* Use context managers in gzip_encode / gzip_decode (dpkp PR 337)
+* Save failed request as FailedPayloadsError attribute (jobevers PR 302)
+* Remove unused kafka.queue (mumrah)
+
 # 0.9.3 (Feb 3, 2015)

 * Add coveralls.io support (sontek PR 307)
@@ -1,2 +1,5 @@
-include VERSION
 recursive-include kafka *.py
+include README.rst
+include LICENSE
+include AUTHORS.md
+include CHANGES.md
README.md (39 lines; file deleted)
@@ -1,39 +0,0 @@
-# Kafka Python client
-
-[Build Status](https://travis-ci.org/mumrah/kafka-python)
-[Coverage Status](https://coveralls.io/r/mumrah/kafka-python?branch=master)
-
-[Full documentation available on ReadTheDocs](http://kafka-python.readthedocs.org/en/latest/)
-
-This module provides low-level protocol support for Apache Kafka as well as
-high-level consumer and producer classes. Request batching is supported by the
-protocol as well as broker-aware request routing. Gzip and Snappy compression
-is also supported for message sets.
-
-http://kafka.apache.org/
-
-On Freenode IRC at #kafka-python, as well as #apache-kafka
-
-For general discussion of kafka-client design and implementation (not python specific),
-see https://groups.google.com/forum/m/#!forum/kafka-clients
-
-# License
-
-Copyright 2014, David Arthur under Apache License, v2.0. See `LICENSE`
-
-# Status
-
-The current stable version of this package is [**0.9.3**](https://github.com/mumrah/kafka-python/releases/tag/v0.9.3) and is compatible with
-
-Kafka broker versions
-- 0.8.2.0 [offset management currently ZK only -- does not support ConsumerCoordinator offset management APIs]
-- 0.8.1.1
-- 0.8.1
-- 0.8.0
-
-Python versions
-- 2.6 (tested on 2.6.9)
-- 2.7 (tested on 2.7.9)
-- 3.3 (tested on 3.3.5)
-- 3.4 (tested on 3.4.2)
-- pypy (tested on pypy 2.4.0 / python 2.7.8)
README.rst (59 lines; new file)
@@ -0,0 +1,59 @@
+Kafka Python client
+------------------------
+.. image:: https://api.travis-ci.org/dpkp/kafka-python.png?branch=master
+    :target: https://travis-ci.org/dpkp/kafka-python
+    :alt: Build Status
+
+.. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master
+    :target: https://coveralls.io/r/dpkp/kafka-python?branch=master
+    :alt: Coverage Status
+
+.. image:: https://readthedocs.org/projects/kafka-python/badge/?version=latest
+    :target: http://kafka-python.readthedocs.org/en/latest/
+    :alt: Full documentation available on ReadTheDocs
+
+This module provides low-level protocol support for Apache Kafka as well as
+high-level consumer and producer classes. Request batching is supported by the
+protocol as well as broker-aware request routing. Gzip and Snappy compression
+is also supported for message sets.
+
+Coordinated Consumer Group support is under development - see Issue #38.
+
+Full documentation available on `Read the Docs <https://kafka-python.readthedocs.org/en/latest/>`_
+
+On Freenode IRC at #kafka-python, as well as #apache-kafka
+
+For general discussion of kafka-client design and implementation (not python specific),
+see https://groups.google.com/forum/#!forum/kafka-clients
+
+For information about Apache Kafka generally, see https://kafka.apache.org/
+
+License
+----------
+Apache License, v2.0. See `LICENSE <https://github.com/dpkp/kafka-python/blob/master/LICENSE>`_
+Copyright 2015, David Arthur, Dana Powers, and Contributors
+(See `AUTHORS <https://github.com/dpkp/kafka-python/blob/master/AUTHORS.md>`_
+
+Status
+----------
+The current stable version of this package is
+`0.9.5 <https://github.com/dpkp/kafka-python/releases/tag/v0.9.5>`_
+and is compatible with:
+
+Kafka broker versions
+
+- 0.9.0.0
+- 0.8.2.2
+- 0.8.2.1
+- 0.8.1.1
+- 0.8.1
+- 0.8.0
+
+Python versions
+
+- 3.5 (tested on 3.5.0)
+- 3.4 (tested on 3.4.2)
+- 3.3 (tested on 3.3.5)
+- 2.7 (tested on 2.7.9)
+- 2.6 (tested on 2.6.9)
+- pypy (tested on pypy 2.5.0 / python 2.7.8)
@@ -1,7 +1,7 @@
 #!/bin/bash

 # Versions available for testing via binary distributions
-OFFICIAL_RELEASES="0.8.0 0.8.1 0.8.1.1 0.8.2.0"
+OFFICIAL_RELEASES="0.8.1.1 0.8.2.2 0.9.0.0"

 # Useful configuration vars, with sensible defaults
 if [ -z "$SCALA_VERSION" ]; then
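Note on how this hunk ties to the Travis config above: the env matrix exports KAFKA_VERSION and before_install runs ./build_integration.sh. The sketch below is hedged -- the fixture code is not part of this diff, and the servers/<version>/kafka-bin layout is an assumption:

    import os

    # Assumption: the integration fixtures pick the broker build via the
    # KAFKA_VERSION environment variable from the .travis.yml env matrix,
    # and build_integration.sh unpacks each release under servers/<version>/.
    KAFKA_VERSION = os.environ.get('KAFKA_VERSION', '0.9.0.0')
    KAFKA_ROOT = os.path.join('servers', KAFKA_VERSION, 'kafka-bin')

    if not os.path.isdir(KAFKA_ROOT):
        print('Run ./build_integration.sh first to download', KAFKA_VERSION)
    else:
        print('Integration tests will run against', KAFKA_ROOT)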
@@ -1,67 +0,0 @@
-API Reference
-=============
-
-kafka
------
-.. automodule:: kafka.client
-    :members:
-
-.. automodule:: kafka.codec
-    :members:
-
-.. automodule:: kafka.common
-    :members:
-
-.. automodule:: kafka.conn
-    :members:
-
-.. automodule:: kafka.context
-    :members:
-
-.. automodule:: kafka.protocol
-    :members:
-
-.. automodule:: kafka.queue
-    :members:
-
-.. automodule:: kafka.util
-    :members:
-
-
-kafka.consumer
---------------
-.. automodule:: kafka.consumer.base
-    :members:
-
-.. automodule:: kafka.consumer.kafka
-    :members:
-
-.. automodule:: kafka.consumer.multiprocess
-    :members:
-
-.. automodule:: kafka.consumer.simple
-    :members:
-
-
-kafka.partitioner
------------------
-.. automodule:: kafka.partitioner.base
-    :members:
-
-.. automodule:: kafka.partitioner.hashed
-    :members:
-
-.. automodule:: kafka.partitioner.roundrobin
-    :members:
-
-
-kafka.producer
---------------
-.. automodule:: kafka.producer.base
-    :members:
-
-.. automodule:: kafka.producer.keyed
-    :members:
-
-.. automodule:: kafka.producer.simple
-    :members:
docs/apidoc/kafka.consumer.rst (46 lines; new file)
@@ -0,0 +1,46 @@
+kafka.consumer package
+======================
+
+Submodules
+----------
+
+kafka.consumer.base module
+--------------------------
+
+.. automodule:: kafka.consumer.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.kafka module
+---------------------------
+
+.. automodule:: kafka.consumer.kafka
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.multiprocess module
+----------------------------------
+
+.. automodule:: kafka.consumer.multiprocess
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.consumer.simple module
+----------------------------
+
+.. automodule:: kafka.consumer.simple
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.consumer
+    :members:
+    :undoc-members:
+    :show-inheritance:
docs/apidoc/kafka.partitioner.rst (38 lines; new file)
@@ -0,0 +1,38 @@
+kafka.partitioner package
+=========================
+
+Submodules
+----------
+
+kafka.partitioner.base module
+-----------------------------
+
+.. automodule:: kafka.partitioner.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.partitioner.hashed module
+-------------------------------
+
+.. automodule:: kafka.partitioner.hashed
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.partitioner.roundrobin module
+-----------------------------------
+
+.. automodule:: kafka.partitioner.roundrobin
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.partitioner
+    :members:
+    :undoc-members:
+    :show-inheritance:
docs/apidoc/kafka.producer.rst (38 lines; new file)
@@ -0,0 +1,38 @@
+kafka.producer package
+======================
+
+Submodules
+----------
+
+kafka.producer.base module
+--------------------------
+
+.. automodule:: kafka.producer.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.producer.keyed module
+---------------------------
+
+.. automodule:: kafka.producer.keyed
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.producer.simple module
+----------------------------
+
+.. automodule:: kafka.producer.simple
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka.producer
+    :members:
+    :undoc-members:
+    :show-inheritance:
docs/apidoc/kafka.rst (79 lines; new file)
@@ -0,0 +1,79 @@
+kafka package
+=============
+
+Subpackages
+-----------
+
+.. toctree::
+
+    kafka.consumer
+    kafka.partitioner
+    kafka.producer
+
+Submodules
+----------
+
+kafka.client module
+-------------------
+
+.. automodule:: kafka.client
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.codec module
+------------------
+
+.. automodule:: kafka.codec
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.common module
+-------------------
+
+.. automodule:: kafka.common
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.conn module
+-----------------
+
+.. automodule:: kafka.conn
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.context module
+--------------------
+
+.. automodule:: kafka.context
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.protocol module
+---------------------
+
+.. automodule:: kafka.protocol
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+kafka.util module
+-----------------
+
+.. automodule:: kafka.util
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: kafka
+    :members:
+    :undoc-members:
+    :show-inheritance:
docs/apidoc/modules.rst (7 lines; new file)
@@ -0,0 +1,7 @@
+kafka
+=====
+
+.. toctree::
+    :maxdepth: 4
+
+    kafka
docs/conf.py (21 lines changed)
@@ -30,6 +30,7 @@ import os
 # ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.intersphinx',
     'sphinx.ext.viewcode',
     'sphinxcontrib.napoleon',
 ]
@@ -48,18 +49,17 @@ master_doc = 'index'

 # General information about the project.
 project = u'kafka-python'
-copyright = u'2015, David Arthur'
+copyright = u'2015 - David Arthur, Dana Powers, and Contributors'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-with open('../VERSION') as version_file:
-    version = version_file.read()
+exec(open('../kafka/version.py').read())

 # The full version, including alpha/beta/rc tags.
-release = version
+release = __version__

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -203,7 +203,7 @@ latex_elements = {
 # author, documentclass [howto, manual, or own class]).
 latex_documents = [
   ('index', 'kafka-python.tex', u'kafka-python Documentation',
-   u'David Arthur', 'manual'),
+   u'Dana Powers', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
@@ -233,7 +233,7 @@ latex_documents = [
 # (source start file, name, description, authors, manual section).
 man_pages = [
     ('index', 'kafka-python', u'kafka-python Documentation',
-     [u'David Arthur'], 1)
+     [u'Dana Powers'], 1)
 ]

 # If true, show URL addresses after external links.
@@ -247,7 +247,7 @@ man_pages = [
 # dir menu entry, description, category)
 texinfo_documents = [
   ('index', 'kafka-python', u'kafka-python Documentation',
-   u'David Arthur', 'kafka-python', 'One line description of project.',
+   u'Dana Powers', 'kafka-python', 'One line description of project.',
   'Miscellaneous'),
 ]
@@ -262,3 +262,10 @@ texinfo_documents = [

 # If true, do not generate a @detailmenu in the "Top" node's menu.
 #texinfo_no_detailmenu = False
+
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+
+if not on_rtd:  # only import and set the theme if we're building docs locally
+    import sphinx_rtd_theme
+    html_theme = 'sphinx_rtd_theme'
+    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
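The conf.py change above swaps reading a top-level VERSION file for exec'ing kafka/version.py, so the docs and the package share a single version definition. A minimal, self-contained sketch of that pattern; the version string is an assumption for illustration, and the real file contents are not shown in this diff:

    # kafka/version.py is assumed to hold a single assignment like the one
    # below; conf.py runs exec(open('../kafka/version.py').read()) to pick
    # it up without importing the package.
    version_py = "__version__ = '0.9.5'"

    namespace = {}
    exec(version_py, namespace)
    release = namespace['__version__']  # full version string for |release|
    print(release)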
@@ -1,4 +1,3 @@
-
 kafka-python
 ============

@@ -7,35 +6,45 @@ high-level consumer and producer classes. Request batching is supported by the
 protocol as well as broker-aware request routing. Gzip and Snappy compression
 is also supported for message sets.

-http://kafka.apache.org/
+Coordinated Consumer Group support is under development - see Issue #38.

 On Freenode IRC at #kafka-python, as well as #apache-kafka

 For general discussion of kafka-client design and implementation (not python specific),
 see https://groups.google.com/forum/m/#!forum/kafka-clients

+For information about Apache Kafka generally, see https://kafka.apache.org/
+
 Status
 ------

-The current stable version of this package is `0.9.2 <https://github.com/mumrah/kafka-python/releases/tag/v0.9.2>`_ and is compatible with:
+The current stable version of this package is `0.9.5 <https://github.com/dpkp/kafka-python/releases/tag/v0.9.5>`_ and is compatible with:

 Kafka broker versions

-* 0.8.0
-* 0.8.1
+* 0.9.0.0
+* 0.8.2.2
+* 0.8.2.1
 * 0.8.1.1
+* 0.8.1
+* 0.8.0

 Python versions

+* 3.5 (tested on 3.5.0)
+* 3.4 (tested on 3.4.2)
+* 3.3 (tested on 3.3.5)
+* 2.7 (tested on 2.7.9)
 * 2.6 (tested on 2.6.9)
-* 2.7 (tested on 2.7.8)
-* pypy (tested on pypy 2.3.1 / python 2.7.6)
-* (Python 3.3 and 3.4 support has been added to trunk and will be available the next release)
+* pypy (tested on pypy 2.5.0 / python 2.7.8)

 License
 -------

-Copyright 2014, David Arthur under Apache License, v2.0. See `LICENSE <https://github.com/mumrah/kafka-python/blob/master/LICENSE>`_.
+Apache License, v2.0. See `LICENSE <https://github.com/dpkp/kafka-python/blob/master/LICENSE>`_.
+
+Copyright 2015, David Arthur, Dana Powers, and Contributors
+(See `AUTHORS <https://github.com/dpkp/kafka-python/blob/master/AUTHORS.md>`_).

 Contents
@@ -44,11 +53,10 @@ Contents
 .. toctree::
    :maxdepth: 2

+   usage
    install
    tests
-   usage
-   api_reference
-
+   API reference </apidoc/modules>

 Indices and tables
 ==================
@@ -56,4 +64,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-
@@ -11,7 +11,7 @@ Pip:

     pip install kafka-python

-Releases are also listed at https://github.com/mumrah/kafka-python/releases
+Releases are also listed at https://github.com/dpkp/kafka-python/releases


 Bleeding-Edge
@@ -19,21 +19,21 @@ Bleeding-Edge

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
     pip install ./kafka-python

 Setuptools:

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
     easy_install ./kafka-python

 Using `setup.py` directly:

 .. code:: bash

-    git clone https://github.com/mumrah/kafka-python
+    git clone https://github.com/dpkp/kafka-python
     cd kafka-python
     python setup.py install
@@ -1,7 +1,8 @@
 sphinx
 sphinxcontrib-napoleon
+sphinx_rtd_theme

 # Install kafka-python in editable mode
 # This allows the sphinx autodoc module
 # to load the Python modules and extract docstrings.
--e ..
+# -e ..
docs/usage.rst (162 lines changed)
@@ -1,29 +1,32 @@
 Usage
 =====

-High level
-----------
+SimpleProducer
+--------------

 .. code:: python

-    from kafka import KafkaClient, SimpleProducer, SimpleConsumer
+    from kafka import SimpleProducer, KafkaClient

     # To send messages synchronously
-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')
     producer = SimpleProducer(kafka)

-    # Note that the application is responsible for encoding messages to type str
-    producer.send_messages("my-topic", "some message")
-    producer.send_messages("my-topic", "this method", "is variadic")
+    # Note that the application is responsible for encoding messages to type bytes
+    producer.send_messages(b'my-topic', b'some message')
+    producer.send_messages(b'my-topic', b'this method', b'is variadic')

     # Send unicode message
-    producer.send_messages("my-topic", u'你怎么样?'.encode('utf-8'))
+    producer.send_messages(b'my-topic', u'你怎么样?'.encode('utf-8'))
+
+Asynchronous Mode
+-----------------
+
+.. code:: python

     # To send messages asynchronously
-    # WARNING: current implementation does not guarantee message delivery on failure!
-    # messages can get dropped! Use at your own risk! Or help us improve with a PR!
     producer = SimpleProducer(kafka, async=True)
-    producer.send_messages("my-topic", "async message")
+    producer.send_messages(b'my-topic', b'async message')

     # To wait for acknowledgements
     # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
@@ -32,13 +35,12 @@ High level
     # by all in sync replicas before sending a response
     producer = SimpleProducer(kafka, async=False,
                               req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
-                              ack_timeout=2000)
+                              ack_timeout=2000,
+                              sync_fail_on_error=False)

-    response = producer.send_messages("my-topic", "another message")
-    if response:
-        print(response[0].error)
-        print(response[0].offset)
+    responses = producer.send_messages(b'my-topic', b'another message')
+    for r in responses:
+        logging.info(r.offset)

     # To send messages in batch. You can use any of the available
     # producers for doing this. The following producer will collect
@@ -47,37 +49,117 @@ High level
     # Notes:
     # * If the producer dies before the messages are sent, there will be losses
     # * Call producer.stop() to send the messages and cleanup
-    producer = SimpleProducer(kafka, batch_send=True,
+    producer = SimpleProducer(kafka, async=True,
                               batch_send_every_n=20,
                               batch_send_every_t=60)

-    # To consume messages
-    consumer = SimpleConsumer(kafka, "my-group", "my-topic")
-    for message in consumer:
-        # message is raw byte string -- decode if necessary!
-        # e.g., for unicode: `message.decode('utf-8')`
-        print(message)
-
-    kafka.close()
-
-
 Keyed messages
 --------------

 .. code:: python

-    from kafka import KafkaClient, KeyedProducer, HashedPartitioner, RoundRobinPartitioner
+    from kafka import (
+        KafkaClient, KeyedProducer,
+        Murmur2Partitioner, RoundRobinPartitioner)

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

-    # HashedPartitioner is default
+    # HashedPartitioner is default (currently uses python hash())
     producer = KeyedProducer(kafka)
-    producer.send("my-topic", "key1", "some message")
-    producer.send("my-topic", "key2", "this methode")
+    producer.send_messages(b'my-topic', b'key1', b'some message')
+    producer.send_messages(b'my-topic', b'key2', b'this methode')
+
+    # Murmur2Partitioner attempts to mirror the java client hashing
+    producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)

+    # Or just produce round-robin (or just use SimpleProducer)
     producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)


+KafkaConsumer
+-------------
+
+.. code:: python
+
+    from kafka import KafkaConsumer
+
+    # To consume messages
+    consumer = KafkaConsumer('my-topic',
+                             group_id='my_group',
+                             bootstrap_servers=['localhost:9092'])
+    for message in consumer:
+        # message value is raw byte string -- decode if necessary!
+        # e.g., for unicode: `message.value.decode('utf-8')`
+        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
+                                             message.offset, message.key,
+                                             message.value))
+
+
+messages (m) are namedtuples with attributes:
+
+* `m.topic`: topic name (str)
+* `m.partition`: partition number (int)
+* `m.offset`: message offset on topic-partition log (int)
+* `m.key`: key (bytes - can be None)
+* `m.value`: message (output of deserializer_class - default is raw bytes)
+
+
+.. code:: python
+
+    from kafka import KafkaConsumer
+
+    # more advanced consumer -- multiple topics w/ auto commit offset
+    # management
+    consumer = KafkaConsumer('topic1', 'topic2',
+                             bootstrap_servers=['localhost:9092'],
+                             group_id='my_consumer_group',
+                             auto_commit_enable=True,
+                             auto_commit_interval_ms=30 * 1000,
+                             auto_offset_reset='smallest')
+
+    # Infinite iteration
+    for m in consumer:
+        do_some_work(m)
+
+        # Mark this message as fully consumed
+        # so it can be included in the next commit
+        #
+        # **messages that are not marked w/ task_done currently do not commit!
+        consumer.task_done(m)
+
+    # If auto_commit_enable is False, remember to commit() periodically
+    consumer.commit()
+
+    # Batch process interface
+    while True:
+        for m in kafka.fetch_messages():
+            process_message(m)
+            consumer.task_done(m)
+
+
+Configuration settings can be passed to constructor,
+otherwise defaults will be used:
+
+.. code:: python
+
+    client_id='kafka.consumer.kafka',
+    group_id=None,
+    fetch_message_max_bytes=1024*1024,
+    fetch_min_bytes=1,
+    fetch_wait_max_ms=100,
+    refresh_leader_backoff_ms=200,
+    bootstrap_servers=[],
+    socket_timeout_ms=30*1000,
+    auto_offset_reset='largest',
+    deserializer_class=lambda msg: msg,
+    auto_commit_enable=False,
+    auto_commit_interval_ms=60 * 1000,
+    consumer_timeout_ms=-1
+
+Configuration parameters are described in more detail at
+http://kafka.apache.org/documentation.html#highlevelconsumerapi

 Multiprocess consumer
 ---------------------
@@ -85,13 +167,13 @@ Multiprocess consumer

     from kafka import KafkaClient, MultiProcessConsumer

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

     # This will split the number of partitions among two processes
-    consumer = MultiProcessConsumer(kafka, "my-group", "my-topic", num_procs=2)
+    consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic', num_procs=2)

     # This will spawn processes such that each handles 2 partitions max
-    consumer = MultiProcessConsumer(kafka, "my-group", "my-topic",
+    consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic',
                                     partitions_per_proc=2)

     for message in consumer:
@@ -109,14 +191,14 @@ Low level
     from kafka.protocol import KafkaProtocol
     from kafka.common import ProduceRequest

-    kafka = KafkaClient("localhost:9092")
+    kafka = KafkaClient('localhost:9092')

-    req = ProduceRequest(topic="my-topic", partition=1,
-        messages=[create_message("some message")])
+    req = ProduceRequest(topic=b'my-topic', partition=1,
+        messages=[create_message(b'some message')])
     resps = kafka.send_produce_request(payloads=[req], fail_on_error=True)
    kafka.close()

-    resps[0].topic      # "my-topic"
+    resps[0].topic      # b'my-topic'
     resps[0].partition  # 1
     resps[0].error      # 0 (hopefully)
     resps[0].offset     # offset of the first message sent in this request
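A hedged supplement to the usage examples above (not part of the documented examples): shutting down an async producer, leaning on the 0.9.5 changelog note that Producer.stop() now blocks until the async thread completes.

    from kafka import KafkaClient, SimpleProducer

    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka, async=True)
    producer.send_messages(b'my-topic', b'one last message')

    # Per the 0.9.5 changelog, stop() blocks until the async send thread
    # has flushed its queue and exited.
    producer.stop()

    # Close the underlying broker connections.
    kafka.close()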
@@ -1,10 +1,8 @@
 __title__ = 'kafka'
-# Use setuptools to get version from setup.py
-import pkg_resources
-__version__ = pkg_resources.require('kafka-python')[0].version
+from .version import __version__
 __author__ = 'David Arthur'
 __license__ = 'Apache License 2.0'
-__copyright__ = 'Copyright 2014, David Arthur under Apache License, v2.0'
+__copyright__ = 'Copyright 2015, David Arthur under Apache License, v2.0'

 from kafka.client import KafkaClient
 from kafka.conn import KafkaConnection
@@ -12,7 +10,7 @@ from kafka.protocol import (
     create_message, create_gzip_message, create_snappy_message
 )
 from kafka.producer import SimpleProducer, KeyedProducer
-from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner
+from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner
 from kafka.consumer import SimpleConsumer, MultiProcessConsumer, KafkaConsumer

 __all__ = [
kafka/client.py (431 lines changed)
@@ -1,12 +1,11 @@
-import binascii
 import collections
 import copy
 import functools
-import itertools
 import logging
+import select
 import time
-import kafka.common

+import kafka.common
 from kafka.common import (TopicAndPartition, BrokerMetadata,
                           ConnectionError, FailedPayloadsError,
                           KafkaTimeoutError, KafkaUnavailableError,
@@ -15,24 +14,27 @@ from kafka.common import (TopicAndPartition, BrokerMetadata,

 from kafka.conn import collect_hosts, KafkaConnection, DEFAULT_SOCKET_TIMEOUT_SECONDS
 from kafka.protocol import KafkaProtocol
+from kafka.util import kafka_bytestring

-log = logging.getLogger("kafka")
+log = logging.getLogger(__name__)


 class KafkaClient(object):

-    CLIENT_ID = b"kafka-python"
-    ID_GEN = itertools.count()
+    CLIENT_ID = b'kafka-python'

     # NOTE: The timeout given to the client should always be greater than the
     # one passed to SimpleConsumer.get_message(), otherwise you can get a
     # socket timeout.
     def __init__(self, hosts, client_id=CLIENT_ID,
-                 timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS):
+                 timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS,
+                 correlation_id=0):
         # We need one connection to bootstrap
-        self.client_id = client_id
+        self.client_id = kafka_bytestring(client_id)
         self.timeout = timeout
         self.hosts = collect_hosts(hosts)
+        self.correlation_id = correlation_id

         # create connections only when we need them
         self.conns = {}
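The constructor now normalizes client_id through kafka.util.kafka_bytestring, whose implementation is not part of this diff. A hypothetical stand-in that only illustrates the str/bytes normalization implied by its use here:

    # Hypothetical stand-in for kafka.util.kafka_bytestring -- for
    # illustration only, not the library's code.
    def bytestring_sketch(s):
        if isinstance(s, bytes):
            return s
        if isinstance(s, str):
            return s.encode('utf-8')
        raise TypeError('expected str or bytes, got %r' % type(s))

    print(bytestring_sketch('kafka-python'))   # b'kafka-python'
    print(bytestring_sketch(b'kafka-python'))  # b'kafka-python'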
@@ -48,7 +50,7 @@ class KafkaClient(object):
     ##################

     def _get_conn(self, host, port):
-        "Get or create a connection to a broker using host and port"
+        """Get or create a connection to a broker using host and port"""
         host_key = (host, port)
         if host_key not in self.conns:
             self.conns[host_key] = KafkaConnection(
@@ -85,7 +87,7 @@ class KafkaClient(object):
             self.load_metadata_for_topics(topic)

             # If the partition doesn't actually exist, raise
-            if partition not in self.topic_partitions[topic]:
+            if partition not in self.topic_partitions.get(topic, []):
                 raise UnknownTopicOrPartitionError(key)

         # If there's no leader for the partition, raise
@@ -96,11 +98,31 @@ class KafkaClient(object):
         # Otherwise return the BrokerMetadata
         return self.brokers[meta.leader]

+    def _get_coordinator_for_group(self, group):
+        """
+        Returns the coordinator broker for a consumer group.
+
+        ConsumerCoordinatorNotAvailableCode will be raised if the coordinator
+        does not currently exist for the group.
+
+        OffsetsLoadInProgressCode is raised if the coordinator is available
+        but is still loading offsets from the internal topic
+        """
+
+        resp = self.send_consumer_metadata_request(group)
+
+        # If there's a problem with finding the coordinator, raise the
+        # provided error
+        kafka.common.check_error(resp)
+
+        # Otherwise return the BrokerMetadata
+        return BrokerMetadata(resp.nodeId, resp.host, resp.port)
+
     def _next_id(self):
-        """
-        Generate a new correlation id
-        """
-        return next(KafkaClient.ID_GEN)
+        """Generate a new correlation id"""
+        # modulo to keep w/i int32
+        self.correlation_id = (self.correlation_id + 1) % 2**31
+        return self.correlation_id

     def _send_broker_unaware_request(self, payloads, encoder_fn, decoder_fn):
         """
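The _next_id() change above drops the unbounded itertools.count() in favor of a per-client counter kept inside the signed 32-bit range that the wire protocol uses for correlation ids (see the 0.9.4 changelog entry "Fix correlation id overflow"). A self-contained illustration of the wrap-around:

    # Illustration of the modulo wrap used by _next_id(): ids stay within
    # int32 instead of growing without bound.
    correlation_id = 2**31 - 2   # pretend the client is near the int32 limit

    ids = []
    for _ in range(4):
        correlation_id = (correlation_id + 1) % 2**31
        ids.append(correlation_id)

    print(ids)  # [2147483647, 0, 1, 2]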
@@ -109,6 +131,7 @@ class KafkaClient(object):
         """
         for (host, port) in self.hosts:
             requestId = self._next_id()
+            log.debug('Request %s: %s', requestId, payloads)
             try:
                 conn = self._get_conn(host, port)
                 request = encoder_fn(client_id=self.client_id,
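With the module-level logger and the request/response debug lines added here, per-request tracing can be switched on from an application with ordinary logging configuration; a hedged sketch (not part of the diff):

    import logging

    # kafka-python now uses module-level loggers such as 'kafka.client',
    # so standard logging configuration applies; DEBUG surfaces the new
    # 'Request ...' / 'Response ...' lines.
    logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s %(message)s')
    logging.getLogger('kafka').setLevel(logging.DEBUG)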
@@ -117,13 +140,15 @@ class KafkaClient(object):

                 conn.send(requestId, request)
                 response = conn.recv(requestId)
-                return decoder_fn(response)
+                decoded = decoder_fn(response)
+                log.debug('Response %s: %s', requestId, decoded)
+                return decoded

             except Exception:
-                log.exception("Could not send request [%r] to server %s:%i, "
-                              "trying next server" % (requestId, host, port))
+                log.exception('Error sending request [%s] to server %s:%s, '
+                              'trying next server', requestId, host, port)

-        raise KafkaUnavailableError("All servers failed to process request")
+        raise KafkaUnavailableError('All servers failed to process request')

     def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
         """
@@ -134,7 +159,8 @@ class KafkaClient(object):
         Arguments:

         payloads: list of object-like entities with a topic (str) and
-            partition (int) attribute
+            partition (int) attribute; payloads with duplicate topic-partitions
+            are not supported.

         encode_fn: a method to encode the list of payloads to a request body,
             must accept client_id, correlation_id, and payloads as
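The next hunk rewrites _send_broker_aware_request to send to every broker first and then collect responses by multiplexing the sockets with select(), rather than blocking on each connection in turn. A minimal, self-contained sketch of that pattern with plain sockets (an illustration of the technique, not the library code):

    import select

    def read_all_responses(connections_by_socket):
        """Collect one response per pending socket, whichever is ready first.

        connections_by_socket maps a connected socket object to an opaque
        request id, mirroring how the hunk below keys pending requests by
        their socket.
        """
        responses = {}
        while connections_by_socket:
            # Block until at least one socket is readable, regardless of the
            # order in which the requests were sent.
            rlist, _, _ = select.select(list(connections_by_socket), [], [], None)
            sock = rlist[0]
            request_id = connections_by_socket.pop(sock)
            responses[request_id] = sock.recv(4096)
        return responses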
@@ -148,72 +174,215 @@ class KafkaClient(object):

         List of response objects in the same order as the supplied payloads
         """
+        # encoders / decoders do not maintain ordering currently
+        # so we need to keep this so we can rebuild order before returning
+        original_ordering = [(p.topic, p.partition) for p in payloads]

         # Group the requests by topic+partition
-        original_keys = []
+        brokers_for_payloads = []
         payloads_by_broker = collections.defaultdict(list)

+        responses = {}
         for payload in payloads:
-            leader = self._get_leader_for_partition(payload.topic,
-                                                     payload.partition)
-            payloads_by_broker[leader].append(payload)
-            original_keys.append((payload.topic, payload.partition))
-
-        # Accumulate the responses in a dictionary
-        acc = {}
-
-        # keep a list of payloads that were failed to be sent to brokers
-        failed_payloads = []
+            try:
+                leader = self._get_leader_for_partition(payload.topic,
+                                                         payload.partition)
+                payloads_by_broker[leader].append(payload)
+                brokers_for_payloads.append(leader)
+            except KafkaUnavailableError as e:
+                log.warning('KafkaUnavailableError attempting to send request '
+                            'on topic %s partition %d', payload.topic, payload.partition)
+                topic_partition = (payload.topic, payload.partition)
+                responses[topic_partition] = FailedPayloadsError(payload)

         # For each broker, send the list of request payloads
+        # and collect the responses and errors
+        broker_failures = []
+
+        # For each KafkaConnection keep the real socket so that we can use
+        # a select to perform unblocking I/O
+        connections_by_socket = {}
         for broker, payloads in payloads_by_broker.items():
-            conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
             requestId = self._next_id()
+            log.debug('Request %s to %s: %s', requestId, broker, payloads)
             request = encoder_fn(client_id=self.client_id,
                                  correlation_id=requestId, payloads=payloads)

-            failed = False
             # Send the request, recv the response
             try:
+                conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
                 conn.send(requestId, request)
-                if decoder_fn is None:
-                    continue
-                try:
-                    response = conn.recv(requestId)
-                except ConnectionError as e:
-                    log.warning("Could not receive response to request [%s] "
-                                "from server %s: %s", binascii.b2a_hex(request), conn, e)
-                    failed = True
             except ConnectionError as e:
-                log.warning("Could not send request [%s] to server %s: %s",
-                            binascii.b2a_hex(request), conn, e)
-                failed = True
-
-            if failed:
-                failed_payloads += payloads
-                self.reset_all_metadata()
-                continue
-
-            for response in decoder_fn(response):
-                acc[(response.topic, response.partition)] = response
-
-        if failed_payloads:
-            raise FailedPayloadsError(failed_payloads)
+                broker_failures.append(broker)
+                log.warning('ConnectionError attempting to send request %s '
+                            'to server %s: %s', requestId, broker, e)
+
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)
+
+            # No exception, try to get response
+            else:
+
+                # decoder_fn=None signal that the server is expected to not
+                # send a response. This probably only applies to
+                # ProduceRequest w/ acks = 0
+                if decoder_fn is None:
+                    log.debug('Request %s does not expect a response '
+                              '(skipping conn.recv)', requestId)
+                    for payload in payloads:
+                        topic_partition = (payload.topic, payload.partition)
+                        responses[topic_partition] = None
+                    continue
+                else:
+                    connections_by_socket[conn.get_connected_socket()] = (conn, broker, requestId)

-        # Order the accumulated responses by the original key order
-        return (acc[k] for k in original_keys) if acc else ()
+        conn = None
+        while connections_by_socket:
+            sockets = connections_by_socket.keys()
+            rlist, _, _ = select.select(sockets, [], [], None)
+            conn, broker, requestId = connections_by_socket.pop(rlist[0])
+            try:
+                response = conn.recv(requestId)
+            except ConnectionError as e:
+                broker_failures.append(broker)
+                log.warning('ConnectionError attempting to receive a '
+                            'response to request %s from server %s: %s',
+                            requestId, broker, e)
+
+                for payload in payloads_by_broker[broker]:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)
+
+            else:
+                _resps = []
+                for payload_response in decoder_fn(response):
+                    topic_partition = (payload_response.topic,
+                                       payload_response.partition)
+                    responses[topic_partition] = payload_response
+                    _resps.append(payload_response)
+                log.debug('Response %s: %s', requestId, _resps)
+
+        # Connection errors generally mean stale metadata
+        # although sometimes it means incorrect api request
+        # Unfortunately there is no good way to tell the difference
+        # so we'll just reset metadata on all errors to be safe
+        if broker_failures:
+            self.reset_all_metadata()
+
+        # Return responses in the same order as provided
+        return [responses[tp] for tp in original_ordering]
+
+    def _send_consumer_aware_request(self, group, payloads, encoder_fn, decoder_fn):
+        """
+        Send a list of requests to the consumer coordinator for the group
+        specified using the supplied encode/decode functions. As the payloads
+        that use consumer-aware requests do not contain the group (e.g.
+        OffsetFetchRequest), all payloads must be for a single group.
+
+        Arguments:
+
+        group: the name of the consumer group (str) the payloads are for
+        payloads: list of object-like entities with topic (str) and
+            partition (int) attributes; payloads with duplicate
+            topic+partition are not supported.
+
+        encode_fn: a method to encode the list of payloads to a request body,
+            must accept client_id, correlation_id, and payloads as
+            keyword arguments
+
+        decode_fn: a method to decode a response body into response objects.
+            The response objects must be object-like and have topic
+            and partition attributes
+
+        Returns:
+
+        List of response objects in the same order as the supplied payloads
+        """
+        # encoders / decoders do not maintain ordering currently
+        # so we need to keep this so we can rebuild order before returning
+        original_ordering = [(p.topic, p.partition) for p in payloads]
+
+        broker = self._get_coordinator_for_group(group)
+
+        # Send the list of request payloads and collect the responses and
+        # errors
+        responses = {}
+        requestId = self._next_id()
+        log.debug('Request %s to %s: %s', requestId, broker, payloads)
+        request = encoder_fn(client_id=self.client_id,
+                             correlation_id=requestId, payloads=payloads)
+
+        # Send the request, recv the response
+        try:
+            conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
+            conn.send(requestId, request)
+
+        except ConnectionError as e:
+            log.warning('ConnectionError attempting to send request %s '
+                        'to server %s: %s', requestId, broker, e)
+
+            for payload in payloads:
+                topic_partition = (payload.topic, payload.partition)
+                responses[topic_partition] = FailedPayloadsError(payload)
+
+        # No exception, try to get response
+        else:
+
+            # decoder_fn=None signal that the server is expected to not
+            # send a response. This probably only applies to
+            # ProduceRequest w/ acks = 0
+            if decoder_fn is None:
+                log.debug('Request %s does not expect a response '
+                          '(skipping conn.recv)', requestId)
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = None
+                return []
+
+            try:
+                response = conn.recv(requestId)
+            except ConnectionError as e:
+                log.warning('ConnectionError attempting to receive a '
+                            'response to request %s from server %s: %s',
+                            requestId, broker, e)
+
+                for payload in payloads:
+                    topic_partition = (payload.topic, payload.partition)
+                    responses[topic_partition] = FailedPayloadsError(payload)
+
+            else:
+                _resps = []
+                for payload_response in decoder_fn(response):
+                    topic_partition = (payload_response.topic,
+                                       payload_response.partition)
+                    responses[topic_partition] = payload_response
+                    _resps.append(payload_response)
+                log.debug('Response %s: %s', requestId, _resps)
+
+        # Return responses in the same order as provided
+        return [responses[tp] for tp in original_ordering]

     def __repr__(self):
         return '<KafkaClient client_id=%s>' % (self.client_id)

     def _raise_on_response_error(self, resp):
+
+        # Response can be an unraised exception object (FailedPayloadsError)
+        if isinstance(resp, Exception):
+            raise resp
+
+        # Or a server api error response
         try:
             kafka.common.check_error(resp)
         except (UnknownTopicOrPartitionError, NotLeaderForPartitionError):
             self.reset_topic_metadata(resp.topic)
             raise

+        # Return False if no error to enable list comprehensions
+        return False
+
     #################
     #   Public API  #
     #################
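# Reviewer note (not part of the patch): with this rewrite,
# _send_broker_aware_request() no longer raises FailedPayloadsError itself;
# failed payloads come back as FailedPayloadsError objects in the same
# position as their request, and _raise_on_response_error() decides whether
# to raise. A minimal sketch of how a caller could inspect per-payload
# results (payloads/resps here are hypothetical values):
from kafka.common import FailedPayloadsError

def split_results(payloads, resps):
    ok, failed = [], []
    for payload, resp in zip(payloads, resps):
        (failed if isinstance(resp, FailedPayloadsError) else ok).append(payload)
    return ok, failed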
@@ -223,8 +392,11 @@ class KafkaClient(object):

     def copy(self):
         """
-        Create an inactive copy of the client object
-        A reinit() has to be done on the copy before it can be used again
+        Create an inactive copy of the client object, suitable for passing
+        to a separate thread.
+
+        Note that the copied connections are not initialized, so reinit() must
+        be called on the returned copy.
         """
         c = copy.deepcopy(self)
         for key in c.conns:
@@ -237,38 +409,40 @@ class KafkaClient(object):

     def reset_topic_metadata(self, *topics):
         for topic in topics:
-            try:
-                partitions = self.topic_partitions[topic]
-            except KeyError:
-                continue
-
-            for partition in partitions:
-                self.topics_to_brokers.pop(TopicAndPartition(topic, partition), None)
-
-            del self.topic_partitions[topic]
+            for topic_partition in list(self.topics_to_brokers.keys()):
+                if topic_partition.topic == topic:
+                    del self.topics_to_brokers[topic_partition]
+            if topic in self.topic_partitions:
+                del self.topic_partitions[topic]

     def reset_all_metadata(self):
         self.topics_to_brokers.clear()
         self.topic_partitions.clear()

     def has_metadata_for_topic(self, topic):
+        topic = kafka_bytestring(topic)
         return (
             topic in self.topic_partitions
             and len(self.topic_partitions[topic]) > 0
         )

     def get_partition_ids_for_topic(self, topic):
+        topic = kafka_bytestring(topic)
         if topic not in self.topic_partitions:
-            return None
+            return []

-        return list(self.topic_partitions[topic])
+        return sorted(list(self.topic_partitions[topic]))
+
+    @property
+    def topics(self):
+        return list(self.topic_partitions.keys())

     def ensure_topic_exists(self, topic, timeout = 30):
         start_time = time.time()

         while not self.has_metadata_for_topic(topic):
             if time.time() > start_time + timeout:
-                raise KafkaTimeoutError("Unable to create topic {0}".format(topic))
+                raise KafkaTimeoutError('Unable to create topic {0}'.format(topic))
             try:
                 self.load_metadata_for_topics(topic)
             except LeaderNotAvailableError:
@@ -306,10 +480,18 @@ class KafkaClient(object):
         Partition-level errors will also not be raised here
         (a single partition w/o a leader, for example)
         """
+        topics = [kafka_bytestring(t) for t in topics]
+
+        if topics:
+            for topic in topics:
+                self.reset_topic_metadata(topic)
+        else:
+            self.reset_all_metadata()
+
         resp = self.send_metadata_request(topics)

-        log.debug("Broker metadata: %s", resp.brokers)
-        log.debug("Topic metadata: %s", resp.topics)
+        log.debug('Updating broker metadata: %s', resp.brokers)
+        log.debug('Updating topic metadata: %s', resp.topics)

         self.brokers = dict([(broker.nodeId, broker)
                              for broker in resp.brokers])
@@ -318,8 +500,6 @@ class KafkaClient(object):
             topic = topic_metadata.topic
             partitions = topic_metadata.partitions

-            self.reset_topic_metadata(topic)
-
             # Errors expected for new topics
             try:
                 kafka.common.check_error(topic_metadata)
@@ -330,7 +510,7 @@ class KafkaClient(object):
                     raise

                 # Otherwise, just log a warning
-                log.error("Error loading topic metadata for %s: %s", topic, type(e))
+                log.error('Error loading topic metadata for %s: %s', topic, type(e))
                 continue

             self.topic_partitions[topic] = {}
@@ -356,7 +536,7 @@ class KafkaClient(object):
                 # this error code is provided for admin purposes only
                 # we never talk to replicas, only the leader
                 except ReplicaNotAvailableError:
-                    log.warning('Some (non-leader) replicas not available for topic %s partition %d', topic, partition)
+                    log.debug('Some (non-leader) replicas not available for topic %s partition %d', topic, partition)

                 # If Known Broker, topic_partition -> BrokerMetadata
                 if leader in self.brokers:
@@ -371,12 +551,18 @@ class KafkaClient(object):

     def send_metadata_request(self, payloads=[], fail_on_error=True,
                               callback=None):

         encoder = KafkaProtocol.encode_metadata_request
         decoder = KafkaProtocol.decode_metadata_response

         return self._send_broker_unaware_request(payloads, encoder, decoder)

+    def send_consumer_metadata_request(self, payloads=[], fail_on_error=True,
+                                       callback=None):
+        encoder = KafkaProtocol.encode_consumer_metadata_request
+        decoder = KafkaProtocol.decode_consumer_metadata_response
+
+        return self._send_broker_unaware_request(payloads, encoder, decoder)
+
     def send_produce_request(self, payloads=[], acks=1, timeout=1000,
                              fail_on_error=True, callback=None):
         """
@@ -387,14 +573,27 @@ class KafkaClient(object):
         same order as the list of payloads specified

         Arguments:
-            payloads: list of ProduceRequest
-            fail_on_error: boolean, should we raise an Exception if we
-                           encounter an API error?
-            callback: function, instead of returning the ProduceResponse,
-                      first pass it through this function
+            payloads (list of ProduceRequest): produce requests to send to kafka
+                ProduceRequest payloads must not contain duplicates for any
+                topic-partition.
+            acks (int, optional): how many acks the servers should receive from replica
+                brokers before responding to the request. If it is 0, the server
+                will not send any response. If it is 1, the server will wait
+                until the data is written to the local log before sending a
+                response. If it is -1, the server will wait until the message
+                is committed by all in-sync replicas before sending a response.
+                For any value > 1, the server will wait for this number of acks to
+                occur (but the server will never wait for more acknowledgements than
+                there are in-sync replicas). defaults to 1.
+            timeout (int, optional): maximum time in milliseconds the server can
+                await the receipt of the number of acks, defaults to 1000.
+            fail_on_error (bool, optional): raise exceptions on connection and
+                server response errors, defaults to True.
+            callback (function, optional): instead of returning the ProduceResponse,
+                first pass it through this function, defaults to None.

         Returns:
-            list of ProduceResponse or callback(ProduceResponse), in the
+            list of ProduceResponses, or callback results if supplied, in the
             order of input payloads
         """
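# Reviewer note (not part of the patch): a usage sketch for the acks/timeout
# semantics documented above. The broker address and topic name are
# hypothetical; create_message is kafka-python's message helper.
from kafka import KafkaClient, create_message
from kafka.common import ProduceRequest

client = KafkaClient('kafka01:9092')
req = ProduceRequest(b'my-topic', 0, [create_message(b'hello')])
# acks=1 waits for the partition leader's local write; timeout bounds the
# broker-side wait for those acks, in milliseconds.
(resp,) = client.send_produce_request([req], acks=1, timeout=1000)
assert resp.error == 0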
@@ -410,16 +609,9 @@ class KafkaClient(object):

         resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if resp is not None and
+                (not fail_on_error or not self._raise_on_response_error(resp))]

     def send_fetch_request(self, payloads=[], fail_on_error=True,
                            callback=None, max_wait_time=100, min_bytes=4096):
@@ -438,16 +630,8 @@ class KafkaClient(object):
             payloads, encoder,
             KafkaProtocol.decode_fetch_response)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

     def send_offset_request(self, payloads=[], fail_on_error=True,
                             callback=None):
@@ -456,15 +640,8 @@ class KafkaClient(object):
             KafkaProtocol.encode_offset_request,
             KafkaProtocol.decode_offset_response)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

     def send_offset_commit_request(self, group, payloads=[],
                                    fail_on_error=True, callback=None):
@@ -473,16 +650,8 @@ class KafkaClient(object):
         decoder = KafkaProtocol.decode_offset_commit_response
         resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]

     def send_offset_fetch_request(self, group, payloads=[],
                                   fail_on_error=True, callback=None):
@@ -492,12 +661,16 @@ class KafkaClient(object):
         decoder = KafkaProtocol.decode_offset_fetch_response
         resps = self._send_broker_aware_request(payloads, encoder, decoder)

-        out = []
-        for resp in resps:
-            if fail_on_error is True:
-                self._raise_on_response_error(resp)
-            if callback is not None:
-                out.append(callback(resp))
-            else:
-                out.append(resp)
-        return out
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]
+
+    def send_offset_fetch_request_kafka(self, group, payloads=[],
+                                        fail_on_error=True, callback=None):
+
+        encoder = functools.partial(KafkaProtocol.encode_offset_fetch_request,
+                                    group=group, from_kafka=True)
+        decoder = KafkaProtocol.decode_offset_fetch_response
+        resps = self._send_consumer_aware_request(group, payloads, encoder, decoder)
+
+        return [resp if not callback else callback(resp) for resp in resps
+                if not fail_on_error or not self._raise_on_response_error(resp)]
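# Reviewer note (not part of the patch): every send_*_request helper above now
# shares one pattern -- _raise_on_response_error() either raises or returns
# False, so the list comprehension keeps a response (or callback(response))
# only when no error was raised. The same idiom in isolation, with
# hypothetical resps/callback values:
def _filter_responses(resps, fail_on_error=True, callback=None,
                      raise_on_error=lambda resp: False):
    # raise_on_error stands in for KafkaClient._raise_on_response_error:
    # it raises on fatal errors and returns False otherwise.
    return [resp if not callback else callback(resp) for resp in resps
            if not fail_on_error or not raise_on_error(resp)]

assert _filter_responses([1, 2, 3], callback=str) == ['1', '2', '3']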
@@ -1,8 +1,7 @@
-from io import BytesIO
 import gzip
+from io import BytesIO
 import struct

-import six
 from six.moves import xrange

 _XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1)
@@ -10,9 +9,9 @@ _XERIAL_V1_FORMAT = 'bccccccBii'

 try:
     import snappy
-    _has_snappy = True
+    _HAS_SNAPPY = True
 except ImportError:
-    _has_snappy = False
+    _HAS_SNAPPY = False


 def has_gzip():
@@ -20,26 +19,39 @@ def has_gzip():


 def has_snappy():
-    return _has_snappy
+    return _HAS_SNAPPY


-def gzip_encode(payload):
-    buffer = BytesIO()
-    handle = gzip.GzipFile(fileobj=buffer, mode="w")
-    handle.write(payload)
-    handle.close()
-    buffer.seek(0)
-    result = buffer.read()
-    buffer.close()
+def gzip_encode(payload, compresslevel=None):
+    if not compresslevel:
+        compresslevel = 9
+
+    with BytesIO() as buf:
+
+        # Gzip context manager introduced in python 2.6
+        # so old-fashioned way until we decide to not support 2.6
+        gzipper = gzip.GzipFile(fileobj=buf, mode="w", compresslevel=compresslevel)
+        try:
+            gzipper.write(payload)
+        finally:
+            gzipper.close()
+
+        result = buf.getvalue()
+
     return result


 def gzip_decode(payload):
-    buffer = BytesIO(payload)
-    handle = gzip.GzipFile(fileobj=buffer, mode='r')
-    result = handle.read()
-    handle.close()
-    buffer.close()
+    with BytesIO(payload) as buf:
+
+        # Gzip context manager introduced in python 2.6
+        # so old-fashioned way until we decide to not support 2.6
+        gzipper = gzip.GzipFile(fileobj=buf, mode='r')
+        try:
+            result = gzipper.read()
+        finally:
+            gzipper.close()
+
     return result
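# Reviewer note (not part of the patch): a round-trip sketch of the new
# compresslevel parameter, assuming these helpers live in kafka.codec as in
# kafka-python. gzip_encode defaults to level 9 when compresslevel is falsy.
from kafka.codec import gzip_encode, gzip_decode

payload = b'x' * 1024
fast = gzip_encode(payload, compresslevel=1)   # quicker, usually larger
small = gzip_encode(payload)                   # defaults to level 9
assert gzip_decode(fast) == gzip_decode(small) == payload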
@@ -47,8 +59,8 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
     """Encodes the given data with snappy if xerial_compatible is set then the
     stream is encoded in a fashion compatible with the xerial snappy library

-    The block size (xerial_blocksize) controls how frequent the blocking occurs
-    32k is the default in the xerial library.
+    The block size (xerial_blocksize) controls how frequent the blocking
+    occurs 32k is the default in the xerial library.

     The format winds up being
         +-------------+------------+--------------+------------+--------------+
@@ -63,7 +75,7 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
         length will always be <= blocksize.
     """

-    if not _has_snappy:
+    if not has_snappy():
         raise NotImplementedError("Snappy codec is not available")

     if xerial_compatible:
@@ -74,7 +86,7 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
         out = BytesIO()

         header = b''.join([struct.pack('!' + fmt, dat) for fmt, dat
                            in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])

         out.write(header)
         for chunk in _chunker():
@@ -113,13 +125,13 @@ def _detect_xerial_stream(payload):
     """

     if len(payload) > 16:
-        header = header = struct.unpack('!' + _XERIAL_V1_FORMAT, bytes(payload)[:16])
+        header = struct.unpack('!' + _XERIAL_V1_FORMAT, bytes(payload)[:16])
         return header == _XERIAL_V1_HEADER
     return False


 def snappy_decode(payload):
-    if not _has_snappy:
+    if not has_snappy():
         raise NotImplementedError("Snappy codec is not available")

     if _detect_xerial_stream(payload):
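# Reviewer note (not part of the patch): sketch of the xerial-compatible
# framing described in the snappy_encode docstring. Requires the optional
# python-snappy dependency; the 32 KiB block size matches the xerial default.
from kafka.codec import has_snappy, snappy_encode, snappy_decode

if has_snappy():
    data = b'kafka' * 10000
    framed = snappy_encode(data, xerial_compatible=True,
                           xerial_blocksize=32 * 1024)
    assert snappy_decode(framed) == data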
105  kafka/common.py
@@ -6,43 +6,53 @@ from collections import namedtuple
 #   Structs   #
 ###############

-# Request payloads
-ProduceRequest = namedtuple("ProduceRequest",
-                            ["topic", "partition", "messages"])
-
-FetchRequest = namedtuple("FetchRequest",
-                          ["topic", "partition", "offset", "max_bytes"])
-
-OffsetRequest = namedtuple("OffsetRequest",
-                           ["topic", "partition", "time", "max_offsets"])
-
-OffsetCommitRequest = namedtuple("OffsetCommitRequest",
-                                 ["topic", "partition", "offset", "metadata"])
-
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-MetadataAPI
 MetadataRequest = namedtuple("MetadataRequest",
                              ["topics"])

-OffsetFetchRequest = namedtuple("OffsetFetchRequest", ["topic", "partition"])
-
 MetadataResponse = namedtuple("MetadataResponse",
                               ["brokers", "topics"])

-# Response payloads
-ProduceResponse = namedtuple("ProduceResponse",
-                             ["topic", "partition", "error", "offset"])
-
-FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error",
-                                             "highwaterMark", "messages"])
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ConsumerMetadataRequest
+ConsumerMetadataRequest = namedtuple("ConsumerMetadataRequest",
+                                     ["groups"])
+
+ConsumerMetadataResponse = namedtuple("ConsumerMetadataResponse",
+                                      ["error", "nodeId", "host", "port"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProduceAPI
+ProduceRequest = namedtuple("ProduceRequest",
+                            ["topic", "partition", "messages"])
+
+ProduceResponse = namedtuple("ProduceResponse",
+                             ["topic", "partition", "error", "offset"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-FetchAPI
+FetchRequest = namedtuple("FetchRequest",
+                          ["topic", "partition", "offset", "max_bytes"])
+
+FetchResponse = namedtuple("FetchResponse",
+                           ["topic", "partition", "error", "highwaterMark", "messages"])
+
+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
+OffsetRequest = namedtuple("OffsetRequest",
+                           ["topic", "partition", "time", "max_offsets"])

 OffsetResponse = namedtuple("OffsetResponse",
                             ["topic", "partition", "error", "offsets"])

+# https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
+OffsetCommitRequest = namedtuple("OffsetCommitRequest",
+                                 ["topic", "partition", "offset", "metadata"])
+
 OffsetCommitResponse = namedtuple("OffsetCommitResponse",
                                   ["topic", "partition", "error"])

+OffsetFetchRequest = namedtuple("OffsetFetchRequest",
+                                ["topic", "partition"])
+
 OffsetFetchResponse = namedtuple("OffsetFetchResponse",
-                                 ["topic", "partition", "offset",
-                                  "metadata", "error"])
+                                 ["topic", "partition", "offset", "metadata", "error"])
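# Reviewer note (not part of the patch): the request/response structs above
# are plain namedtuples, now grouped by the wire-protocol API they map to. A
# small sketch with made-up topic/partition values:
from kafka.common import ProduceRequest, FetchRequest, OffsetFetchRequest

produce = ProduceRequest(topic=b'my-topic', partition=0, messages=[])
fetch = FetchRequest(topic=b'my-topic', partition=0, offset=0, max_bytes=4096)
offsets = OffsetFetchRequest(topic=b'my-topic', partition=0)
assert produce.topic == fetch.topic == offsets.topic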
@@ -68,6 +78,11 @@ TopicAndPartition = namedtuple("TopicAndPartition",
 KafkaMessage = namedtuple("KafkaMessage",
                           ["topic", "partition", "offset", "key", "value"])

+# Define retry policy for async producer
+# Limit value: int >= 0, 0 means no retries
+RetryOptions = namedtuple("RetryOptions",
+                          ["limit", "backoff_ms", "retry_on_timeouts"])
+

 #################
 #  Exceptions   #
@@ -152,6 +167,21 @@ class StaleLeaderEpochCodeError(BrokerResponseError):
     message = 'STALE_LEADER_EPOCH_CODE'


+class OffsetsLoadInProgressCode(BrokerResponseError):
+    errno = 14
+    message = 'OFFSETS_LOAD_IN_PROGRESS_CODE'
+
+
+class ConsumerCoordinatorNotAvailableCode(BrokerResponseError):
+    errno = 15
+    message = 'CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE'
+
+
+class NotCoordinatorForConsumerCode(BrokerResponseError):
+    errno = 16
+    message = 'NOT_COORDINATOR_FOR_CONSUMER_CODE'
+
+
 class KafkaUnavailableError(KafkaError):
     pass

@@ -161,7 +191,9 @@ class KafkaTimeoutError(KafkaError):


 class FailedPayloadsError(KafkaError):
-    pass
+    def __init__(self, payload, *args):
+        super(FailedPayloadsError, self).__init__(*args)
+        self.payload = payload


 class ConnectionError(KafkaError):
@@ -200,6 +232,12 @@ class KafkaConfigurationError(KafkaError):
     pass


+class AsyncProducerQueueFull(KafkaError):
+    def __init__(self, failed_msgs, *args):
+        super(AsyncProducerQueueFull, self).__init__(*args)
+        self.failed_msgs = failed_msgs
+
+
 def _iter_broker_errors():
     for name, obj in inspect.getmembers(sys.modules[__name__]):
         if inspect.isclass(obj) and issubclass(obj, BrokerResponseError) and obj != BrokerResponseError:
@@ -210,6 +248,23 @@ kafka_errors = dict([(x.errno, x) for x in _iter_broker_errors()])


 def check_error(response):
+    if isinstance(response, Exception):
+        raise response
     if response.error:
         error_class = kafka_errors.get(response.error, UnknownError)
         raise error_class(response)


+RETRY_BACKOFF_ERROR_TYPES = (
+    KafkaUnavailableError, LeaderNotAvailableError,
+    ConnectionError, FailedPayloadsError
+)
+
+
+RETRY_REFRESH_ERROR_TYPES = (
+    NotLeaderForPartitionError, UnknownTopicOrPartitionError,
+    LeaderNotAvailableError, ConnectionError
+)
+
+
+RETRY_ERROR_TYPES = RETRY_BACKOFF_ERROR_TYPES + RETRY_REFRESH_ERROR_TYPES
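# Reviewer note (not part of the patch): the RETRY_* tuples appear intended
# for isinstance() checks when classifying retriable errors (e.g. backoff vs
# metadata refresh). A minimal sketch with a synthetic FailedPayloadsError:
from kafka.common import (FailedPayloadsError, RETRY_BACKOFF_ERROR_TYPES,
                          RETRY_REFRESH_ERROR_TYPES)

err = FailedPayloadsError(payload=None)
should_backoff = isinstance(err, RETRY_BACKOFF_ERROR_TYPES)   # True
should_refresh = isinstance(err, RETRY_REFRESH_ERROR_TYPES)   # False
assert should_backoff and not should_refresh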
@@ -9,7 +9,8 @@ import six

 from kafka.common import ConnectionError

-log = logging.getLogger("kafka")
+
+log = logging.getLogger(__name__)

 DEFAULT_SOCKET_TIMEOUT_SECONDS = 120
 DEFAULT_KAFKA_PORT = 9092
@@ -62,6 +63,9 @@ class KafkaConnection(local):

         self.reinit()

+    def __getnewargs__(self):
+        return (self.host, self.port, self.timeout)
+
     def __repr__(self):
         return "<KafkaConnection host=%s port=%d>" % (self.host, self.port)

@@ -114,6 +118,11 @@

     # TODO multiplex socket communication to allow for multi-threaded clients

+    def get_connected_socket(self):
+        if not self._sock:
+            self.reinit()
+        return self._sock
+
     def send(self, request_id, payload):
         """
         Send a request to Kafka
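# Reviewer note (not part of the patch): get_connected_socket() exposes the
# raw socket so callers can multiplex several broker connections with
# select(), as the reworked _send_broker_aware_request() does. Sketch with a
# hypothetical dict of KafkaConnection objects keyed by request id:
import select

def wait_for_any_response(connections_by_request):
    # connections_by_request: {requestId: KafkaConnection}
    socks = {conn.get_connected_socket(): rid
             for rid, conn in connections_by_request.items()}
    rlist, _, _ = select.select(list(socks), [], [], None)
    return socks[rlist[0]]   # request id whose response is ready to recv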
@@ -147,6 +156,10 @@
         """
         log.debug("Reading response %d from Kafka" % request_id)

+        # Make sure we have a connection
+        if not self._sock:
+            self.reinit()
+
         # Read the size off of the header
         resp = self._read_bytes(4)
         (size,) = struct.unpack('>i', resp)
@@ -157,9 +170,11 @@

     def copy(self):
         """
-        Create an inactive copy of the connection object
-        A reinit() has to be done on the copy before it can be used again
-        return a new KafkaConnection object
+        Create an inactive copy of the connection object, suitable for
+        passing to a background thread.
+
+        The returned copy is not connected; you must call reinit() before
+        using.
         """
         c = copy.deepcopy(self)
         # Python 3 doesn't copy custom attributes of the threadlocal subclass
@@ -1,5 +1,6 @@
 from __future__ import absolute_import

+import atexit
 import logging
 import numbers
 from threading import Lock
@@ -7,12 +8,13 @@ from threading import Lock
 import kafka.common
 from kafka.common import (
     OffsetRequest, OffsetCommitRequest, OffsetFetchRequest,
-    UnknownTopicOrPartitionError
+    UnknownTopicOrPartitionError, check_error, KafkaError
 )
-from kafka.util import ReentrantTimer
+from kafka.util import kafka_bytestring, ReentrantTimer

-log = logging.getLogger("kafka")
+
+log = logging.getLogger('kafka.consumer')

 AUTO_COMMIT_MSG_COUNT = 100
 AUTO_COMMIT_INTERVAL = 5000
@@ -25,7 +27,9 @@ MAX_FETCH_BUFFER_SIZE_BYTES = FETCH_BUFFER_SIZE_BYTES * 8

 ITER_TIMEOUT_SECONDS = 60
 NO_MESSAGES_WAIT_TIME_SECONDS = 0.1
+FULL_QUEUE_WAIT_TIME_SECONDS = 0.1

+MAX_BACKOFF_SECONDS = 60

 class Consumer(object):
     """
@@ -43,12 +47,12 @@
                  auto_commit_every_t=AUTO_COMMIT_INTERVAL):

         self.client = client
-        self.topic = topic
-        self.group = group
+        self.topic = kafka_bytestring(topic)
+        self.group = None if group is None else kafka_bytestring(group)
         self.client.load_metadata_for_topics(topic)
         self.offsets = {}

-        if not partitions:
+        if partitions is None:
             partitions = self.client.get_partition_ids_for_topic(topic)
         else:
             assert all(isinstance(x, numbers.Integral) for x in partitions)
@@ -67,37 +71,65 @@
                                               self.commit)
             self.commit_timer.start()

-        if auto_commit:
+        # Set initial offsets
+        if self.group is not None:
             self.fetch_last_known_offsets(partitions)
         else:
             for partition in partitions:
                 self.offsets[partition] = 0

+        # Register a cleanup handler
+        def cleanup(obj):
+            obj.stop()
+        self._cleanup_func = cleanup
+        atexit.register(cleanup, self)
+
+        self.partition_info = False     # Do not return partition info in msgs
+
+    def provide_partition_info(self):
+        """
+        Indicates that partition info must be returned by the consumer
+        """
+        self.partition_info = True
+
     def fetch_last_known_offsets(self, partitions=None):
-        if not partitions:
+        if self.group is None:
+            raise ValueError('KafkaClient.group must not be None')
+
+        if partitions is None:
             partitions = self.client.get_partition_ids_for_topic(self.topic)

-        def get_or_init_offset(resp):
-            try:
-                kafka.common.check_error(resp)
-                return resp.offset
-            except UnknownTopicOrPartitionError:
-                return 0
-
-        for partition in partitions:
-            req = OffsetFetchRequest(self.topic, partition)
-            (resp,) = self.client.send_offset_fetch_request(self.group, [req],
-                                                            fail_on_error=False)
-            self.offsets[partition] = get_or_init_offset(resp)
-        self.fetch_offsets = self.offsets.copy()
+        responses = self.client.send_offset_fetch_request(
+            self.group,
+            [OffsetFetchRequest(self.topic, p) for p in partitions],
+            fail_on_error=False
+        )
+
+        for resp in responses:
+            try:
+                check_error(resp)
+            # API spec says server wont set an error here
+            # but 0.8.1.1 does actually...
+            except UnknownTopicOrPartitionError:
+                pass
+
+            # -1 offset signals no commit is currently stored
+            if resp.offset == -1:
+                self.offsets[resp.partition] = 0
+
+            # Otherwise we committed the stored offset
+            # and need to fetch the next one
+            else:
+                self.offsets[resp.partition] = resp.offset

     def commit(self, partitions=None):
-        """
-        Commit offsets for this consumer
+        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

         Keyword Arguments:
             partitions (list): list of partitions to commit, default is to commit
                 all of them
+
+        Returns: True on success, False on failure
         """

         # short circuit if nothing happened. This check is kept outside
@@ -112,23 +144,28 @@
             return

         reqs = []
-        if not partitions:  # commit all partitions
-            partitions = self.offsets.keys()
+        if partitions is None:  # commit all partitions
+            partitions = list(self.offsets.keys())

+        log.debug('Committing new offsets for %s, partitions %s',
+                  self.topic, partitions)
         for partition in partitions:
             offset = self.offsets[partition]
-            log.debug("Commit offset %d in SimpleConsumer: "
-                      "group=%s, topic=%s, partition=%s" %
-                      (offset, self.group, self.topic, partition))
+            log.debug('Commit offset %d in SimpleConsumer: '
+                      'group=%s, topic=%s, partition=%s',
+                      offset, self.group, self.topic, partition)

             reqs.append(OffsetCommitRequest(self.topic, partition,
                                             offset, None))

-        resps = self.client.send_offset_commit_request(self.group, reqs)
-        for resp in resps:
-            kafka.common.check_error(resp)
-
-        self.count_since_commit = 0
+        try:
+            self.client.send_offset_commit_request(self.group, reqs)
+        except KafkaError as e:
+            log.error('%s saving offsets: %s', e.__class__.__name__, e)
+            return False
+        else:
+            self.count_since_commit = 0
+            return True

     def _auto_commit(self):
         """
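# Reviewer note (not part of the patch): commit() now reports failure instead
# of raising, so callers can decide whether to retry. Hypothetical usage with
# an already-constructed consumer instance:
import time

def commit_with_retry(consumer, attempts=3, backoff_seconds=1.0):
    for _ in range(attempts):
        if consumer.commit():      # True on success, False on failure
            return True
        time.sleep(backoff_seconds)
    return False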
@@ -147,6 +184,25 @@
             self.commit_timer.stop()
             self.commit()

+        if hasattr(self, '_cleanup_func'):
+            # Remove cleanup handler now that we've stopped
+
+            # py3 supports unregistering
+            if hasattr(atexit, 'unregister'):
+                atexit.unregister(self._cleanup_func)  # pylint: disable=no-member
+
+            # py2 requires removing from private attribute...
+            else:
+
+                # ValueError on list.remove() if the exithandler no longer
+                # exists is fine here
+                try:
+                    atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
+                except ValueError:
+                    pass
+
+            del self._cleanup_func
+
     def pending(self, partitions=None):
         """
         Gets the pending message count
@@ -154,7 +210,7 @@
         Keyword Arguments:
             partitions (list): list of partitions to check for, default is to check all
         """
-        if not partitions:
+        if partitions is None:
             partitions = self.offsets.keys()

         total = 0
@@ -25,7 +25,7 @@ OffsetsStruct = namedtuple("OffsetsStruct", ["fetch", "highwater", "commit", "ta
 DEFAULT_CONSUMER_CONFIG = {
     'client_id': __name__,
     'group_id': None,
-    'metadata_broker_list': None,
+    'bootstrap_servers': [],
     'socket_timeout_ms': 30 * 1000,
     'fetch_message_max_bytes': 1024 * 1024,
     'auto_offset_reset': 'largest',
@@ -47,150 +47,100 @@ DEFAULT_CONSUMER_CONFIG = {
     'rebalance_backoff_ms': 2000,
 }

-BYTES_CONFIGURATION_KEYS = ('client_id', 'group_id')
+DEPRECATED_CONFIG_KEYS = {
+    'metadata_broker_list': 'bootstrap_servers',
+}


 class KafkaConsumer(object):
-    """
-    A simpler kafka consumer
-
-    .. code:: python
-
-        # A very basic 'tail' consumer, with no stored offset management
-        kafka = KafkaConsumer('topic1')
-        for m in kafka:
-            print m
-
-        # Alternate interface: next()
-        print kafka.next()
-
-        # Alternate interface: batch iteration
-        while True:
-            for m in kafka.fetch_messages():
-                print m
-            print "Done with batch - let's do another!"
-
-
-    .. code:: python
-
-        # more advanced consumer -- multiple topics w/ auto commit offset management
-        kafka = KafkaConsumer('topic1', 'topic2',
-                              group_id='my_consumer_group',
-                              auto_commit_enable=True,
-                              auto_commit_interval_ms=30 * 1000,
-                              auto_offset_reset='smallest')
-
-        # Infinite iteration
-        for m in kafka:
-            process_message(m)
-            kafka.task_done(m)
-
-        # Alternate interface: next()
-        m = kafka.next()
-        process_message(m)
-        kafka.task_done(m)
-
-        # If auto_commit_enable is False, remember to commit() periodically
-        kafka.commit()
-
-        # Batch process interface
-        while True:
-            for m in kafka.fetch_messages():
-                process_message(m)
-                kafka.task_done(m)
-
-
-    messages (m) are namedtuples with attributes:
-
-        * `m.topic`: topic name (str)
-        * `m.partition`: partition number (int)
-        * `m.offset`: message offset on topic-partition log (int)
-        * `m.key`: key (bytes - can be None)
-        * `m.value`: message (output of deserializer_class - default is raw bytes)
-
-    Configuration settings can be passed to constructor,
-    otherwise defaults will be used:
-
-    .. code:: python
-
-        client_id='kafka.consumer.kafka',
-        group_id=None,
-        fetch_message_max_bytes=1024*1024,
-        fetch_min_bytes=1,
-        fetch_wait_max_ms=100,
-        refresh_leader_backoff_ms=200,
-        metadata_broker_list=None,
-        socket_timeout_ms=30*1000,
-        auto_offset_reset='largest',
-        deserializer_class=lambda msg: msg,
-        auto_commit_enable=False,
-        auto_commit_interval_ms=60 * 1000,
-        consumer_timeout_ms=-1
-
-    Configuration parameters are described in more detail at
-    http://kafka.apache.org/documentation.html#highlevelconsumerapi
-    """
+    """A simpler kafka consumer"""
+    DEFAULT_CONFIG = deepcopy(DEFAULT_CONSUMER_CONFIG)

     def __init__(self, *topics, **configs):
         self.configure(**configs)
         self.set_topic_partitions(*topics)

     def configure(self, **configs):
-        """
+        """Configure the consumer instance

         Configuration settings can be passed to constructor,
         otherwise defaults will be used:

-        .. code:: python
-
-            client_id='kafka.consumer.kafka',
-            group_id=None,
-            fetch_message_max_bytes=1024*1024,
-            fetch_min_bytes=1,
-            fetch_wait_max_ms=100,
-            refresh_leader_backoff_ms=200,
-            metadata_broker_list=None,
-            socket_timeout_ms=30*1000,
-            auto_offset_reset='largest',
-            deserializer_class=lambda msg: msg,
-            auto_commit_enable=False,
-            auto_commit_interval_ms=60 * 1000,
-            auto_commit_interval_messages=None,
-            consumer_timeout_ms=-1
+        Keyword Arguments:
+            bootstrap_servers (list): List of initial broker nodes the consumer
+                should contact to bootstrap initial cluster metadata. This does
+                not have to be the full node list. It just needs to have at
+                least one broker that will respond to a Metadata API Request.
+            client_id (str): a unique name for this client. Defaults to
+                'kafka.consumer.kafka'.
+            group_id (str): the name of the consumer group to join,
+                Offsets are fetched / committed to this group name.
+            fetch_message_max_bytes (int, optional): Maximum bytes for each
+                topic/partition fetch request. Defaults to 1024*1024.
+            fetch_min_bytes (int, optional): Minimum amount of data the server
+                should return for a fetch request, otherwise wait up to
+                fetch_wait_max_ms for more data to accumulate. Defaults to 1.
+            fetch_wait_max_ms (int, optional): Maximum time for the server to
+                block waiting for fetch_min_bytes messages to accumulate.
+                Defaults to 100.
+            refresh_leader_backoff_ms (int, optional): Milliseconds to backoff
+                when refreshing metadata on errors (subject to random jitter).
+                Defaults to 200.
+            socket_timeout_ms (int, optional): TCP socket timeout in
+                milliseconds. Defaults to 30*1000.
+            auto_offset_reset (str, optional): A policy for resetting offsets on
+                OffsetOutOfRange errors. 'smallest' will move to the oldest
+                available message, 'largest' will move to the most recent. Any
+                ofther value will raise the exception. Defaults to 'largest'.
+            deserializer_class (callable, optional): Any callable that takes a
+                raw message value and returns a deserialized value. Defaults to
+                lambda msg: msg.
+            auto_commit_enable (bool, optional): Enabling auto-commit will cause
+                the KafkaConsumer to periodically commit offsets without an
+                explicit call to commit(). Defaults to False.
+            auto_commit_interval_ms (int, optional): If auto_commit_enabled,
+                the milliseconds between automatic offset commits. Defaults to
+                60 * 1000.
+            auto_commit_interval_messages (int, optional): If
+                auto_commit_enabled, a number of messages consumed between
+                automatic offset commits. Defaults to None (disabled).
+            consumer_timeout_ms (int, optional): number of millisecond to throw
+                a timeout exception to the consumer if no message is available
+                for consumption. Defaults to -1 (dont throw exception).

         Configuration parameters are described in more detail at
         http://kafka.apache.org/documentation.html#highlevelconsumerapi
         """
+        configs = self._deprecate_configs(**configs)
         self._config = {}
-        for key in DEFAULT_CONSUMER_CONFIG:
-            self._config[key] = configs.pop(key, DEFAULT_CONSUMER_CONFIG[key])
+        for key in self.DEFAULT_CONFIG:
+            self._config[key] = configs.pop(key, self.DEFAULT_CONFIG[key])

         if configs:
             raise KafkaConfigurationError('Unknown configuration key(s): ' +
                                           str(list(configs.keys())))

-        # Handle str/bytes conversions
-        for config_key in BYTES_CONFIGURATION_KEYS:
-            if isinstance(self._config[config_key], six.string_types):
-                logger.warning("Converting configuration key '%s' to bytes" %
-                               config_key)
-                self._config[config_key] = self._config[config_key].encode('utf-8')
-
         if self._config['auto_commit_enable']:
             if not self._config['group_id']:
-                raise KafkaConfigurationError('KafkaConsumer configured to auto-commit without required consumer group (group_id)')
+                raise KafkaConfigurationError(
+                    'KafkaConsumer configured to auto-commit '
+                    'without required consumer group (group_id)'
+                )

         # Check auto-commit configuration
         if self._config['auto_commit_enable']:
             logger.info("Configuring consumer to auto-commit offsets")
             self._reset_auto_commit()

-        if self._config['metadata_broker_list'] is None:
-            raise KafkaConfigurationError('metadata_broker_list required to '
-                                          'configure KafkaConsumer')
-
-        self._client = KafkaClient(self._config['metadata_broker_list'],
-                                   client_id=self._config['client_id'],
-                                   timeout=(self._config['socket_timeout_ms'] / 1000.0))
+        if not self._config['bootstrap_servers']:
+            raise KafkaConfigurationError(
+                'bootstrap_servers required to configure KafkaConsumer'
+            )
+
+        self._client = KafkaClient(
+            self._config['bootstrap_servers'],
+            client_id=self._config['client_id'],
+            timeout=(self._config['socket_timeout_ms'] / 1000.0)
+        )

     def set_topic_partitions(self, *topics):
         """
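# Reviewer note (not part of the patch): configuration now uses
# bootstrap_servers; metadata_broker_list is only accepted as a deprecated
# alias (see DEPRECATED_CONFIG_KEYS). Broker address, topic, and group below
# are hypothetical, and a live broker is required to actually consume:
from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic',
                         group_id='my-group',
                         bootstrap_servers=['kafka01:9092'])
for message in consumer:
    print(message.topic, message.partition, message.offset, message.value)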
@@ -220,12 +170,12 @@
             # Consume topic1-all; topic2-partition2; topic3-partition0
             kafka.set_topic_partitions("topic1", ("topic2", 2), {"topic3": 0})

-            # Consume topic1-0 starting at offset 123, and topic2-1 at offset 456
+            # Consume topic1-0 starting at offset 12, and topic2-1 at offset 45
             # using tuples --
-            kafka.set_topic_partitions(("topic1", 0, 123), ("topic2", 1, 456))
+            kafka.set_topic_partitions(("topic1", 0, 12), ("topic2", 1, 45))

             # using dict --
-            kafka.set_topic_partitions({ ("topic1", 0): 123, ("topic2", 1): 456 })
+            kafka.set_topic_partitions({ ("topic1", 0): 12, ("topic2", 1): 45 })

         """
         self._topics = []
@@ -251,10 +201,10 @@
             elif isinstance(arg, tuple):
                 topic = kafka_bytestring(arg[0])
                 partition = arg[1]
+                self._consume_topic_partition(topic, partition)
                 if len(arg) == 3:
                     offset = arg[2]
                     self._offsets.fetch[(topic, partition)] = offset
-                self._consume_topic_partition(topic, partition)

             # { topic: partitions, ... } dict
             elif isinstance(arg, dict):
@@ -273,15 +223,17 @@
                         for partition in value:
                             self._consume_topic_partition(topic, partition)
                     else:
-                        raise KafkaConfigurationError('Unknown topic type (dict key must be '
-                                                      'int or list/tuple of ints)')
+                        raise KafkaConfigurationError(
+                            'Unknown topic type '
+                            '(dict key must be int or list/tuple of ints)'
+                        )

                 # (topic, partition): offset
                 elif isinstance(key, tuple):
                     topic = kafka_bytestring(key[0])
                     partition = key[1]
                     self._consume_topic_partition(topic, partition)
-                    self._offsets.fetch[key] = value
+                    self._offsets.fetch[(topic, partition)] = value

             else:
                 raise KafkaConfigurationError('Unknown topic type (%s)' % type(arg))
@@ -317,19 +269,23 @@
         # Reset message iterator in case we were in the middle of one
         self._reset_message_iterator()

+    def close(self):
+        """Close this consumer's underlying client."""
+        self._client.close()
+
     def next(self):
-        """
-        Return a single message from the message iterator
-        If consumer_timeout_ms is set, will raise ConsumerTimeout
-        if no message is available
-        Otherwise blocks indefinitely
+        """Return the next available message

-        Note that this is also the method called internally during iteration:
+        Blocks indefinitely unless consumer_timeout_ms > 0

-        .. code:: python
+        Returns:
+            a single KafkaMessage from the message iterator
|
|
||||||
for m in consumer:
|
Raises:
|
||||||
pass
|
ConsumerTimeout after consumer_timeout_ms and no message
|
||||||
|
|
||||||
|
Note:
|
||||||
|
This is also the method called internally during iteration
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self._set_consumer_timeout_start()
|
self._set_consumer_timeout_start()
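A hedged sketch of the timeout behaviour documented above, assuming `ConsumerTimeout` lives in `kafka.common` (the import path is an assumption) and that `consumer` was configured with a positive `consumer_timeout_ms`:

    from kafka.common import ConsumerTimeout  # import path assumed

    try:
        # next() -- and plain iteration -- blocks until a message arrives,
        # or raises ConsumerTimeout once consumer_timeout_ms has elapsed
        msg = consumer.next()
    except ConsumerTimeout:
        msg = None  # no message arrived within consumer_timeout_ms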
|
||||||
@@ -345,110 +301,129 @@ class KafkaConsumer(object):
|
|||||||
self._check_consumer_timeout()
|
self._check_consumer_timeout()
|
||||||
|
|
||||||
def fetch_messages(self):
|
def fetch_messages(self):
|
||||||
"""
|
"""Sends FetchRequests for all topic/partitions set for consumption
|
||||||
Sends FetchRequests for all topic/partitions set for consumption
|
|
||||||
Returns a generator that yields KafkaMessage structs
|
|
||||||
after deserializing with the configured `deserializer_class`
|
|
||||||
|
|
||||||
Refreshes metadata on errors, and resets fetch offset on
|
Returns:
|
||||||
OffsetOutOfRange, per the configured `auto_offset_reset` policy
|
Generator that yields KafkaMessage structs
|
||||||
|
after deserializing with the configured `deserializer_class`
|
||||||
|
|
||||||
Key configuration parameters:
|
Note:
|
||||||
|
Refreshes metadata on errors, and resets fetch offset on
|
||||||
|
OffsetOutOfRange, per the configured `auto_offset_reset` policy
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
Key KafkaConsumer configuration parameters:
|
||||||
|
* `fetch_message_max_bytes`
|
||||||
|
* `fetch_max_wait_ms`
|
||||||
|
* `fetch_min_bytes`
|
||||||
|
* `deserializer_class`
|
||||||
|
* `auto_offset_reset`
|
||||||
|
|
||||||
* `fetch_message_max_bytes`
|
|
||||||
* `fetch_max_wait_ms`
|
|
||||||
* `fetch_min_bytes`
|
|
||||||
* `deserializer_class`
|
|
||||||
* `auto_offset_reset`
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
max_bytes = self._config['fetch_message_max_bytes']
|
max_bytes = self._config['fetch_message_max_bytes']
|
||||||
max_wait_time = self._config['fetch_wait_max_ms']
|
max_wait_time = self._config['fetch_wait_max_ms']
|
||||||
min_bytes = self._config['fetch_min_bytes']
|
min_bytes = self._config['fetch_min_bytes']
|
||||||
|
|
||||||
# Get current fetch offsets
|
if not self._topics:
|
||||||
offsets = self._offsets.fetch
|
raise KafkaConfigurationError('No topics or partitions configured')
|
||||||
if not offsets:
|
|
||||||
if not self._topics:
|
|
||||||
raise KafkaConfigurationError('No topics or partitions configured')
|
|
||||||
raise KafkaConfigurationError('No fetch offsets found when calling fetch_messages')
|
|
||||||
|
|
||||||
fetches = []
|
if not self._offsets.fetch:
|
||||||
for topic_partition, offset in six.iteritems(offsets):
|
raise KafkaConfigurationError(
|
||||||
fetches.append(FetchRequest(topic_partition[0], topic_partition[1], offset, max_bytes))
|
'No fetch offsets found when calling fetch_messages'
|
||||||
|
)
|
||||||
|
|
||||||
# client.send_fetch_request will collect topic/partition requests by leader
|
fetches = [FetchRequest(topic, partition,
|
||||||
# and send each group as a single FetchRequest to the correct broker
|
self._offsets.fetch[(topic, partition)],
|
||||||
try:
|
max_bytes)
|
||||||
responses = self._client.send_fetch_request(fetches,
|
for (topic, partition) in self._topics]
|
||||||
max_wait_time=max_wait_time,
|
|
||||||
min_bytes=min_bytes,
|
# send_fetch_request will batch topic/partition requests by leader
|
||||||
fail_on_error=False)
|
responses = self._client.send_fetch_request(
|
||||||
except FailedPayloadsError:
|
fetches,
|
||||||
logger.warning('FailedPayloadsError attempting to fetch data from kafka')
|
max_wait_time=max_wait_time,
|
||||||
self._refresh_metadata_on_error()
|
min_bytes=min_bytes,
|
||||||
return
|
fail_on_error=False
|
||||||
|
)
|
||||||
|
|
||||||
for resp in responses:
|
for resp in responses:
|
||||||
topic_partition = (resp.topic, resp.partition)
|
|
||||||
|
if isinstance(resp, FailedPayloadsError):
|
||||||
|
logger.warning('FailedPayloadsError attempting to fetch data')
|
||||||
|
self._refresh_metadata_on_error()
|
||||||
|
continue
|
||||||
|
|
||||||
|
topic = kafka_bytestring(resp.topic)
|
||||||
|
partition = resp.partition
|
||||||
try:
|
try:
|
||||||
check_error(resp)
|
check_error(resp)
|
||||||
except OffsetOutOfRangeError:
|
except OffsetOutOfRangeError:
|
||||||
logger.warning('OffsetOutOfRange: topic %s, partition %d, offset %d '
|
logger.warning('OffsetOutOfRange: topic %s, partition %d, '
|
||||||
'(Highwatermark: %d)',
|
'offset %d (Highwatermark: %d)',
|
||||||
resp.topic, resp.partition,
|
topic, partition,
|
||||||
offsets[topic_partition], resp.highwaterMark)
|
self._offsets.fetch[(topic, partition)],
|
||||||
|
resp.highwaterMark)
|
||||||
# Reset offset
|
# Reset offset
|
||||||
self._offsets.fetch[topic_partition] = self._reset_partition_offset(topic_partition)
|
self._offsets.fetch[(topic, partition)] = (
|
||||||
|
self._reset_partition_offset((topic, partition))
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
except NotLeaderForPartitionError:
|
except NotLeaderForPartitionError:
|
||||||
logger.warning("NotLeaderForPartitionError for %s - %d. "
|
logger.warning("NotLeaderForPartitionError for %s - %d. "
|
||||||
"Metadata may be out of date",
|
"Metadata may be out of date",
|
||||||
resp.topic, resp.partition)
|
topic, partition)
|
||||||
self._refresh_metadata_on_error()
|
self._refresh_metadata_on_error()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
except RequestTimedOutError:
|
except RequestTimedOutError:
|
||||||
logger.warning("RequestTimedOutError for %s - %d",
|
logger.warning("RequestTimedOutError for %s - %d",
|
||||||
resp.topic, resp.partition)
|
topic, partition)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Track server highwater mark
|
# Track server highwater mark
|
||||||
self._offsets.highwater[topic_partition] = resp.highwaterMark
|
self._offsets.highwater[(topic, partition)] = resp.highwaterMark
|
||||||
|
|
||||||
# Yield each message
|
# Yield each message
|
||||||
# Kafka-python could raise an exception during iteration
|
# Kafka-python could raise an exception during iteration
|
||||||
# we are not catching -- user will need to address
|
# we are not catching -- user will need to address
|
||||||
for (offset, message) in resp.messages:
|
for (offset, message) in resp.messages:
|
||||||
# deserializer_class could raise an exception here
|
# deserializer_class could raise an exception here
|
||||||
msg = KafkaMessage(resp.topic,
|
val = self._config['deserializer_class'](message.value)
|
||||||
resp.partition,
|
msg = KafkaMessage(topic, partition, offset, message.key, val)
|
||||||
offset, message.key,
|
|
||||||
self._config['deserializer_class'](message.value))
|
|
||||||
|
|
||||||
# Only increment fetch offset if we safely got the message and deserialized
|
# in some cases the server will return earlier messages
|
||||||
self._offsets.fetch[topic_partition] = offset + 1
|
# than we requested. skip them per kafka spec
|
||||||
|
if offset < self._offsets.fetch[(topic, partition)]:
|
||||||
|
logger.debug('message offset less than fetched offset, '
|
||||||
|
'skipping: %s', msg)
|
||||||
|
continue
|
||||||
|
# Only increment fetch offset
|
||||||
|
# if we safely got the message and deserialized
|
||||||
|
self._offsets.fetch[(topic, partition)] = offset + 1
|
||||||
|
|
||||||
# Then yield to user
|
# Then yield to user
|
||||||
yield msg
|
yield msg
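The generator above can be drained directly; a short sketch, reusing the consumer from the earlier examples with a placeholder topic:

    consumer.set_topic_partitions('my-topic')
    for message in consumer.fetch_messages():
        # KafkaMessage fields per this diff: topic, partition, offset, key, value
        print(message.topic, message.partition, message.offset, message.value)
        consumer.task_done(message)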
|
||||||
|
|
||||||
def get_partition_offsets(self, topic, partition, request_time_ms, max_num_offsets):
|
def get_partition_offsets(self, topic, partition, request_time_ms, max_num_offsets):
|
||||||
"""
|
"""Request available fetch offsets for a single topic/partition
|
||||||
Request available fetch offsets for a single topic/partition
|
|
||||||
|
|
||||||
Arguments:
|
Keyword Arguments:
|
||||||
topic (str)
|
topic (str): topic for offset request
|
||||||
partition (int)
|
partition (int): partition for offset request
|
||||||
request_time_ms (int): Used to ask for all messages before a
|
request_time_ms (int): Used to ask for all messages before a
|
||||||
certain time (ms). There are two special values. Specify -1 to receive the latest
|
certain time (ms). There are two special values.
|
||||||
offset (i.e. the offset of the next coming message) and -2 to receive the earliest
|
Specify -1 to receive the latest offset (i.e. the offset of the
|
||||||
available offset. Note that because offsets are pulled in descending order, asking for
|
next coming message) and -2 to receive the earliest available
|
||||||
the earliest offset will always return you a single element.
|
offset. Note that because offsets are pulled in descending
|
||||||
max_num_offsets (int)
|
order, asking for the earliest offset will always return you a
|
||||||
|
single element.
|
||||||
|
max_num_offsets (int): Maximum offsets to include in the OffsetResponse
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
offsets (list)
|
a list of offsets in the OffsetResponse submitted for the provided
|
||||||
|
topic / partition. See:
|
||||||
|
https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
|
||||||
"""
|
"""
|
||||||
reqs = [OffsetRequest(topic, partition, request_time_ms, max_num_offsets)]
|
reqs = [OffsetRequest(topic, partition, request_time_ms, max_num_offsets)]
|
||||||
|
|
||||||
@@ -464,7 +439,8 @@ class KafkaConsumer(object):
|
|||||||
return resp.offsets
|
return resp.offsets
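A hedged illustration of the -1/-2 special values described above (topic and partition are placeholders; on this code path topics are byte strings):

    # Latest offset, i.e. the offset the next produced message will get
    (latest,) = consumer.get_partition_offsets(b'my-topic', 0, -1, max_num_offsets=1)

    # Earliest offset still retained by the broker
    (earliest,) = consumer.get_partition_offsets(b'my-topic', 0, -2, max_num_offsets=1)

    backlog = latest - earliest  # rough count of retained messages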
|
||||||
|
|
||||||
def offsets(self, group=None):
|
def offsets(self, group=None):
|
||||||
"""
|
"""Get internal consumer offset values
|
||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
group: Either "fetch", "commit", "task_done", or "highwater".
|
group: Either "fetch", "commit", "task_done", or "highwater".
|
||||||
If no group specified, returns all groups.
|
If no group specified, returns all groups.
|
||||||
@@ -483,12 +459,25 @@ class KafkaConsumer(object):
|
|||||||
return dict(deepcopy(getattr(self._offsets, group)))
|
return dict(deepcopy(getattr(self._offsets, group)))
|
||||||
|
|
||||||
def task_done(self, message):
|
def task_done(self, message):
|
||||||
"""
|
"""Mark a fetched message as consumed.
|
||||||
Mark a fetched message as consumed.
|
|
||||||
Offsets for messages marked as "task_done" will be stored back
|
Offsets for messages marked as "task_done" will be stored back
|
||||||
to the kafka cluster for this consumer group on commit()
|
to the kafka cluster for this consumer group on commit()
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
message (KafkaMessage): the message to mark as complete
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True, unless the topic-partition for this message has not
|
||||||
|
been configured for the consumer. In normal operation, this
|
||||||
|
should not happen. But see github issue 364.
|
||||||
"""
|
"""
|
||||||
topic_partition = (message.topic, message.partition)
|
topic_partition = (message.topic, message.partition)
|
||||||
|
if topic_partition not in self._topics:
|
||||||
|
logger.warning('Unrecognized topic/partition in task_done message: '
|
||||||
|
'{0}:{1}'.format(*topic_partition))
|
||||||
|
return False
|
||||||
|
|
||||||
offset = message.offset
|
offset = message.offset
|
||||||
|
|
||||||
# Warn on non-contiguous offsets
|
# Warn on non-contiguous offsets
|
||||||
@@ -513,17 +502,25 @@ class KafkaConsumer(object):
|
|||||||
if self._should_auto_commit():
|
if self._should_auto_commit():
|
||||||
self.commit()
|
self.commit()
|
||||||
|
|
||||||
|
return True
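Putting task_done() and commit() together as the docstrings above suggest -- a sketch only, with `process()` standing in for application logic:

    for message in consumer:
        process(message)                      # placeholder for application logic
        if not consumer.task_done(message):
            continue                          # unknown topic/partition, see Returns note above
    # offsets marked via task_done() are stored to the cluster here
    consumer.commit()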
|
||||||
|
|
||||||
def commit(self):
|
def commit(self):
|
||||||
"""
|
"""Store consumed message offsets (marked via task_done())
|
||||||
Store consumed message offsets (marked via task_done())
|
|
||||||
to kafka cluster for this consumer_group.
|
to kafka cluster for this consumer_group.
|
||||||
|
|
||||||
**Note**: this functionality requires server version >=0.8.1.1
|
Returns:
|
||||||
See `this wiki page <https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI>`_.
|
True on success, or False if no offsets were found for commit
|
||||||
|
|
||||||
|
Note:
|
||||||
|
this functionality requires server version >=0.8.1.1
|
||||||
|
https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
|
||||||
"""
|
"""
|
||||||
if not self._config['group_id']:
|
if not self._config['group_id']:
|
||||||
logger.warning('Cannot commit without a group_id!')
|
logger.warning('Cannot commit without a group_id!')
|
||||||
raise KafkaConfigurationError('Attempted to commit offsets without a configured consumer group (group_id)')
|
raise KafkaConfigurationError(
|
||||||
|
'Attempted to commit offsets '
|
||||||
|
'without a configured consumer group (group_id)'
|
||||||
|
)
|
||||||
|
|
||||||
# API supports storing metadata with each commit
|
# API supports storing metadata with each commit
|
||||||
# but for now it is unused
|
# but for now it is unused
|
||||||
@@ -547,13 +544,17 @@ class KafkaConsumer(object):
|
|||||||
if commit_offset == self._offsets.commit[topic_partition]:
|
if commit_offset == self._offsets.commit[topic_partition]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
commits.append(OffsetCommitRequest(topic_partition[0], topic_partition[1], commit_offset, metadata))
|
commits.append(
|
||||||
|
OffsetCommitRequest(topic_partition[0], topic_partition[1],
|
||||||
|
commit_offset, metadata)
|
||||||
|
)
|
||||||
|
|
||||||
if commits:
|
if commits:
|
||||||
logger.info('committing consumer offsets to group %s', self._config['group_id'])
|
logger.info('committing consumer offsets to group %s', self._config['group_id'])
|
||||||
resps = self._client.send_offset_commit_request(self._config['group_id'],
|
resps = self._client.send_offset_commit_request(
|
||||||
commits,
|
kafka_bytestring(self._config['group_id']), commits,
|
||||||
fail_on_error=False)
|
fail_on_error=False
|
||||||
|
)
|
||||||
|
|
||||||
for r in resps:
|
for r in resps:
|
||||||
check_error(r)
|
check_error(r)
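If manual commit() calls are not wanted, the auto-commit path checked earlier in this class can be enabled instead; a sketch in which `auto_commit_enable` and `group_id` come from this diff and `auto_commit_interval_ms` is an assumed option name:

    consumer = KafkaConsumer(
        'my-topic',
        group_id='my-group',                   # required for commits, as enforced above
        bootstrap_servers=['localhost:9092'],
        auto_commit_enable=True,
        auto_commit_interval_ms=5000,          # assumed name: commit roughly every 5 seconds
    )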
|
||||||
@@ -615,7 +616,7 @@ class KafkaConsumer(object):
|
|||||||
logger.info("Consumer fetching stored offsets")
|
logger.info("Consumer fetching stored offsets")
|
||||||
for topic_partition in self._topics:
|
for topic_partition in self._topics:
|
||||||
(resp,) = self._client.send_offset_fetch_request(
|
(resp,) = self._client.send_offset_fetch_request(
|
||||||
self._config['group_id'],
|
kafka_bytestring(self._config['group_id']),
|
||||||
[OffsetFetchRequest(topic_partition[0], topic_partition[1])],
|
[OffsetFetchRequest(topic_partition[0], topic_partition[1])],
|
||||||
fail_on_error=False)
|
fail_on_error=False)
|
||||||
try:
|
try:
|
||||||
@@ -664,7 +665,7 @@ class KafkaConsumer(object):
|
|||||||
# Otherwise we should re-raise the upstream exception
|
# Otherwise we should re-raise the upstream exception
|
||||||
# b/c it typically includes additional data about
|
# b/c it typically includes additional data about
|
||||||
# the request that triggered it, and we do not want to drop that
|
# the request that triggered it, and we do not want to drop that
|
||||||
raise
|
raise # pylint: disable-msg=E0704
|
||||||
|
|
||||||
(offset, ) = self.get_partition_offsets(topic, partition,
|
(offset, ) = self.get_partition_offsets(topic, partition,
|
||||||
request_time_ms, max_num_offsets=1)
|
request_time_ms, max_num_offsets=1)
|
||||||
@@ -750,6 +751,22 @@ class KafkaConsumer(object):
|
|||||||
#
|
#
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<KafkaConsumer topics=(%s)>' % ', '.join(["%s-%d" % topic_partition
|
return '<{0} topics=({1})>'.format(
|
||||||
for topic_partition in
|
self.__class__.__name__,
|
||||||
self._topics])
|
'|'.join(["%s-%d" % topic_partition
|
||||||
|
for topic_partition in self._topics])
|
||||||
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# other private methods
|
||||||
|
#
|
||||||
|
|
||||||
|
def _deprecate_configs(self, **configs):
|
||||||
|
for old, new in six.iteritems(DEPRECATED_CONFIG_KEYS):
|
||||||
|
if old in configs:
|
||||||
|
logger.warning('Deprecated Kafka Consumer configuration: %s. '
|
||||||
|
'Please use %s instead.', old, new)
|
||||||
|
old_value = configs.pop(old)
|
||||||
|
if new not in configs:
|
||||||
|
configs[new] = old_value
|
||||||
|
return configs
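The remapping loop above only renames keys it recognizes and never overwrites an explicitly supplied new-style key. A self-contained sketch of that behaviour (the mapping table is illustrative, not the module's actual DEPRECATED_CONFIG_KEYS):

    DEPRECATED = {'metadata_broker_list': 'bootstrap_servers'}  # illustrative

    def deprecate_configs(**configs):
        for old, new in DEPRECATED.items():
            if old in configs:
                value = configs.pop(old)
                configs.setdefault(new, value)  # keep an explicit new-style value
        return configs

    print(deprecate_configs(metadata_broker_list=['localhost:9092']))
    # {'bootstrap_servers': ['localhost:9092']}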
|
||||||
|
|||||||
@@ -1,24 +1,31 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
from collections import namedtuple
|
||||||
import logging
|
import logging
|
||||||
import time
|
from multiprocessing import Process, Manager as MPManager
|
||||||
from multiprocessing import Process, Queue as MPQueue, Event, Value
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from Queue import Empty
|
import queue # python 3
|
||||||
except ImportError: # python 2
|
except ImportError:
|
||||||
from queue import Empty
|
import Queue as queue # python 2
|
||||||
|
import time
|
||||||
|
|
||||||
|
from ..common import KafkaError
|
||||||
from .base import (
|
from .base import (
|
||||||
|
Consumer,
|
||||||
AUTO_COMMIT_MSG_COUNT, AUTO_COMMIT_INTERVAL,
|
AUTO_COMMIT_MSG_COUNT, AUTO_COMMIT_INTERVAL,
|
||||||
NO_MESSAGES_WAIT_TIME_SECONDS
|
NO_MESSAGES_WAIT_TIME_SECONDS,
|
||||||
|
FULL_QUEUE_WAIT_TIME_SECONDS,
|
||||||
|
MAX_BACKOFF_SECONDS,
|
||||||
)
|
)
|
||||||
from .simple import Consumer, SimpleConsumer
|
from .simple import SimpleConsumer
|
||||||
|
|
||||||
log = logging.getLogger("kafka")
|
|
||||||
|
|
||||||
|
|
||||||
def _mp_consume(client, group, topic, chunk, queue, start, exit, pause, size):
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
Events = namedtuple("Events", ["start", "pause", "exit"])
|
||||||
|
|
||||||
|
|
||||||
|
def _mp_consume(client, group, topic, queue, size, events, **consumer_options):
|
||||||
"""
|
"""
|
||||||
A child process worker which consumes messages based on the
|
A child process worker which consumes messages based on the
|
||||||
notifications given by the controller process
|
notifications given by the controller process
|
||||||
@@ -28,51 +35,67 @@ def _mp_consume(client, group, topic, chunk, queue, start, exit, pause, size):
|
|||||||
functionality breaks unless this function is kept outside of a class
|
functionality breaks unless this function is kept outside of a class
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Make the child processes open separate socket connections
|
# Initial interval for retries in seconds.
|
||||||
client.reinit()
|
interval = 1
|
||||||
|
while not events.exit.is_set():
|
||||||
|
try:
|
||||||
|
# Make the child processes open separate socket connections
|
||||||
|
client.reinit()
|
||||||
|
|
||||||
# We will start consumers without auto-commit. Auto-commit will be
|
# We will start consumers without auto-commit. Auto-commit will be
|
||||||
# done by the master controller process.
|
# done by the master controller process.
|
||||||
consumer = SimpleConsumer(client, group, topic,
|
consumer = SimpleConsumer(client, group, topic,
|
||||||
partitions=chunk,
|
auto_commit=False,
|
||||||
auto_commit=False,
|
auto_commit_every_n=None,
|
||||||
auto_commit_every_n=None,
|
auto_commit_every_t=None,
|
||||||
auto_commit_every_t=None)
|
**consumer_options)
|
||||||
|
|
||||||
# Ensure that the consumer provides the partition information
|
# Ensure that the consumer provides the partition information
|
||||||
consumer.provide_partition_info()
|
consumer.provide_partition_info()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Wait till the controller indicates us to start consumption
|
# Wait till the controller indicates us to start consumption
|
||||||
start.wait()
|
events.start.wait()
|
||||||
|
|
||||||
# If we are asked to quit, do so
|
# If we are asked to quit, do so
|
||||||
if exit.is_set():
|
if events.exit.is_set():
|
||||||
break
|
break
|
||||||
|
|
||||||
# Consume messages and add them to the queue. If the controller
|
# Consume messages and add them to the queue. If the controller
|
||||||
# indicates a specific number of messages, follow that advice
|
# indicates a specific number of messages, follow that advice
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
message = consumer.get_message()
|
message = consumer.get_message()
|
||||||
if message:
|
if message:
|
||||||
queue.put(message)
|
while True:
|
||||||
count += 1
|
try:
|
||||||
|
queue.put(message, timeout=FULL_QUEUE_WAIT_TIME_SECONDS)
|
||||||
|
break
|
||||||
|
except queue.Full:
|
||||||
|
if events.exit.is_set(): break
|
||||||
|
|
||||||
# We have reached the required size. The controller might have
|
count += 1
|
||||||
# more than what it needs. Wait for a while.
|
|
||||||
# Without this logic, it is possible that we run into a big
|
|
||||||
# loop consuming all available messages before the controller
|
|
||||||
# can reset the 'start' event
|
|
||||||
if count == size.value:
|
|
||||||
pause.wait()
|
|
||||||
|
|
||||||
else:
|
# We have reached the required size. The controller might have
|
||||||
# In case we did not receive any message, give up the CPU for
|
# more than what it needs. Wait for a while.
|
||||||
# a while before we try again
|
# Without this logic, it is possible that we run into a big
|
||||||
time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)
|
# loop consuming all available messages before the controller
|
||||||
|
# can reset the 'start' event
|
||||||
|
if count == size.value:
|
||||||
|
events.pause.wait()
|
||||||
|
|
||||||
consumer.stop()
|
else:
|
||||||
|
# In case we did not receive any message, give up the CPU for
|
||||||
|
# a while before we try again
|
||||||
|
time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)
|
||||||
|
|
||||||
|
consumer.stop()
|
||||||
|
|
||||||
|
except KafkaError as e:
|
||||||
|
# Retry with exponential backoff
|
||||||
|
log.error("Problem communicating with Kafka (%s), retrying in %d seconds..." % (e, interval))
|
||||||
|
time.sleep(interval)
|
||||||
|
interval = interval*2 if interval*2 < MAX_BACKOFF_SECONDS else MAX_BACKOFF_SECONDS
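The retry loop above doubles the sleep interval until it hits MAX_BACKOFF_SECONDS. The same capped-doubling schedule in isolation (the cap value is illustrative):

    import itertools

    MAX_BACKOFF_SECONDS = 60  # illustrative cap

    def backoff_schedule(start=1, cap=MAX_BACKOFF_SECONDS):
        """Yield 1, 2, 4, ... doubling each time, capped at `cap`."""
        interval = start
        while True:
            yield interval
            interval = min(interval * 2, cap)

    print(list(itertools.islice(backoff_schedule(), 8)))
    # [1, 2, 4, 8, 16, 32, 60, 60]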
|
||||||
|
|
||||||
|
|
||||||
class MultiProcessConsumer(Consumer):
|
class MultiProcessConsumer(Consumer):
|
||||||
@@ -83,9 +106,12 @@ class MultiProcessConsumer(Consumer):
|
|||||||
Arguments:
|
Arguments:
|
||||||
client: a connected KafkaClient
|
client: a connected KafkaClient
|
||||||
group: a name for this consumer, used for offset storage and must be unique
|
group: a name for this consumer, used for offset storage and must be unique
|
||||||
|
If you are connecting to a server that does not support offset
|
||||||
|
commit/fetch (any prior to 0.8.1.1), then you *must* set this to None
|
||||||
topic: the topic to consume
|
topic: the topic to consume
|
||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
|
partitions: An optional list of partitions to consume the data from
|
||||||
auto_commit: default True. Whether or not to auto commit the offsets
|
auto_commit: default True. Whether or not to auto commit the offsets
|
||||||
auto_commit_every_n: default 100. How many messages to consume
|
auto_commit_every_n: default 100. How many messages to consume
|
||||||
before a commit
|
before a commit
|
||||||
@@ -102,51 +128,61 @@ class MultiProcessConsumer(Consumer):
|
|||||||
commit method on this class. A manual call to commit will also reset
|
commit method on this class. A manual call to commit will also reset
|
||||||
these triggers
|
these triggers
|
||||||
"""
|
"""
|
||||||
def __init__(self, client, group, topic, auto_commit=True,
|
def __init__(self, client, group, topic,
|
||||||
|
partitions=None,
|
||||||
|
auto_commit=True,
|
||||||
auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
|
auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
|
||||||
auto_commit_every_t=AUTO_COMMIT_INTERVAL,
|
auto_commit_every_t=AUTO_COMMIT_INTERVAL,
|
||||||
num_procs=1, partitions_per_proc=0):
|
num_procs=1,
|
||||||
|
partitions_per_proc=0,
|
||||||
|
**simple_consumer_options):
|
||||||
|
|
||||||
# Initiate the base consumer class
|
# Initiate the base consumer class
|
||||||
super(MultiProcessConsumer, self).__init__(
|
super(MultiProcessConsumer, self).__init__(
|
||||||
client, group, topic,
|
client, group, topic,
|
||||||
partitions=None,
|
partitions=partitions,
|
||||||
auto_commit=auto_commit,
|
auto_commit=auto_commit,
|
||||||
auto_commit_every_n=auto_commit_every_n,
|
auto_commit_every_n=auto_commit_every_n,
|
||||||
auto_commit_every_t=auto_commit_every_t)
|
auto_commit_every_t=auto_commit_every_t)
|
||||||
|
|
||||||
# Variables for managing and controlling the data flow from
|
# Variables for managing and controlling the data flow from
|
||||||
# consumer child process to master
|
# consumer child process to master
|
||||||
self.queue = MPQueue(1024) # Child consumers dump messages into this
|
manager = MPManager()
|
||||||
self.start = Event() # Indicates the consumers to start fetch
|
self.queue = manager.Queue(1024) # Child consumers dump messages into this
|
||||||
self.exit = Event() # Requests the consumers to shutdown
|
self.events = Events(
|
||||||
self.pause = Event() # Requests the consumers to pause fetch
|
start = manager.Event(), # Indicates the consumers to start fetch
|
||||||
self.size = Value('i', 0) # Indicator of number of messages to fetch
|
exit = manager.Event(), # Requests the consumers to shutdown
|
||||||
|
pause = manager.Event()) # Requests the consumers to pause fetch
|
||||||
|
self.size = manager.Value('i', 0) # Indicator of number of messages to fetch
|
||||||
|
|
||||||
partitions = self.offsets.keys()
|
# dict.keys() returns a view in py3 + it's not a thread-safe operation
|
||||||
|
# http://blog.labix.org/2008/06/27/watch-out-for-listdictkeys-in-python-3
|
||||||
|
# It's safer to copy dict as it only runs during the init.
|
||||||
|
partitions = list(self.offsets.copy().keys())
|
||||||
|
|
||||||
# If unspecified, start one consumer per partition
|
# By default, start one consumer process for all partitions
|
||||||
# The logic below ensures that
|
# The logic below ensures that
|
||||||
# * we do not cross the num_procs limit
|
# * we do not cross the num_procs limit
|
||||||
# * we have an even distribution of partitions among processes
|
# * we have an even distribution of partitions among processes
|
||||||
if not partitions_per_proc:
|
|
||||||
partitions_per_proc = round(len(partitions) * 1.0 / num_procs)
|
if partitions_per_proc:
|
||||||
if partitions_per_proc < num_procs * 0.5:
|
num_procs = len(partitions) / partitions_per_proc
|
||||||
partitions_per_proc += 1
|
if num_procs * partitions_per_proc < len(partitions):
|
||||||
|
num_procs += 1
|
||||||
|
|
||||||
# The final set of chunks
|
# The final set of chunks
|
||||||
chunker = lambda *x: [] + list(x)
|
chunks = [partitions[proc::num_procs] for proc in range(num_procs)]
|
||||||
chunks = map(chunker, *[iter(partitions)] * int(partitions_per_proc))
|
|
||||||
|
|
||||||
self.procs = []
|
self.procs = []
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
chunk = filter(lambda x: x is not None, chunk)
|
options = {'partitions': list(chunk)}
|
||||||
args = (client.copy(),
|
if simple_consumer_options:
|
||||||
group, topic, list(chunk),
|
simple_consumer_options.pop('partitions', None)
|
||||||
self.queue, self.start, self.exit,
|
options.update(simple_consumer_options)
|
||||||
self.pause, self.size)
|
|
||||||
|
|
||||||
proc = Process(target=_mp_consume, args=args)
|
args = (client.copy(), self.group, self.topic, self.queue,
|
||||||
|
self.size, self.events)
|
||||||
|
proc = Process(target=_mp_consume, args=args, kwargs=options)
|
||||||
proc.daemon = True
|
proc.daemon = True
|
||||||
proc.start()
|
proc.start()
|
||||||
self.procs.append(proc)
|
self.procs.append(proc)
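The chunking expression above stripes partitions across worker processes round-robin. A standalone illustration:

    partitions = list(range(10))   # e.g. a topic with 10 partitions
    num_procs = 3

    chunks = [partitions[proc::num_procs] for proc in range(num_procs)]
    print(chunks)
    # [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]] -- every partition lands in exactly one chunk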
|
||||||
@@ -157,9 +193,9 @@ class MultiProcessConsumer(Consumer):
|
|||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
# Set exit and start off all waiting consumers
|
# Set exit and start off all waiting consumers
|
||||||
self.exit.set()
|
self.events.exit.set()
|
||||||
self.pause.set()
|
self.events.pause.set()
|
||||||
self.start.set()
|
self.events.start.set()
|
||||||
|
|
||||||
for proc in self.procs:
|
for proc in self.procs:
|
||||||
proc.join()
|
proc.join()
|
||||||
@@ -174,27 +210,27 @@ class MultiProcessConsumer(Consumer):
|
|||||||
# Trigger the consumer procs to start off.
|
# Trigger the consumer procs to start off.
|
||||||
# We will iterate till there are no more messages available
|
# We will iterate till there are no more messages available
|
||||||
self.size.value = 0
|
self.size.value = 0
|
||||||
self.pause.set()
|
self.events.pause.set()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
self.start.set()
|
self.events.start.set()
|
||||||
try:
|
try:
|
||||||
# We will block for a small while so that the consumers get
|
# We will block for a small while so that the consumers get
|
||||||
# a chance to run and put some messages in the queue
|
# a chance to run and put some messages in the queue
|
||||||
# TODO: This is a hack and will make the consumer block for
|
# TODO: This is a hack and will make the consumer block for
|
||||||
# at least one second. Need to find a better way of doing this
|
# at least one second. Need to find a better way of doing this
|
||||||
partition, message = self.queue.get(block=True, timeout=1)
|
partition, message = self.queue.get(block=True, timeout=1)
|
||||||
except Empty:
|
except queue.Empty:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Count, check and commit messages if necessary
|
# Count, check and commit messages if necessary
|
||||||
self.offsets[partition] = message.offset + 1
|
self.offsets[partition] = message.offset + 1
|
||||||
self.start.clear()
|
self.events.start.clear()
|
||||||
self.count_since_commit += 1
|
self.count_since_commit += 1
|
||||||
self._auto_commit()
|
self._auto_commit()
|
||||||
yield message
|
yield message
|
||||||
|
|
||||||
self.start.clear()
|
self.events.start.clear()
|
||||||
|
|
||||||
def get_messages(self, count=1, block=True, timeout=10):
|
def get_messages(self, count=1, block=True, timeout=10):
|
||||||
"""
|
"""
|
||||||
@@ -202,10 +238,12 @@ class MultiProcessConsumer(Consumer):
|
|||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
count: Indicates the maximum number of messages to be fetched
|
count: Indicates the maximum number of messages to be fetched
|
||||||
block: If True, the API will block till some messages are fetched.
|
block: If True, the API will block till all messages are fetched.
|
||||||
timeout: If block is True, the function will block for the specified
|
If block is a positive integer the API will block until that
|
||||||
time (in seconds) until count messages is fetched. If None,
|
many messages are fetched.
|
||||||
it will block forever.
|
timeout: When blocking is requested the function will block for
|
||||||
|
the specified time (in seconds) until count messages are
|
||||||
|
fetched. If None, it will block forever.
|
||||||
"""
|
"""
|
||||||
messages = []
|
messages = []
|
||||||
|
|
||||||
@@ -214,7 +252,7 @@ class MultiProcessConsumer(Consumer):
|
|||||||
# necessary, but these will not be committed to kafka. Also, the extra
|
# necessary, but these will not be committed to kafka. Also, the extra
|
||||||
# messages can be provided in subsequent runs
|
# messages can be provided in subsequent runs
|
||||||
self.size.value = count
|
self.size.value = count
|
||||||
self.pause.clear()
|
self.events.pause.clear()
|
||||||
|
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
max_time = time.time() + timeout
|
max_time = time.time() + timeout
|
||||||
@@ -226,22 +264,25 @@ class MultiProcessConsumer(Consumer):
|
|||||||
# go into overdrive and keep consuming thousands of
|
# go into overdrive and keep consuming thousands of
|
||||||
# messages when the user might need only a few
|
# messages when the user might need only a few
|
||||||
if self.queue.empty():
|
if self.queue.empty():
|
||||||
self.start.set()
|
self.events.start.set()
|
||||||
|
|
||||||
|
block_next_call = block is True or block > len(messages)
|
||||||
try:
|
try:
|
||||||
partition, message = self.queue.get(block, timeout)
|
partition, message = self.queue.get(block_next_call,
|
||||||
except Empty:
|
timeout)
|
||||||
|
except queue.Empty:
|
||||||
break
|
break
|
||||||
|
|
||||||
messages.append(message)
|
_msg = (partition, message) if self.partition_info else message
|
||||||
|
messages.append(_msg)
|
||||||
new_offsets[partition] = message.offset + 1
|
new_offsets[partition] = message.offset + 1
|
||||||
count -= 1
|
count -= 1
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
timeout = max_time - time.time()
|
timeout = max_time - time.time()
|
||||||
|
|
||||||
self.size.value = 0
|
self.size.value = 0
|
||||||
self.start.clear()
|
self.events.start.clear()
|
||||||
self.pause.set()
|
self.events.pause.set()
|
||||||
|
|
||||||
# Update and commit offsets if necessary
|
# Update and commit offsets if necessary
|
||||||
self.offsets.update(new_offsets)
|
self.offsets.update(new_offsets)
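A hedged usage sketch of the blocking semantics documented above; the consumer is assumed to be a MultiProcessConsumer built as usual, and `block=5` keeps the call blocking until at least five messages have arrived or the timeout expires:

    # up to 10 messages; keep blocking while fewer than 5 have been fetched,
    # but never wait longer than 30 seconds overall
    messages = consumer.get_messages(count=10, block=5, timeout=30)
    for msg in messages:
        # each item is (partition, message) when provide_partition_info() was called,
        # otherwise just the message
        print(msg)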
|
||||||
|
|||||||
@@ -2,22 +2,18 @@ from __future__ import absolute_import
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from itertools import zip_longest as izip_longest, repeat # pylint: disable-msg=E0611
|
from itertools import zip_longest as izip_longest, repeat # pylint: disable-msg=E0611
|
||||||
except ImportError: # python 2
|
except ImportError:
|
||||||
from itertools import izip_longest as izip_longest, repeat
|
from itertools import izip_longest as izip_longest, repeat # python 2
|
||||||
import logging
|
import logging
|
||||||
|
try:
|
||||||
|
import queue # python 3
|
||||||
|
except ImportError:
|
||||||
|
import Queue as queue # python 2
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
try:
|
|
||||||
from Queue import Empty, Queue
|
|
||||||
except ImportError: # python 2
|
|
||||||
from queue import Empty, Queue
|
|
||||||
|
|
||||||
from kafka.common import (
|
|
||||||
FetchRequest, OffsetRequest,
|
|
||||||
ConsumerFetchSizeTooSmall, ConsumerNoMoreData
|
|
||||||
)
|
|
||||||
from .base import (
|
from .base import (
|
||||||
Consumer,
|
Consumer,
|
||||||
FETCH_DEFAULT_BLOCK_TIMEOUT,
|
FETCH_DEFAULT_BLOCK_TIMEOUT,
|
||||||
@@ -30,8 +26,16 @@ from .base import (
|
|||||||
ITER_TIMEOUT_SECONDS,
|
ITER_TIMEOUT_SECONDS,
|
||||||
NO_MESSAGES_WAIT_TIME_SECONDS
|
NO_MESSAGES_WAIT_TIME_SECONDS
|
||||||
)
|
)
|
||||||
|
from ..common import (
|
||||||
|
FetchRequest, KafkaError, OffsetRequest,
|
||||||
|
ConsumerFetchSizeTooSmall, ConsumerNoMoreData,
|
||||||
|
UnknownTopicOrPartitionError, NotLeaderForPartitionError,
|
||||||
|
OffsetOutOfRangeError, FailedPayloadsError, check_error
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
log = logging.getLogger("kafka")
|
|
||||||
|
|
||||||
class FetchContext(object):
|
class FetchContext(object):
|
||||||
"""
|
"""
|
||||||
@@ -70,6 +74,8 @@ class SimpleConsumer(Consumer):
|
|||||||
Arguments:
|
Arguments:
|
||||||
client: a connected KafkaClient
|
client: a connected KafkaClient
|
||||||
group: a name for this consumer, used for offset storage and must be unique
|
group: a name for this consumer, used for offset storage and must be unique
|
||||||
|
If you are connecting to a server that does not support offset
|
||||||
|
commit/fetch (any prior to 0.8.1.1), then you *must* set this to None
|
||||||
topic: the topic to consume
|
topic: the topic to consume
|
||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
@@ -94,6 +100,10 @@ class SimpleConsumer(Consumer):
|
|||||||
message in the iterator before exiting. None means no
|
message in the iterator before exiting. None means no
|
||||||
timeout, so it will wait forever.
|
timeout, so it will wait forever.
|
||||||
|
|
||||||
|
auto_offset_reset: default largest. Reset partition offsets upon
|
||||||
|
OffsetOutOfRangeError. Valid values are largest and smallest.
|
||||||
|
Otherwise, do not reset the offsets and raise OffsetOutOfRangeError.
|
||||||
|
|
||||||
Auto commit details:
|
Auto commit details:
|
||||||
If both auto_commit_every_n and auto_commit_every_t are set, they will
|
If both auto_commit_every_n and auto_commit_every_t are set, they will
|
||||||
reset one another when one is triggered. These triggers simply call the
|
reset one another when one is triggered. These triggers simply call the
|
||||||
@@ -106,7 +116,8 @@ class SimpleConsumer(Consumer):
|
|||||||
fetch_size_bytes=FETCH_MIN_BYTES,
|
fetch_size_bytes=FETCH_MIN_BYTES,
|
||||||
buffer_size=FETCH_BUFFER_SIZE_BYTES,
|
buffer_size=FETCH_BUFFER_SIZE_BYTES,
|
||||||
max_buffer_size=MAX_FETCH_BUFFER_SIZE_BYTES,
|
max_buffer_size=MAX_FETCH_BUFFER_SIZE_BYTES,
|
||||||
iter_timeout=None):
|
iter_timeout=None,
|
||||||
|
auto_offset_reset='largest'):
|
||||||
super(SimpleConsumer, self).__init__(
|
super(SimpleConsumer, self).__init__(
|
||||||
client, group, topic,
|
client, group, topic,
|
||||||
partitions=partitions,
|
partitions=partitions,
|
||||||
@@ -115,55 +126,117 @@ class SimpleConsumer(Consumer):
|
|||||||
auto_commit_every_t=auto_commit_every_t)
|
auto_commit_every_t=auto_commit_every_t)
|
||||||
|
|
||||||
if max_buffer_size is not None and buffer_size > max_buffer_size:
|
if max_buffer_size is not None and buffer_size > max_buffer_size:
|
||||||
raise ValueError("buffer_size (%d) is greater than "
|
raise ValueError('buffer_size (%d) is greater than '
|
||||||
"max_buffer_size (%d)" %
|
'max_buffer_size (%d)' %
|
||||||
(buffer_size, max_buffer_size))
|
(buffer_size, max_buffer_size))
|
||||||
self.buffer_size = buffer_size
|
self.buffer_size = buffer_size
|
||||||
self.max_buffer_size = max_buffer_size
|
self.max_buffer_size = max_buffer_size
|
||||||
self.partition_info = False # Do not return partition info in msgs
|
|
||||||
self.fetch_max_wait_time = FETCH_MAX_WAIT_TIME
|
self.fetch_max_wait_time = FETCH_MAX_WAIT_TIME
|
||||||
self.fetch_min_bytes = fetch_size_bytes
|
self.fetch_min_bytes = fetch_size_bytes
|
||||||
self.fetch_offsets = self.offsets.copy()
|
self.fetch_offsets = self.offsets.copy()
|
||||||
self.iter_timeout = iter_timeout
|
self.iter_timeout = iter_timeout
|
||||||
self.queue = Queue()
|
self.auto_offset_reset = auto_offset_reset
|
||||||
|
self.queue = queue.Queue()
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<SimpleConsumer group=%s, topic=%s, partitions=%s>' % \
|
return '<SimpleConsumer group=%s, topic=%s, partitions=%s>' % \
|
||||||
(self.group, self.topic, str(self.offsets.keys()))
|
(self.group, self.topic, str(self.offsets.keys()))
|
||||||
|
|
||||||
def provide_partition_info(self):
|
def reset_partition_offset(self, partition):
|
||||||
"""
|
"""Update offsets using auto_offset_reset policy (smallest|largest)
|
||||||
Indicates that partition info must be returned by the consumer
|
|
||||||
"""
|
|
||||||
self.partition_info = True
|
|
||||||
|
|
||||||
def seek(self, offset, whence):
|
Arguments:
|
||||||
|
partition (int): the partition for which offsets should be updated
|
||||||
|
|
||||||
|
Returns: Updated offset on success, None on failure
|
||||||
|
"""
|
||||||
|
LATEST = -1
|
||||||
|
EARLIEST = -2
|
||||||
|
if self.auto_offset_reset == 'largest':
|
||||||
|
reqs = [OffsetRequest(self.topic, partition, LATEST, 1)]
|
||||||
|
elif self.auto_offset_reset == 'smallest':
|
||||||
|
reqs = [OffsetRequest(self.topic, partition, EARLIEST, 1)]
|
||||||
|
else:
|
||||||
|
# Let's raise a reasonable exception type if the user calls
|
||||||
|
# outside of an exception context
|
||||||
|
if sys.exc_info() == (None, None, None):
|
||||||
|
raise OffsetOutOfRangeError('Cannot reset partition offsets without a '
|
||||||
|
'valid auto_offset_reset setting '
|
||||||
|
'(largest|smallest)')
|
||||||
|
# Otherwise we should re-raise the upstream exception
|
||||||
|
# b/c it typically includes additional data about
|
||||||
|
# the request that triggered it, and we do not want to drop that
|
||||||
|
raise # pylint: disable-msg=E0704
|
||||||
|
|
||||||
|
# send_offset_request
|
||||||
|
log.info('Resetting topic-partition offset to %s for %s:%d',
|
||||||
|
self.auto_offset_reset, self.topic, partition)
|
||||||
|
try:
|
||||||
|
(resp, ) = self.client.send_offset_request(reqs)
|
||||||
|
except KafkaError as e:
|
||||||
|
log.error('%s sending offset request for %s:%d',
|
||||||
|
e.__class__.__name__, self.topic, partition)
|
||||||
|
else:
|
||||||
|
self.offsets[partition] = resp.offsets[0]
|
||||||
|
self.fetch_offsets[partition] = resp.offsets[0]
|
||||||
|
return resp.offsets[0]
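A hedged sketch of enabling the new auto_offset_reset policy on a SimpleConsumer; the top-level imports and broker address are assumptions, and 'smallest' restarts from the earliest retained offset after an OffsetOutOfRangeError:

    from kafka import KafkaClient, SimpleConsumer  # assumed top-level imports

    client = KafkaClient('localhost:9092')
    consumer = SimpleConsumer(client, 'my-group', 'my-topic',
                              auto_offset_reset='smallest')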
|
||||||
|
|
||||||
|
def seek(self, offset, whence=None, partition=None):
|
||||||
"""
|
"""
|
||||||
Alter the current offset in the consumer, similar to fseek
|
Alter the current offset in the consumer, similar to fseek
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
offset: how much to modify the offset
|
offset: how much to modify the offset
|
||||||
whence: where to modify it from
|
whence: where to modify it from, default is None
|
||||||
|
|
||||||
* 0 is relative to the earliest available offset (head)
|
* None is an absolute offset
|
||||||
* 1 is relative to the current offset
|
* 0 is relative to the earliest available offset (head)
|
||||||
* 2 is relative to the latest known offset (tail)
|
* 1 is relative to the current offset
|
||||||
|
* 2 is relative to the latest known offset (tail)
|
||||||
|
|
||||||
|
partition: modify which partition, default is None.
|
||||||
|
If partition is None, all partitions are modified.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if whence == 1: # relative to current position
|
if whence is None: # set an absolute offset
|
||||||
for partition, _offset in self.offsets.items():
|
if partition is None:
|
||||||
self.offsets[partition] = _offset + offset
|
for tmp_partition in self.offsets:
|
||||||
|
self.offsets[tmp_partition] = offset
|
||||||
|
else:
|
||||||
|
self.offsets[partition] = offset
|
||||||
|
elif whence == 1: # relative to current position
|
||||||
|
if partition is None:
|
||||||
|
for tmp_partition, _offset in self.offsets.items():
|
||||||
|
self.offsets[tmp_partition] = _offset + offset
|
||||||
|
else:
|
||||||
|
self.offsets[partition] += offset
|
||||||
elif whence in (0, 2): # relative to beginning or end
|
elif whence in (0, 2): # relative to beginning or end
|
||||||
# divide the request offset by number of partitions,
|
|
||||||
# distribute the remained evenly
|
|
||||||
(delta, rem) = divmod(offset, len(self.offsets))
|
|
||||||
deltas = {}
|
|
||||||
for partition, r in izip_longest(self.offsets.keys(),
|
|
||||||
repeat(1, rem), fillvalue=0):
|
|
||||||
deltas[partition] = delta + r
|
|
||||||
|
|
||||||
reqs = []
|
reqs = []
|
||||||
for partition in self.offsets.keys():
|
deltas = {}
|
||||||
|
if partition is None:
|
||||||
|
# divide the request offset by number of partitions,
|
||||||
|
# distribute the remained evenly
|
||||||
|
(delta, rem) = divmod(offset, len(self.offsets))
|
||||||
|
for tmp_partition, r in izip_longest(self.offsets.keys(),
|
||||||
|
repeat(1, rem),
|
||||||
|
fillvalue=0):
|
||||||
|
deltas[tmp_partition] = delta + r
|
||||||
|
|
||||||
|
for tmp_partition in self.offsets.keys():
|
||||||
|
if whence == 0:
|
||||||
|
reqs.append(OffsetRequest(self.topic,
|
||||||
|
tmp_partition,
|
||||||
|
-2,
|
||||||
|
1))
|
||||||
|
elif whence == 2:
|
||||||
|
reqs.append(OffsetRequest(self.topic,
|
||||||
|
tmp_partition,
|
||||||
|
-1,
|
||||||
|
1))
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
deltas[partition] = offset
|
||||||
if whence == 0:
|
if whence == 0:
|
||||||
reqs.append(OffsetRequest(self.topic, partition, -2, 1))
|
reqs.append(OffsetRequest(self.topic, partition, -2, 1))
|
||||||
elif whence == 2:
|
elif whence == 2:
|
||||||
@@ -176,15 +249,15 @@ class SimpleConsumer(Consumer):
|
|||||||
self.offsets[resp.partition] = \
|
self.offsets[resp.partition] = \
|
||||||
resp.offsets[0] + deltas[resp.partition]
|
resp.offsets[0] + deltas[resp.partition]
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unexpected value for `whence`, %d" % whence)
|
raise ValueError('Unexpected value for `whence`, %d' % whence)
|
||||||
|
|
||||||
# Reset queue and fetch offsets since they are invalid
|
# Reset queue and fetch offsets since they are invalid
|
||||||
self.fetch_offsets = self.offsets.copy()
|
self.fetch_offsets = self.offsets.copy()
|
||||||
|
self.count_since_commit += 1
|
||||||
if self.auto_commit:
|
if self.auto_commit:
|
||||||
self.count_since_commit += 1
|
|
||||||
self.commit()
|
self.commit()
|
||||||
|
|
||||||
self.queue = Queue()
|
self.queue = queue.Queue()
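The extended seek() signature above allows absolute and per-partition seeks; a usage sketch continuing from the consumer built earlier:

    consumer.seek(0, whence=0)        # rewind every partition to its earliest offset
    consumer.seek(0, whence=2)        # jump every partition to its latest offset
    consumer.seek(-10, whence=2)      # replay roughly the last 10 messages, spread across partitions
    consumer.seek(42, partition=3)    # whence=None: absolute seek, partition 3 to offset 42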
|
||||||
|
|
||||||
def get_messages(self, count=1, block=True, timeout=0.1):
|
def get_messages(self, count=1, block=True, timeout=0.1):
|
||||||
"""
|
"""
|
||||||
@@ -192,42 +265,42 @@ class SimpleConsumer(Consumer):
|
|||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
count: Indicates the maximum number of messages to be fetched
|
count: Indicates the maximum number of messages to be fetched
|
||||||
block: If True, the API will block till some messages are fetched.
|
block: If True, the API will block till all messages are fetched.
|
||||||
timeout: If block is True, the function will block for the specified
|
If block is a positive integer the API will block until that
|
||||||
time (in seconds) until count messages is fetched. If None,
|
many messages are fetched.
|
||||||
it will block forever.
|
timeout: When blocking is requested the function will block for
|
||||||
|
the specified time (in seconds) until count messages is
|
||||||
|
fetched. If None, it will block forever.
|
||||||
"""
|
"""
|
||||||
messages = []
|
messages = []
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
max_time = time.time() + timeout
|
timeout += time.time()
|
||||||
|
|
||||||
new_offsets = {}
|
new_offsets = {}
|
||||||
while count > 0 and (timeout is None or timeout > 0):
|
log.debug('getting %d messages', count)
|
||||||
result = self._get_message(block, timeout, get_partition_info=True,
|
while len(messages) < count:
|
||||||
|
block_time = timeout - time.time()
|
||||||
|
log.debug('calling _get_message block=%s timeout=%s', block, block_time)
|
||||||
|
block_next_call = block is True or block > len(messages)
|
||||||
|
result = self._get_message(block_next_call, block_time,
|
||||||
|
get_partition_info=True,
|
||||||
update_offset=False)
|
update_offset=False)
|
||||||
if result:
|
log.debug('got %s from _get_messages', result)
|
||||||
partition, message = result
|
if not result:
|
||||||
if self.partition_info:
|
if block_next_call and (timeout is None or time.time() <= timeout):
|
||||||
messages.append(result)
|
continue
|
||||||
else:
|
break
|
||||||
messages.append(message)
|
|
||||||
new_offsets[partition] = message.offset + 1
|
|
||||||
count -= 1
|
|
||||||
else:
|
|
||||||
# Ran out of messages for the last request.
|
|
||||||
if not block:
|
|
||||||
# If we're not blocking, break.
|
|
||||||
break
|
|
||||||
|
|
||||||
# If we have a timeout, reduce it to the
|
partition, message = result
|
||||||
# appropriate value
|
_msg = (partition, message) if self.partition_info else message
|
||||||
if timeout is not None:
|
messages.append(_msg)
|
||||||
timeout = max_time - time.time()
|
new_offsets[partition] = message.offset + 1
|
||||||
|
|
||||||
# Update and commit offsets if necessary
|
# Update and commit offsets if necessary
|
||||||
self.offsets.update(new_offsets)
|
self.offsets.update(new_offsets)
|
||||||
self.count_since_commit += len(messages)
|
self.count_since_commit += len(messages)
|
||||||
self._auto_commit()
|
self._auto_commit()
|
||||||
|
log.debug('got %d messages: %s', len(messages), messages)
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
def get_message(self, block=True, timeout=0.1, get_partition_info=None):
|
def get_message(self, block=True, timeout=0.1, get_partition_info=None):
|
||||||
@@ -241,10 +314,16 @@ class SimpleConsumer(Consumer):
|
|||||||
If get_partition_info is True, returns (partition, message)
|
If get_partition_info is True, returns (partition, message)
|
||||||
If get_partition_info is False, returns message
|
If get_partition_info is False, returns message
|
||||||
"""
|
"""
|
||||||
if self.queue.empty():
|
start_at = time.time()
|
||||||
|
while self.queue.empty():
|
||||||
# We're out of messages, go grab some more.
|
# We're out of messages, go grab some more.
|
||||||
|
log.debug('internal queue empty, fetching more messages')
|
||||||
with FetchContext(self, block, timeout):
|
with FetchContext(self, block, timeout):
|
||||||
self._fetch()
|
self._fetch()
|
||||||
|
|
||||||
|
if not block or time.time() > (start_at + timeout):
|
||||||
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
partition, message = self.queue.get_nowait()
|
partition, message = self.queue.get_nowait()
|
||||||
|
|
||||||
@@ -262,7 +341,8 @@ class SimpleConsumer(Consumer):
|
|||||||
return partition, message
|
return partition, message
|
||||||
else:
|
else:
|
||||||
return message
|
return message
|
||||||
except Empty:
|
except queue.Empty:
|
||||||
|
log.debug('internal queue empty after fetch - returning None')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
@@ -297,21 +377,55 @@ class SimpleConsumer(Consumer):
|
|||||||
responses = self.client.send_fetch_request(
|
responses = self.client.send_fetch_request(
|
||||||
requests,
|
requests,
|
||||||
max_wait_time=int(self.fetch_max_wait_time),
|
max_wait_time=int(self.fetch_max_wait_time),
|
||||||
-                min_bytes=self.fetch_min_bytes)
+                min_bytes=self.fetch_min_bytes,
+                fail_on_error=False
+            )
+
             retry_partitions = {}
             for resp in responses:
 
+                try:
+                    check_error(resp)
+                except UnknownTopicOrPartitionError:
+                    log.error('UnknownTopicOrPartitionError for %s:%d',
+                              resp.topic, resp.partition)
+                    self.client.reset_topic_metadata(resp.topic)
+                    raise
+                except NotLeaderForPartitionError:
+                    log.error('NotLeaderForPartitionError for %s:%d',
+                              resp.topic, resp.partition)
+                    self.client.reset_topic_metadata(resp.topic)
+                    continue
+                except OffsetOutOfRangeError:
+                    log.warning('OffsetOutOfRangeError for %s:%d. '
+                                'Resetting partition offset...',
+                                resp.topic, resp.partition)
+                    self.reset_partition_offset(resp.partition)
+                    # Retry this partition
+                    retry_partitions[resp.partition] = partitions[resp.partition]
+                    continue
+                except FailedPayloadsError as e:
+                    log.warning('FailedPayloadsError for %s:%d',
+                                e.payload.topic, e.payload.partition)
+                    # Retry this partition
+                    retry_partitions[e.payload.partition] = partitions[e.payload.partition]
+                    continue
+
                 partition = resp.partition
                 buffer_size = partitions[partition]
                 try:
                     for message in resp.messages:
+                        if message.offset < self.fetch_offsets[partition]:
+                            log.debug('Skipping message %s because its offset is less than the consumer offset',
+                                      message)
+                            continue
                         # Put the message in our queue
                         self.queue.put((partition, message))
                         self.fetch_offsets[partition] = message.offset + 1
                 except ConsumerFetchSizeTooSmall:
                     if (self.max_buffer_size is not None and
                             buffer_size == self.max_buffer_size):
-                        log.error("Max fetch size %d too small",
+                        log.error('Max fetch size %d too small',
                                   self.max_buffer_size)
                         raise
                     if self.max_buffer_size is None:
@@ -319,12 +433,12 @@ class SimpleConsumer(Consumer):
                     else:
                         buffer_size = min(buffer_size * 2,
                                           self.max_buffer_size)
-                    log.warn("Fetch size too small, increase to %d (2x) "
-                             "and retry", buffer_size)
+                    log.warning('Fetch size too small, increase to %d (2x) '
+                                'and retry', buffer_size)
                     retry_partitions[partition] = buffer_size
                 except ConsumerNoMoreData as e:
-                    log.debug("Iteration was ended by %r", e)
+                    log.debug('Iteration was ended by %r', e)
                 except StopIteration:
                     # Stop iterating through this partition
-                    log.debug("Done iterating over partition %s" % partition)
+                    log.debug('Done iterating over partition %s', partition)
             partitions = retry_partitions
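The reworked fetch loop above now runs check_error() on every FetchResponse and sorts failures into three buckets: unknown topic (reset metadata and re-raise), stale leader (reset metadata and skip), and recoverable conditions (reset the offset or requeue the partition for the next pass). A minimal, self-contained sketch of that classify-then-retry shape follows; the exception and helper names are illustrative stand-ins, not kafka-python's own types.

# Hedged sketch of the classify-then-retry pattern used in the new fetch loop.
# All classes here are illustrative stand-ins, not kafka-python types.
class UnknownTopic(Exception): pass
class StaleLeader(Exception): pass
class OutOfRange(Exception): pass

def classify(responses, partitions, check_error):
    retry = {}
    for partition, resp in responses.items():
        try:
            check_error(resp)
        except UnknownTopic:
            raise                    # unrecoverable: surface to the caller
        except StaleLeader:
            continue                 # drop this partition; a metadata refresh will fix it
        except OutOfRange:
            retry[partition] = partitions[partition]   # reset offset and retry next pass
    return retry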
@@ -1,6 +1,7 @@
 from .roundrobin import RoundRobinPartitioner
-from .hashed import HashedPartitioner
+from .hashed import HashedPartitioner, Murmur2Partitioner, LegacyPartitioner
 
 __all__ = [
-    'RoundRobinPartitioner', 'HashedPartitioner'
+    'RoundRobinPartitioner', 'HashedPartitioner', 'Murmur2Partitioner',
+    'LegacyPartitioner'
 ]
@@ -12,14 +12,13 @@ class Partitioner(object):
         """
         self.partitions = partitions
 
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
         """
         Takes a string key and num_partitions as argument and returns
         a partition to be used for the message
 
         Arguments:
-            partitions: The list of partitions is passed in every call. This
-                        may look like an overhead, but it will be useful
-                        (in future) when we handle cases like rebalancing
+            key: the key to use for partitioning
+            partitions: (optional) a list of partitions.
         """
         raise NotImplementedError('partition function has to be implemented')
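Because partition() now accepts partitions=None, a subclass can fall back to the partition list captured at construction time. Below is a minimal sketch of a custom partitioner following the new signature; the base class here is a stand-in for kafka.partitioner.base.Partitioner and FirstBytePartitioner is a made-up example.

# Minimal stand-in base class plus a custom partitioner using the new
# partition(key, partitions=None) signature. Illustrative only.
class Partitioner(object):
    def __init__(self, partitions):
        self.partitions = partitions

    def partition(self, key, partitions=None):
        raise NotImplementedError('partition function has to be implemented')


class FirstBytePartitioner(Partitioner):
    """Picks a partition from the first byte of the key (example only)."""
    def partition(self, key, partitions=None):
        partitions = partitions or self.partitions
        return partitions[bytearray(key)[0] % len(partitions)]


print(FirstBytePartitioner([0, 1, 2]).partition(b'k'))   # one of 0, 1, 2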
@@ -1,12 +1,110 @@
+import six
+
 from .base import Partitioner
 
-class HashedPartitioner(Partitioner):
+class Murmur2Partitioner(Partitioner):
     """
+    Implements a partitioner which selects the target partition based on
+    the hash of the key. Attempts to apply the same hashing
+    function as mainline java client.
+    """
+    def partition(self, key, partitions=None):
+        if not partitions:
+            partitions = self.partitions
+
+        # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java#L69
+        idx = (murmur2(key) & 0x7fffffff) % len(partitions)
+
+        return partitions[idx]
+
+
+class LegacyPartitioner(Partitioner):
+    """DEPRECATED -- See Issue 374
+
     Implements a partitioner which selects the target partition based on
     the hash of the key
     """
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
+        if not partitions:
+            partitions = self.partitions
         size = len(partitions)
         idx = hash(key) % size
 
         return partitions[idx]
+
+
+# Default will change to Murmur2 in 0.10 release
+HashedPartitioner = LegacyPartitioner
+
+
+# https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L244
+def murmur2(key):
+    """Pure-python Murmur2 implementation.
+
+    Based on java client, see org.apache.kafka.common.utils.Utils.murmur2
+
+    Args:
+        key: if not a bytes type, encoded using default encoding
+
+    Returns: MurmurHash2 of key bytearray
+    """
+
+    # Convert key to bytes or bytearray
+    if isinstance(key, bytearray) or (six.PY3 and isinstance(key, bytes)):
+        data = key
+    else:
+        data = bytearray(str(key).encode())
+
+    length = len(data)
+    seed = 0x9747b28c
+    # 'm' and 'r' are mixing constants generated offline.
+    # They're not really 'magic', they just happen to work well.
+    m = 0x5bd1e995
+    r = 24
+
+    # Initialize the hash to a random value
+    h = seed ^ length
+    length4 = length // 4
+
+    for i in range(length4):
+        i4 = i * 4
+        k = ((data[i4 + 0] & 0xff) +
+             ((data[i4 + 1] & 0xff) << 8) +
+             ((data[i4 + 2] & 0xff) << 16) +
+             ((data[i4 + 3] & 0xff) << 24))
+        k &= 0xffffffff
+        k *= m
+        k &= 0xffffffff
+        k ^= (k % 0x100000000) >> r  # k ^= k >>> r
+        k &= 0xffffffff
+        k *= m
+        k &= 0xffffffff
+
+        h *= m
+        h &= 0xffffffff
+        h ^= k
+        h &= 0xffffffff
+
+    # Handle the last few bytes of the input array
+    extra_bytes = length % 4
+    if extra_bytes >= 3:
+        h ^= (data[(length & ~3) + 2] & 0xff) << 16
+        h &= 0xffffffff
+    if extra_bytes >= 2:
+        h ^= (data[(length & ~3) + 1] & 0xff) << 8
+        h &= 0xffffffff
+    if extra_bytes >= 1:
+        h ^= (data[length & ~3] & 0xff)
+        h &= 0xffffffff
+        h *= m
+        h &= 0xffffffff
+
+    h ^= (h % 0x100000000) >> 13  # h >>> 13;
+    h &= 0xffffffff
+    h *= m
+    h &= 0xffffffff
+    h ^= (h % 0x100000000) >> 15  # h >>> 15;
+    h &= 0xffffffff
+
+    return h
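The partition index is the murmur2 hash masked to a non-negative 31-bit value and reduced modulo the partition count, matching the Java client. A usage sketch, assuming this release exposes the names at kafka.partitioner.hashed:

# Usage sketch for the murmur2-based partitioner added above.
# Assumes the import path matches this release of kafka-python.
from kafka.partitioner.hashed import Murmur2Partitioner, murmur2

partitions = [0, 1, 2, 3]
key = b'my-key'

# The same index computation the partitioner performs internally:
idx = (murmur2(key) & 0x7fffffff) % len(partitions)

partitioner = Murmur2Partitioner(partitions)
assert partitioner.partition(key) == partitions[idx]
print('key %r -> partition %d' % (key, idx))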
@@ -15,9 +15,9 @@ class RoundRobinPartitioner(Partitioner):
         self.partitions = partitions
         self.iterpart = cycle(partitions)
 
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
         # Refresh the partition list if necessary
-        if self.partitions != partitions:
+        if partitions and self.partitions != partitions:
             self._set_partitions(partitions)
 
         return next(self.iterpart)
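RoundRobinPartitioner ignores the key entirely and just advances an itertools.cycle; the new guard only refreshes that cycle when a non-empty partition list is passed in. The cycling behaviour in isolation:

# The round-robin behaviour on its own: keys are ignored, partitions rotate.
from itertools import cycle

iterpart = cycle([0, 1, 2])
print([next(iterpart) for _ in range(5)])   # [0, 1, 2, 0, 1]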
@@ -1,84 +1,223 @@
 from __future__ import absolute_import
 
+import atexit
 import logging
 import time
 
 try:
-    from queue import Empty
+    from queue import Empty, Full, Queue
 except ImportError:
-    from Queue import Empty
+    from Queue import Empty, Full, Queue
 from collections import defaultdict
-from multiprocessing import Queue, Process
+
+from threading import Thread, Event
 
 import six
 
 from kafka.common import (
-    ProduceRequest, TopicAndPartition, UnsupportedCodecError
+    ProduceRequest, ProduceResponse, TopicAndPartition, RetryOptions,
+    kafka_errors, UnsupportedCodecError, FailedPayloadsError,
+    RequestTimedOutError, AsyncProducerQueueFull, UnknownError,
+    RETRY_ERROR_TYPES, RETRY_BACKOFF_ERROR_TYPES, RETRY_REFRESH_ERROR_TYPES
 )
-from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set
 
-log = logging.getLogger("kafka")
+from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set
+from kafka.util import kafka_bytestring
+
+log = logging.getLogger('kafka.producer')
 
 BATCH_SEND_DEFAULT_INTERVAL = 20
 BATCH_SEND_MSG_COUNT = 20
 
+# unlimited
+ASYNC_QUEUE_MAXSIZE = 0
+ASYNC_QUEUE_PUT_TIMEOUT = 0
+# unlimited retries by default
+ASYNC_RETRY_LIMIT = None
+ASYNC_RETRY_BACKOFF_MS = 100
+ASYNC_RETRY_ON_TIMEOUTS = True
+ASYNC_LOG_MESSAGES_ON_ERROR = True
+
 STOP_ASYNC_PRODUCER = -1
+ASYNC_STOP_TIMEOUT_SECS = 30
+
+SYNC_FAIL_ON_ERROR_DEFAULT = True
 
 
 def _send_upstream(queue, client, codec, batch_time, batch_size,
-                   req_acks, ack_timeout):
-    """
-    Listen on the queue for a specified number of messages or till
-    a specified timeout and send them upstream to the brokers in one
-    request
-
-    NOTE: Ideally, this should have been a method inside the Producer
-    class. However, multiprocessing module has issues in windows. The
-    functionality breaks unless this function is kept outside of a class
-    """
-    stop = False
-    client.reinit()
+                   req_acks, ack_timeout, retry_options, stop_event,
+                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
+                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
+                   codec_compresslevel=None):
+    """Private method to manage producing messages asynchronously
+
+    Listens on the queue for a specified number of messages or until
+    a specified timeout and then sends messages to the brokers in grouped
+    requests (one per broker).
+
+    Messages placed on the queue should be tuples that conform to this format:
+        ((topic, partition), message, key)
+
+    Currently does not mark messages with task_done. Do not attempt to join()!
+
+    Arguments:
+        queue (threading.Queue): the queue from which to get messages
+        client (KafkaClient): instance to use for communicating with brokers
+        codec (kafka.protocol.ALL_CODECS): compression codec to use
+        batch_time (int): interval in seconds to send message batches
+        batch_size (int): count of messages that will trigger an immediate send
+        req_acks: required acks to use with ProduceRequests. see server protocol
+        ack_timeout: timeout to wait for required acks. see server protocol
+        retry_options (RetryOptions): settings for retry limits, backoff etc
+        stop_event (threading.Event): event to monitor for shutdown signal.
+            when this event is 'set', the producer will stop sending messages.
+        log_messages_on_error (bool, optional): log stringified message-contents
+            on any produce error, otherwise only log a hash() of the contents,
+            defaults to True.
+        stop_timeout (int or float, optional): number of seconds to continue
+            retrying messages after stop_event is set, defaults to 30.
+    """
+    request_tries = {}
+
+    while not stop_event.is_set():
+        try:
+            client.reinit()
+        except Exception as e:
+            log.warn('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms)
+            time.sleep(float(retry_options.backoff_ms) / 1000)
+        else:
+            break
+
+    stop_at = None
+    while not (stop_event.is_set() and queue.empty() and not request_tries):
+
+        # Handle stop_timeout
+        if stop_event.is_set():
+            if not stop_at:
+                stop_at = stop_timeout + time.time()
+            if time.time() > stop_at:
+                log.debug('Async producer stopping due to stop_timeout')
+                break
 
-    while not stop:
         timeout = batch_time
         count = batch_size
         send_at = time.time() + timeout
         msgset = defaultdict(list)
 
+        # Merging messages will require a bit more work to manage correctly
+        # for now, dont look for new batches if we have old ones to retry
+        if request_tries:
+            count = 0
+            log.debug('Skipping new batch collection to handle retries')
+        else:
+            log.debug('Batching size: %s, timeout: %s', count, timeout)
+
         # Keep fetching till we gather enough messages or a
         # timeout is reached
         while count > 0 and timeout >= 0:
             try:
                 topic_partition, msg, key = queue.get(timeout=timeout)
 
             except Empty:
                 break
 
             # Check if the controller has requested us to stop
             if topic_partition == STOP_ASYNC_PRODUCER:
-                stop = True
+                stop_event.set()
                 break
 
             # Adjust the timeout to match the remaining period
             count -= 1
             timeout = send_at - time.time()
-            msgset[topic_partition].append(msg)
+            msgset[topic_partition].append((msg, key))
 
         # Send collected requests upstream
-        reqs = []
         for topic_partition, msg in msgset.items():
-            messages = create_message_set(msg, codec, key)
+            messages = create_message_set(msg, codec, key, codec_compresslevel)
             req = ProduceRequest(topic_partition.topic,
                                  topic_partition.partition,
-                                 messages)
-            reqs.append(req)
-
-        try:
-            client.send_produce_request(reqs,
-                                        acks=req_acks,
-                                        timeout=ack_timeout)
-        except Exception:
-            log.exception("Unable to send message")
+                                 tuple(messages))
+            request_tries[req] = 0
+
+        if not request_tries:
+            continue
+
+        reqs_to_retry, error_cls = [], None
+        retry_state = {
+            'do_backoff': False,
+            'do_refresh': False
+        }
+
+        def _handle_error(error_cls, request):
+            if issubclass(error_cls, RETRY_ERROR_TYPES) or (retry_options.retry_on_timeouts and issubclass(error_cls, RequestTimedOutError)):
+                reqs_to_retry.append(request)
+            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
+                retry_state['do_backoff'] |= True
+            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
+                retry_state['do_refresh'] |= True
+
+        requests = list(request_tries.keys())
+        log.debug('Sending: %s', requests)
+        responses = client.send_produce_request(requests,
+                                                acks=req_acks,
+                                                timeout=ack_timeout,
+                                                fail_on_error=False)
+
+        log.debug('Received: %s', responses)
+        for i, response in enumerate(responses):
+            error_cls = None
+            if isinstance(response, FailedPayloadsError):
+                error_cls = response.__class__
+                orig_req = response.payload
+
+            elif isinstance(response, ProduceResponse) and response.error:
+                error_cls = kafka_errors.get(response.error, UnknownError)
+                orig_req = requests[i]
+
+            if error_cls:
+                _handle_error(error_cls, orig_req)
+                log.error('%s sending ProduceRequest (#%d of %d) '
+                          'to %s:%d with msgs %s',
+                          error_cls.__name__, (i + 1), len(requests),
+                          orig_req.topic, orig_req.partition,
+                          orig_req.messages if log_messages_on_error
+                          else hash(orig_req.messages))
+
+        if not reqs_to_retry:
+            request_tries = {}
+            continue
+
+        # doing backoff before next retry
+        if retry_state['do_backoff'] and retry_options.backoff_ms:
+            log.warn('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms)
+            time.sleep(float(retry_options.backoff_ms) / 1000)
+
+        # refresh topic metadata before next retry
+        if retry_state['do_refresh']:
+            log.warn('Async producer forcing metadata refresh metadata before retrying')
+            try:
+                client.load_metadata_for_topics()
+            except Exception as e:
+                log.error("Async producer couldn't reload topic metadata. Error: `%s`", e.message)
+
+        # Apply retry limit, dropping messages that are over
+        request_tries = dict(
+            (key, count + 1)
+            for (key, count) in request_tries.items()
+            if key in reqs_to_retry
+            and (retry_options.limit is None
+                 or (count < retry_options.limit))
+        )
+
+        # Log messages we are going to retry
+        for orig_req in request_tries.keys():
+            log.info('Retrying ProduceRequest to %s:%d with msgs %s',
+                     orig_req.topic, orig_req.partition,
+                     orig_req.messages if log_messages_on_error
+                     else hash(orig_req.messages))
+
+    if request_tries or not queue.empty():
+        log.error('Stopped producer with {0} unsent messages'
+                  .format(len(request_tries) + queue.qsize()))
 
 
 class Producer(object):
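_send_upstream above tracks outstanding requests in request_tries and, for each failed response, decides whether to retry, back off, and/or refresh metadata. Below is a self-contained sketch of that classification helper; the error classes and tuples are illustrative stand-ins for kafka-python's RETRY_* groupings, not the library's own definitions.

# Hedged sketch of the retry classification used by _send_upstream above.
class RetriableError(Exception): pass
class BackoffError(RetriableError): pass
class RefreshMetadataError(RetriableError): pass
class RequestTimedOut(Exception): pass

RETRY_ERROR_TYPES = (RetriableError,)
RETRY_BACKOFF_ERROR_TYPES = (BackoffError,)
RETRY_REFRESH_ERROR_TYPES = (RefreshMetadataError,)

def handle_error(error_cls, request, reqs_to_retry, retry_state,
                 retry_on_timeouts=True):
    if issubclass(error_cls, RETRY_ERROR_TYPES) or (
            retry_on_timeouts and issubclass(error_cls, RequestTimedOut)):
        reqs_to_retry.append(request)
    if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
        retry_state['do_backoff'] = True
    if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
        retry_state['do_refresh'] = True

state = {'do_backoff': False, 'do_refresh': False}
retries = []
handle_error(RefreshMetadataError, 'req-1', retries, state)
print(retries, state)   # ['req-1'] {'do_backoff': False, 'do_refresh': True}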
@@ -86,47 +225,84 @@ class Producer(object):
     Base class to be used by producers
 
     Arguments:
-        client: The Kafka client instance to use
-        async: If set to true, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-            WARNING!!! current implementation of async producer does not
-            guarantee message delivery. Use at your own risk! Or help us
-            improve with a PR!
-        req_acks: A value indicating the acknowledgements that the server must
-            receive before responding to the request
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
-    """
+        client (KafkaClient): instance to use for broker communications.
+            If async=True, the background thread will use client.copy(),
+            which is expected to return a thread-safe object.
+        codec (kafka.protocol.ALL_CODECS): compression codec to use.
+        req_acks (int, optional): A value indicating the acknowledgements that
+            the server must receive before responding to the request,
+            defaults to 1 (local ack).
+        ack_timeout (int, optional): millisecond timeout to wait for the
+            configured req_acks, defaults to 1000.
+        sync_fail_on_error (bool, optional): whether sync producer should
+            raise exceptions (True), or just return errors (False),
+            defaults to True.
+        async (bool, optional): send message using a background thread,
+            defaults to False.
+        batch_send_every_n (int, optional): If async is True, messages are
+            sent in batches of this size, defaults to 20.
+        batch_send_every_t (int or float, optional): If async is True,
+            messages are sent immediately after this timeout in seconds, even
+            if there are fewer than batch_send_every_n, defaults to 20.
+        async_retry_limit (int, optional): number of retries for failed messages
+            or None for unlimited, defaults to None / unlimited.
+        async_retry_backoff_ms (int, optional): milliseconds to backoff on
+            failed messages, defaults to 100.
+        async_retry_on_timeouts (bool, optional): whether to retry on
+            RequestTimeoutError, defaults to True.
+        async_queue_maxsize (int, optional): limit to the size of the
+            internal message queue in number of messages (not size), defaults
+            to 0 (no limit).
+        async_queue_put_timeout (int or float, optional): timeout seconds
+            for queue.put in send_messages for async producers -- will only
+            apply if async_queue_maxsize > 0 and the queue is Full,
+            defaults to 0 (fail immediately on full queue).
+        async_log_messages_on_error (bool, optional): set to False and the
+            async producer will only log hash() contents on failed produce
+            requests, defaults to True (log full messages). Hash logging
+            will not allow you to identify the specific message that failed,
+            but it will allow you to match failures with retries.
+        async_stop_timeout (int or float, optional): seconds to continue
+            attempting to send queued messages after producer.stop(),
+            defaults to 30.
+
+    Deprecated Arguments:
+        batch_send (bool, optional): If True, messages are sent by a background
+            thread in batches, defaults to False. Deprecated, use 'async'
+    """
     ACK_NOT_REQUIRED = 0            # No ack is required
     ACK_AFTER_LOCAL_WRITE = 1       # Send response after it is written to log
     ACK_AFTER_CLUSTER_COMMIT = -1   # Send response after data is committed
 
     DEFAULT_ACK_TIMEOUT = 1000
 
-    def __init__(self, client, async=False,
+    def __init__(self, client,
                  req_acks=ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=DEFAULT_ACK_TIMEOUT,
                  codec=None,
-                 batch_send=False,
+                 codec_compresslevel=None,
+                 sync_fail_on_error=SYNC_FAIL_ON_ERROR_DEFAULT,
+                 async=False,
+                 batch_send=False,  # deprecated, use async
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
+                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
+                 async_retry_limit=ASYNC_RETRY_LIMIT,
+                 async_retry_backoff_ms=ASYNC_RETRY_BACKOFF_MS,
+                 async_retry_on_timeouts=ASYNC_RETRY_ON_TIMEOUTS,
+                 async_queue_maxsize=ASYNC_QUEUE_MAXSIZE,
+                 async_queue_put_timeout=ASYNC_QUEUE_PUT_TIMEOUT,
+                 async_log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
+                 async_stop_timeout=ASYNC_STOP_TIMEOUT_SECS):
 
-        if batch_send:
-            async = True
+        if async:
             assert batch_send_every_n > 0
             assert batch_send_every_t > 0
-        else:
-            batch_send_every_n = 1
-            batch_send_every_t = 3600
+            assert async_queue_maxsize >= 0
 
         self.client = client
         self.async = async
         self.req_acks = req_acks
         self.ack_timeout = ack_timeout
+        self.stopped = False
 
         if codec is None:
             codec = CODEC_NONE
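Taken together, the new keyword arguments let the async path be tuned at construction time. A hedged usage sketch follows; the broker address and topic are placeholders, and note that the async keyword only parses on the interpreters this release targeted (async became a reserved word in Python 3.7).

# Usage sketch for the async producer options documented above.
# 'localhost:9092' and b'my-topic' are placeholders for a real deployment.
from kafka import KafkaClient, SimpleProducer

client = KafkaClient('localhost:9092')
producer = SimpleProducer(
    client,
    async=True,                     # send from a background thread
    batch_send_every_n=50,          # flush after 50 queued messages...
    batch_send_every_t=5,           # ...or after 5 seconds
    async_retry_limit=3,            # drop a request after 3 failed tries
    async_retry_backoff_ms=200,     # sleep between retries
    async_queue_maxsize=10000,      # bound the in-memory queue
)
producer.send_messages(b'my-topic', b'hello')
producer.stop()                     # drains the queue and joins the thread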
@@ -134,24 +310,39 @@ class Producer(object):
             raise UnsupportedCodecError("Codec 0x%02x unsupported" % codec)
 
         self.codec = codec
+        self.codec_compresslevel = codec_compresslevel
 
         if self.async:
-            log.warning("async producer does not guarantee message delivery!")
-            log.warning("Current implementation does not retry Failed messages")
-            log.warning("Use at your own risk! (or help improve with a PR!)")
-            self.queue = Queue()  # Messages are sent through this queue
-            self.proc = Process(target=_send_upstream,
-                                args=(self.queue,
-                                      self.client.copy(),
-                                      self.codec,
-                                      batch_send_every_t,
-                                      batch_send_every_n,
-                                      self.req_acks,
-                                      self.ack_timeout))
+            # Messages are sent through this queue
+            self.queue = Queue(async_queue_maxsize)
+            self.async_queue_put_timeout = async_queue_put_timeout
+            async_retry_options = RetryOptions(
+                limit=async_retry_limit,
+                backoff_ms=async_retry_backoff_ms,
+                retry_on_timeouts=async_retry_on_timeouts)
+            self.thread_stop_event = Event()
+            self.thread = Thread(
+                target=_send_upstream,
+                args=(self.queue, self.client.copy(), self.codec,
+                      batch_send_every_t, batch_send_every_n,
+                      self.req_acks, self.ack_timeout,
+                      async_retry_options, self.thread_stop_event),
+                kwargs={'log_messages_on_error': async_log_messages_on_error,
+                        'stop_timeout': async_stop_timeout,
+                        'codec_compresslevel': self.codec_compresslevel}
+            )
 
-            # Process will die if main thread exits
-            self.proc.daemon = True
-            self.proc.start()
+            # Thread will die if main thread exits
+            self.thread.daemon = True
+            self.thread.start()
+
+            def cleanup(obj):
+                if not obj.stopped:
+                    obj.stop()
+            self._cleanup_func = cleanup
+            atexit.register(cleanup, self)
+        else:
+            self.sync_fail_on_error = sync_fail_on_error
 
     def send_messages(self, topic, partition, *msg):
         """
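The constructor also registers an atexit hook so an async producer that is still running at interpreter shutdown gets stop() called. The register/unregister pattern (atexit.unregister on Python 3, private-list removal on Python 2) can be exercised on its own; the class below is a stand-in, not the Producer.

# Stand-alone sketch of the atexit register/unregister pattern used above.
import atexit

class Stoppable(object):
    def __init__(self):
        self.stopped = False

        def cleanup(obj):
            if not obj.stopped:
                obj.stop()

        self._cleanup_func = cleanup
        atexit.register(cleanup, self)

    def stop(self):
        if hasattr(atexit, 'unregister'):           # Python 3
            atexit.unregister(self._cleanup_func)
        else:                                        # Python 2: private attribute
            try:
                atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
            except ValueError:
                pass
        self.stopped = True

Stoppable().stop()   # no handler left behind at exit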
@@ -169,6 +360,7 @@ class Producer(object):
 
         All messages produced via this method will set the message 'key' to Null
         """
+        topic = kafka_bytestring(topic)
         return self._send_messages(topic, partition, *msg)
 
     def _send_messages(self, topic, partition, *msg, **kwargs):
@@ -178,37 +370,93 @@ class Producer(object):
         if not isinstance(msg, (list, tuple)):
             raise TypeError("msg is not a list or tuple!")
 
-        # Raise TypeError if any message is not encoded as bytes
-        if any(not isinstance(m, six.binary_type) for m in msg):
-            raise TypeError("all produce message payloads must be type bytes")
+        for m in msg:
+            # The protocol allows to have key & payload with null values both,
+            # (https://goo.gl/o694yN) but having (null,null) pair doesn't make sense.
+            if m is None:
+                if key is None:
+                    raise TypeError("key and payload can't be null in one")
+            # Raise TypeError if any non-null message is not encoded as bytes
+            elif not isinstance(m, six.binary_type):
+                raise TypeError("all produce message payloads must be null or type bytes")
+
+        # Raise TypeError if topic is not encoded as bytes
+        if not isinstance(topic, six.binary_type):
+            raise TypeError("the topic must be type bytes")
 
         # Raise TypeError if the key is not encoded as bytes
         if key is not None and not isinstance(key, six.binary_type):
             raise TypeError("the key must be type bytes")
 
         if self.async:
-            for m in msg:
-                self.queue.put((TopicAndPartition(topic, partition), m, key))
+            for idx, m in enumerate(msg):
+                try:
+                    item = (TopicAndPartition(topic, partition), m, key)
+                    if self.async_queue_put_timeout == 0:
+                        self.queue.put_nowait(item)
+                    else:
+                        self.queue.put(item, True, self.async_queue_put_timeout)
+                except Full:
+                    raise AsyncProducerQueueFull(
+                        msg[idx:],
+                        'Producer async queue overfilled. '
+                        'Current queue size %d.' % self.queue.qsize())
             resp = []
         else:
-            messages = create_message_set(msg, self.codec, key)
+            messages = create_message_set([(m, key) for m in msg], self.codec, key, self.codec_compresslevel)
             req = ProduceRequest(topic, partition, messages)
             try:
-                resp = self.client.send_produce_request([req], acks=self.req_acks,
-                                                        timeout=self.ack_timeout)
+                resp = self.client.send_produce_request(
+                    [req], acks=self.req_acks, timeout=self.ack_timeout,
+                    fail_on_error=self.sync_fail_on_error
+                )
             except Exception:
                 log.exception("Unable to send messages")
                 raise
         return resp
 
-    def stop(self, timeout=1):
+    def stop(self, timeout=None):
         """
-        Stop the producer. Optionally wait for the specified timeout before
-        forcefully cleaning up.
+        Stop the producer (async mode). Blocks until async thread completes.
         """
+        if timeout is not None:
+            log.warning('timeout argument to stop() is deprecated - '
+                        'it will be removed in future release')
+
+        if not self.async:
+            log.warning('producer.stop() called, but producer is not async')
+            return
+
+        if self.stopped:
+            log.warning('producer.stop() called, but producer is already stopped')
+            return
+
         if self.async:
             self.queue.put((STOP_ASYNC_PRODUCER, None, None))
-            self.proc.join(timeout)
+            self.thread_stop_event.set()
+            self.thread.join()
 
-            if self.proc.is_alive():
-                self.proc.terminate()
+            if hasattr(self, '_cleanup_func'):
+                # Remove cleanup handler now that we've stopped
+
+                # py3 supports unregistering
+                if hasattr(atexit, 'unregister'):
+                    atexit.unregister(self._cleanup_func)  # pylint: disable=no-member
+
+                # py2 requires removing from private attribute...
+                else:
+
+                    # ValueError on list.remove() if the exithandler no longer exists
+                    # but that is fine here
+                    try:
+                        atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
+                    except ValueError:
+                        pass
+
+                del self._cleanup_func
+
+        self.stopped = True
+
+    def __del__(self):
+        if not self.stopped:
+            self.stop()
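With a bounded queue, send_messages raises AsyncProducerQueueFull instead of blocking indefinitely. One way an application might handle it is to back off and retry; this is a hedged sketch of caller-side policy, not behaviour provided by the library itself.

# Hedged sketch: retrying after the async queue reports it is full.
import time
from kafka.common import AsyncProducerQueueFull

def send_with_backoff(producer, topic, payload, attempts=5, delay=0.1):
    for _ in range(attempts):
        try:
            return producer.send_messages(topic, payload)
        except AsyncProducerQueueFull:
            time.sleep(delay)        # let the background thread drain the queue
            delay *= 2
    raise RuntimeError('async queue stayed full after %d attempts' % attempts)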
@@ -1,50 +1,31 @@
 from __future__ import absolute_import
 
 import logging
+import warnings
 
-from kafka.partitioner import HashedPartitioner
-from .base import (
-    Producer, BATCH_SEND_DEFAULT_INTERVAL,
-    BATCH_SEND_MSG_COUNT
-)
+from .base import Producer
+from ..partitioner import HashedPartitioner
+from ..util import kafka_bytestring
 
-log = logging.getLogger("kafka")
+log = logging.getLogger(__name__)
 
 
 class KeyedProducer(Producer):
     """
     A producer which distributes messages to partitions based on the key
 
-    Arguments:
-        client: The kafka client instance
+    See Producer class for Arguments
 
-    Keyword Arguments:
+    Additional Arguments:
         partitioner: A partitioner class that will be used to get the partition
-            to send the message to. Must be derived from Partitioner
-        async: If True, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
+            to send the message to. Must be derived from Partitioner.
+            Defaults to HashedPartitioner.
     """
-    def __init__(self, client, partitioner=None, async=False,
-                 req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
-                 ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
-                 codec=None,
-                 batch_send=False,
-                 batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
-        if not partitioner:
-            partitioner = HashedPartitioner
-        self.partitioner_class = partitioner
+    def __init__(self, *args, **kwargs):
+        self.partitioner_class = kwargs.pop('partitioner', HashedPartitioner)
         self.partitioners = {}
-        super(KeyedProducer, self).__init__(client, async, req_acks,
-                                            ack_timeout, codec, batch_send,
-                                            batch_send_every_n,
-                                            batch_send_every_t)
+        super(KeyedProducer, self).__init__(*args, **kwargs)
 
     def _next_partition(self, topic, key):
         if topic not in self.partitioners:
@@ -54,15 +35,17 @@ class KeyedProducer(Producer):
             self.partitioners[topic] = self.partitioner_class(self.client.get_partition_ids_for_topic(topic))
 
         partitioner = self.partitioners[topic]
-        return partitioner.partition(key, self.client.get_partition_ids_for_topic(topic))
+        return partitioner.partition(key)
 
-    def send_messages(self,topic,key,*msg):
+    def send_messages(self, topic, key, *msg):
+        topic = kafka_bytestring(topic)
         partition = self._next_partition(topic, key)
-        return self._send_messages(topic, partition, *msg,key=key)
+        return self._send_messages(topic, partition, *msg, key=key)
 
+    # DEPRECATED
     def send(self, topic, key, msg):
-        partition = self._next_partition(topic, key)
-        return self._send_messages(topic, partition, msg, key=key)
+        warnings.warn("KeyedProducer.send is deprecated in favor of send_messages", DeprecationWarning)
+        return self.send_messages(topic, key, msg)
 
     def __repr__(self):
         return '<KeyedProducer batch=%s>' % self.async
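KeyedProducer now forwards everything to the base Producer and keeps only the partitioner option, which it pops out of kwargs. A hedged usage sketch with placeholder broker, topic, and keys, assuming this release's public API:

# Usage sketch for the slimmed-down KeyedProducer constructor.
from kafka import KafkaClient, KeyedProducer
from kafka.partitioner import Murmur2Partitioner

client = KafkaClient('localhost:9092')          # placeholder broker address
producer = KeyedProducer(client, partitioner=Murmur2Partitioner)

# Messages with the same key land on the same partition.
producer.send_messages(b'my-topic', b'user-42', b'first event')
producer.send_messages(b'my-topic', b'user-42', b'second event')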
@@ -1,57 +1,34 @@
 from __future__ import absolute_import
 
+from itertools import cycle
 import logging
 import random
 import six
 
-from itertools import cycle
-
 from six.moves import xrange
 
-from .base import (
-    Producer, BATCH_SEND_DEFAULT_INTERVAL,
-    BATCH_SEND_MSG_COUNT
-)
+from .base import Producer
 
-log = logging.getLogger("kafka")
+log = logging.getLogger(__name__)
 
 
 class SimpleProducer(Producer):
-    """
-    A simple, round-robin producer. Each message goes to exactly one partition
+    """A simple, round-robin producer.
 
-    Arguments:
-        client: The Kafka client instance to use
+    See Producer class for Base Arguments
 
-    Keyword Arguments:
-        async: If True, the messages are sent asynchronously via another
-            thread (process). We will not wait for a response to these
-        req_acks: A value indicating the acknowledgements that the server must
-            receive before responding to the request
-        ack_timeout: Value (in milliseconds) indicating a timeout for waiting
-            for an acknowledgement
-        batch_send: If True, messages are send in batches
-        batch_send_every_n: If set, messages are send in batches of this size
-        batch_send_every_t: If set, messages are send after this timeout
-        random_start: If true, randomize the initial partition which the
+    Additional Arguments:
+        random_start (bool, optional): randomize the initial partition which
             the first message block will be published to, otherwise
             if false, the first message block will always publish
-            to partition 0 before cycling through each partition
+            to partition 0 before cycling through each partition,
+            defaults to True.
     """
-    def __init__(self, client, async=False,
-                 req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
-                 ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
-                 codec=None,
-                 batch_send=False,
-                 batch_send_every_n=BATCH_SEND_MSG_COUNT,
-                 batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
-                 random_start=True):
+    def __init__(self, *args, **kwargs):
         self.partition_cycles = {}
-        self.random_start = random_start
-        super(SimpleProducer, self).__init__(client, async, req_acks,
-                                             ack_timeout, codec, batch_send,
-                                             batch_send_every_n,
-                                             batch_send_every_t)
+        self.random_start = kwargs.pop('random_start', True)
+        super(SimpleProducer, self).__init__(*args, **kwargs)
 
     def _next_partition(self, topic):
         if topic not in self.partition_cycles:
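SimpleProducer likewise forwards its configuration through *args and **kwargs and keeps only random_start. Disabling it makes the partition order deterministic, which can be convenient in tests (hedged sketch, placeholder broker and topic):

# Usage sketch for the random_start option kept by SimpleProducer.
from kafka import KafkaClient, SimpleProducer

client = KafkaClient('localhost:9092')                  # placeholder
producer = SimpleProducer(client, random_start=False)   # always start at partition 0
producer.send_messages(b'my-topic', b'one', b'two')     # round-robins from 0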
@@ -14,14 +14,16 @@ from kafka.common import (
     MetadataResponse, ProduceResponse, FetchResponse,
     OffsetResponse, OffsetCommitResponse, OffsetFetchResponse,
     ProtocolError, BufferUnderflowError, ChecksumError,
-    ConsumerFetchSizeTooSmall, UnsupportedCodecError
+    ConsumerFetchSizeTooSmall, UnsupportedCodecError,
+    ConsumerMetadataResponse
 )
 from kafka.util import (
     crc32, read_short_string, read_int_string, relative_unpack,
     write_short_string, write_int_string, group_by_topic_and_partition
 )
 
-log = logging.getLogger("kafka")
+log = logging.getLogger(__name__)
+
 
 ATTRIBUTE_CODEC_MASK = 0x03
 CODEC_NONE = 0x00
@@ -42,19 +44,21 @@ class KafkaProtocol(object):
     METADATA_KEY = 3
     OFFSET_COMMIT_KEY = 8
     OFFSET_FETCH_KEY = 9
+    CONSUMER_METADATA_KEY = 10
 
     ###################
     #   Private API   #
     ###################
 
     @classmethod
-    def _encode_message_header(cls, client_id, correlation_id, request_key):
+    def _encode_message_header(cls, client_id, correlation_id, request_key,
+                               version=0):
         """
         Encode the common request envelope
         """
         return struct.pack('>hhih%ds' % len(client_id),
                            request_key,          # ApiKey
-                           0,                    # ApiVersion
+                           version,              # ApiVersion
                            correlation_id,       # CorrelationId
                            len(client_id),       # ClientId size
                            client_id)            # ClientId
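The request envelope is a fixed header: ApiKey, ApiVersion, CorrelationId, then a length-prefixed client id; the only functional change is that ApiVersion is now a parameter. The same packing can be checked with struct alone (the ApiKey and CorrelationId values below are arbitrary examples):

# The header layout used by _encode_message_header, reproduced with struct.
import struct

client_id = b'kafka-python'
header = struct.pack('>hhih%ds' % len(client_id),
                     9,                 # ApiKey (example value)
                     1,                 # ApiVersion (now selectable)
                     7,                 # CorrelationId (example value)
                     len(client_id),    # ClientId size
                     client_id)         # ClientId
print(len(header), repr(header[:4]))    # 2 + 2 + 4 + 2 + 12 bytes in total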
@@ -231,12 +235,12 @@ class KafkaProtocol(object):
         """
         ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
 
-        for i in range(num_topics):
+        for _ in range(num_topics):
             ((strlen,), cur) = relative_unpack('>h', data, cur)
             topic = data[cur:cur + strlen]
             cur += strlen
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                 ((partition, error, offset), cur) = relative_unpack('>ihq',
                                                                     data, cur)
 
@@ -288,11 +292,11 @@ class KafkaProtocol(object):
         """
         ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
 
-        for i in range(num_topics):
+        for _ in range(num_topics):
             (topic, cur) = read_short_string(data, cur)
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
 
-            for i in range(num_partitions):
+            for j in range(num_partitions):
                 ((partition, error, highwater_mark_offset), cur) = \
                     relative_unpack('>ihq', data, cur)
 
@@ -336,16 +340,16 @@ class KafkaProtocol(object):
         """
         ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
 
-        for i in range(num_topics):
+        for _ in range(num_topics):
             (topic, cur) = read_short_string(data, cur)
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
 
-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                 ((partition, error, num_offsets,), cur) = \
                     relative_unpack('>ihi', data, cur)
 
                 offsets = []
-                for j in range(num_offsets):
+                for k in range(num_offsets):
                     ((offset,), cur) = relative_unpack('>q', data, cur)
                     offsets.append(offset)
 
@@ -391,7 +395,7 @@ class KafkaProtocol(object):
 
         # Broker info
         brokers = []
-        for i in range(numbrokers):
+        for _ in range(numbrokers):
             ((nodeId, ), cur) = relative_unpack('>i', data, cur)
             (host, cur) = read_short_string(data, cur)
             ((port,), cur) = relative_unpack('>i', data, cur)
@@ -401,13 +405,13 @@ class KafkaProtocol(object):
         ((num_topics,), cur) = relative_unpack('>i', data, cur)
         topic_metadata = []
 
-        for i in range(num_topics):
+        for _ in range(num_topics):
             ((topic_error,), cur) = relative_unpack('>h', data, cur)
             (topic_name, cur) = read_short_string(data, cur)
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
             partition_metadata = []
 
-            for j in range(num_partitions):
+            for _ in range(num_partitions):
                 ((partition_error_code, partition, leader, numReplicas), cur) = \
                     relative_unpack('>hiii', data, cur)
 
@@ -428,6 +432,38 @@ class KafkaProtocol(object):
 
         return MetadataResponse(brokers, topic_metadata)
 
+    @classmethod
+    def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads):
+        """
+        Encode a ConsumerMetadataRequest
+
+        Arguments:
+            client_id: string
+            correlation_id: int
+            payloads: string (consumer group)
+        """
+        message = []
+        message.append(cls._encode_message_header(client_id, correlation_id,
+                                                  KafkaProtocol.CONSUMER_METADATA_KEY))
+        message.append(struct.pack('>h%ds' % len(payloads), len(payloads), payloads))
+
+        msg = b''.join(message)
+        return write_int_string(msg)
+
+    @classmethod
+    def decode_consumer_metadata_response(cls, data):
+        """
+        Decode bytes to a ConsumerMetadataResponse
+
+        Arguments:
+            data: bytes to decode
+        """
+        ((correlation_id, error, nodeId), cur) = relative_unpack('>ihi', data, 0)
+        (host, cur) = read_short_string(data, cur)
+        ((port,), cur) = relative_unpack('>i', data, cur)
+
+        return ConsumerMetadataResponse(error, nodeId, host, port)
+
     @classmethod
     def encode_offset_commit_request(cls, client_id, correlation_id,
                                      group, payloads):
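decode_consumer_metadata_response reads a fixed prefix (correlation id, error code, coordinator node id) followed by a short-string host and an int32 port; relative_unpack and read_short_string above are the library's wrappers around exactly this layout. A stand-alone sketch with plain struct calls and a synthetic payload:

# Decoding a ConsumerMetadataResponse-shaped payload with plain struct calls.
import struct

def decode(data):
    correlation_id, error, node_id = struct.unpack('>ihi', data[:10])
    (host_len,) = struct.unpack('>h', data[10:12])
    host = data[12:12 + host_len]
    (port,) = struct.unpack('>i', data[12 + host_len:16 + host_len])
    return error, node_id, host, port

# Example payload: correlation_id=1, error=0, nodeId=3, host=b'broker1', port=9092
payload = struct.pack('>ihih7si', 1, 0, 3, 7, b'broker1', 9092)
print(decode(payload))   # (0, 3, b'broker1', 9092)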
@@ -470,31 +506,37 @@ class KafkaProtocol(object):
         ((correlation_id,), cur) = relative_unpack('>i', data, 0)
         ((num_topics,), cur) = relative_unpack('>i', data, cur)
 
-        for i in xrange(num_topics):
+        for _ in xrange(num_topics):
             (topic, cur) = read_short_string(data, cur)
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
 
-            for i in xrange(num_partitions):
+            for _ in xrange(num_partitions):
                 ((partition, error), cur) = relative_unpack('>ih', data, cur)
                 yield OffsetCommitResponse(topic, partition, error)
 
     @classmethod
     def encode_offset_fetch_request(cls, client_id, correlation_id,
-                                    group, payloads):
+                                    group, payloads, from_kafka=False):
         """
-        Encode some OffsetFetchRequest structs
+        Encode some OffsetFetchRequest structs. The request is encoded using
+        version 0 if from_kafka is false, indicating a request for Zookeeper
+        offsets. It is encoded using version 1 otherwise, indicating a request
+        for Kafka offsets.
 
         Arguments:
             client_id: string
            correlation_id: int
             group: string, the consumer group you are fetching offsets for
             payloads: list of OffsetFetchRequest
+            from_kafka: bool, default False, set True for Kafka-committed offsets
         """
         grouped_payloads = group_by_topic_and_partition(payloads)
 
         message = []
+        reqver = 1 if from_kafka else 0
         message.append(cls._encode_message_header(client_id, correlation_id,
-                                                  KafkaProtocol.OFFSET_FETCH_KEY))
+                                                  KafkaProtocol.OFFSET_FETCH_KEY,
+                                                  version=reqver))
 
         message.append(write_short_string(group))
         message.append(struct.pack('>i', len(grouped_payloads)))
@@ -521,11 +563,11 @@ class KafkaProtocol(object):
         ((correlation_id,), cur) = relative_unpack('>i', data, 0)
         ((num_topics,), cur) = relative_unpack('>i', data, cur)
 
-        for i in range(num_topics):
+        for _ in range(num_topics):
             (topic, cur) = read_short_string(data, cur)
             ((num_partitions,), cur) = relative_unpack('>i', data, cur)
 
-            for i in range(num_partitions):
+            for _ in range(num_partitions):
                 ((partition, offset), cur) = relative_unpack('>iq', data, cur)
                 (metadata, cur) = read_short_string(data, cur)
                 ((error,), cur) = relative_unpack('>h', data, cur)
@@ -546,7 +588,7 @@ def create_message(payload, key=None):
     return Message(0, 0, key, payload)
 
 
-def create_gzip_message(payloads, key=None):
+def create_gzip_message(payloads, key=None, compresslevel=None):
     """
     Construct a Gzipped Message containing multiple Messages
 
@@ -559,9 +601,9 @@ def create_gzip_message(payloads, key=None):
 
     """
     message_set = KafkaProtocol._encode_message_set(
-        [create_message(payload, key) for payload in payloads])
+        [create_message(payload, pl_key) for payload, pl_key in payloads])
 
-    gzipped = gzip_encode(message_set)
+    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
     codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
 
     return Message(0, 0x00 | codec, key, gzipped)
@@ -580,7 +622,7 @@ def create_snappy_message(payloads, key=None):
 
     """
     message_set = KafkaProtocol._encode_message_set(
-        [create_message(payload, key) for payload in payloads])
+        [create_message(payload, pl_key) for payload, pl_key in payloads])
 
     snapped = snappy_encode(message_set)
     codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY
@@ -588,16 +630,16 @@ def create_snappy_message(payloads, key=None):
     return Message(0, 0x00 | codec, key, snapped)
 
 
-def create_message_set(messages, codec=CODEC_NONE, key=None):
+def create_message_set(messages, codec=CODEC_NONE, key=None, compresslevel=None):
     """Create a message set using the given codec.
 
     If codec is CODEC_NONE, return a list of raw Kafka messages. Otherwise,
     return a list containing a single codec-encoded message.
     """
     if codec == CODEC_NONE:
-        return [create_message(m, key) for m in messages]
+        return [create_message(m, k) for m, k in messages]
     elif codec == CODEC_GZIP:
-        return [create_gzip_message(messages, key)]
+        return [create_gzip_message(messages, key, compresslevel)]
     elif codec == CODEC_SNAPPY:
         return [create_snappy_message(messages, key)]
     else:
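create_message_set now expects (payload, key) tuples so per-message keys survive into compressed message sets, and it forwards compresslevel to the gzip path. A hedged usage sketch, assuming the kafka.protocol names shown above are importable in this release:

# Usage sketch for the tuple-based create_message_set signature.
from kafka.protocol import create_message_set, CODEC_NONE, CODEC_GZIP

msgs = [(b'payload-1', b'key-1'), (b'payload-2', None)]

plain = create_message_set(msgs, CODEC_NONE)       # one Message per tuple
packed = create_message_set(msgs, CODEC_GZIP,      # single gzip-wrapped Message
                            key=b'key-1',
                            compresslevel=6)
print(len(plain), len(packed))                     # 2 1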
215
kafka/queue.py
@@ -1,215 +0,0 @@
from __future__ import absolute_import

from copy import copy
import logging
from multiprocessing import Process, Queue, Event
from Queue import Empty
import time

from kafka.client import KafkaClient, FetchRequest, ProduceRequest

log = logging.getLogger("kafka")

raise NotImplementedError("Still need to refactor this class")


class KafkaConsumerProcess(Process):
    def __init__(self, client, topic, partition, out_queue, barrier,
                 consumer_fetch_size=1024, consumer_sleep=200):
        self.client = copy(client)
        self.topic = topic
        self.partition = partition
        self.out_queue = out_queue
        self.barrier = barrier
        self.consumer_fetch_size = consumer_fetch_size
        self.consumer_sleep = consumer_sleep / 1000.
        log.info("Initializing %s" % self)
        Process.__init__(self)

    def __str__(self):
        return "[KafkaConsumerProcess: topic=%s, \
            partition=%s, sleep=%s]" % \
            (self.topic, self.partition, self.consumer_sleep)

    def run(self):
        self.barrier.wait()
        log.info("Starting %s" % self)
        fetchRequest = FetchRequest(self.topic, self.partition,
                                    offset=0, size=self.consumer_fetch_size)

        while True:
            if self.barrier.is_set() is False:
                log.info("Shutdown %s" % self)
                self.client.close()
                break

            lastOffset = fetchRequest.offset
            (messages, fetchRequest) = self.client.get_message_set(fetchRequest)

            if fetchRequest.offset == lastOffset:
                log.debug("No more data for this partition, "
                          "sleeping a bit (200ms)")
                time.sleep(self.consumer_sleep)
                continue

            for message in messages:
                self.out_queue.put(message)


class KafkaProducerProcess(Process):
    def __init__(self, client, topic, in_queue, barrier,
                 producer_flush_buffer=500,
                 producer_flush_timeout=2000,
                 producer_timeout=100):

        self.client = copy(client)
        self.topic = topic
        self.in_queue = in_queue
        self.barrier = barrier
        self.producer_flush_buffer = producer_flush_buffer
        self.producer_flush_timeout = producer_flush_timeout / 1000.
        self.producer_timeout = producer_timeout / 1000.
        log.info("Initializing %s" % self)
        Process.__init__(self)

    def __str__(self):
        return "[KafkaProducerProcess: topic=%s, \
            flush_buffer=%s, flush_timeout=%s, timeout=%s]" % \
            (self.topic,
             self.producer_flush_buffer,
             self.producer_flush_timeout,
             self.producer_timeout)

    def run(self):
        self.barrier.wait()
        log.info("Starting %s" % self)
        messages = []
        last_produce = time.time()

        def flush(messages):
            self.client.send_message_set(ProduceRequest(self.topic, -1,
                                                        messages))
            del messages[:]

        while True:
            if self.barrier.is_set() is False:
                log.info("Shutdown %s, flushing messages" % self)
                flush(messages)
                self.client.close()
                break

            if len(messages) > self.producer_flush_buffer:
                log.debug("Message count threshold reached. Flushing messages")
                flush(messages)
                last_produce = time.time()

            elif (time.time() - last_produce) > self.producer_flush_timeout:
                log.debug("Producer timeout reached. Flushing messages")
                flush(messages)
                last_produce = time.time()

            try:
                msg = KafkaClient.create_message(
                    self.in_queue.get(True, self.producer_timeout))
                messages.append(msg)

            except Empty:
                continue


class KafkaQueue(object):
    def __init__(self, client, topic, partitions,
                 producer_config=None, consumer_config=None):
        """
        KafkaQueue a Queue-like object backed by a Kafka producer and some
        number of consumers

        Messages are eagerly loaded by the consumer in batches of size
        consumer_fetch_size.
        Messages are buffered in the producer thread until
        producer_flush_timeout or producer_flush_buffer is reached.

        Arguments:
            client: KafkaClient object
            topic: str, the topic name
            partitions: list of ints, the partions to consume from
            producer_config: dict, see below
            consumer_config: dict, see below

        Consumer Config
        ===============
        consumer_fetch_size: int, number of bytes to fetch in one call
                             to Kafka. Default is 1024
        consumer_sleep: int, time in milliseconds a consumer should sleep
                        when it reaches the end of a partition. Default is 200

        Producer Config
        ===============
        producer_timeout: int, time in milliseconds a producer should
                          wait for messages to enqueue for producing.
                          Default is 100
        producer_flush_timeout: int, time in milliseconds a producer
                                should allow messages to accumulate before
                                sending to Kafka. Default is 2000
        producer_flush_buffer: int, number of messages a producer should
                               allow to accumulate. Default is 500

        """
        producer_config = {} if producer_config is None else producer_config
        consumer_config = {} if consumer_config is None else consumer_config

        self.in_queue = Queue()
        self.out_queue = Queue()
        self.consumers = []
        self.barrier = Event()

        # Initialize and start consumer threads
        for partition in partitions:
            consumer = KafkaConsumerProcess(client, topic, partition,
                                            self.in_queue, self.barrier,
                                            **consumer_config)
            consumer.start()
            self.consumers.append(consumer)

        # Initialize and start producer thread
        self.producer = KafkaProducerProcess(client, topic, self.out_queue,
                                             self.barrier, **producer_config)
        self.producer.start()

        # Trigger everything to start
        self.barrier.set()

    def get(self, block=True, timeout=None):
        """
        Consume a message from Kafka

        Arguments:
            block: boolean, default True
            timeout: int, number of seconds to wait when blocking, default None

        Returns:
            msg: str, the payload from Kafka
        """
        return self.in_queue.get(block, timeout).payload

    def put(self, msg, block=True, timeout=None):
        """
        Send a message to Kafka

        Arguments:
            msg: std, the message to send
            block: boolean, default True
            timeout: int, number of seconds to wait when blocking, default None
        """
        self.out_queue.put(msg, block, timeout)

    def close(self):
        """
        Close the internal queues and Kafka consumers/producer
        """
        self.in_queue.close()
        self.out_queue.close()
        self.barrier.clear()
        self.producer.join()
        for consumer in self.consumers:
            consumer.join()
@@ -82,6 +82,9 @@ def relative_unpack(fmt, data, cur):
 def group_by_topic_and_partition(tuples):
     out = collections.defaultdict(dict)
     for t in tuples:
+        assert t.topic not in out or t.partition not in out[t.topic], \
+               'Duplicate {0}s for {1} {2}'.format(t.__class__.__name__,
+                                                   t.topic, t.partition)
         out[t.topic][t.partition] = t
     return out
 
@@ -151,3 +154,6 @@ class ReentrantTimer(object):
         # noinspection PyAttributeOutsideInit
         self.timer = None
         self.fn = None
+
+    def __del__(self):
+        self.stop()
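The new assert in group_by_topic_and_partition turns silently-overwritten duplicate payloads into a hard failure. A small self-contained sketch of that behaviour (the Payload namedtuple below is illustrative, standing in for the request payload objects this helper actually receives):

# Duplicate (topic, partition) pairs now raise instead of clobbering each other.
import collections

Payload = collections.namedtuple('Payload', ['topic', 'partition'])

def group_by_topic_and_partition(tuples):
    out = collections.defaultdict(dict)
    for t in tuples:
        assert t.topic not in out or t.partition not in out[t.topic], \
               'Duplicate {0}s for {1} {2}'.format(t.__class__.__name__,
                                                   t.topic, t.partition)
        out[t.topic][t.partition] = t
    return out

group_by_topic_and_partition([Payload(b'topic_1', 0), Payload(b'topic_1', 0)])
# AssertionError: Duplicate Payloads for b'topic_1' 0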
1
kafka/version.py
Normal file
@@ -0,0 +1 @@
+__version__ = '0.9.5'
@@ -35,6 +35,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}
 
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################
 
 log.flush.interval.messages=10000
@@ -49,7 +53,11 @@ log.cleanup.interval.mins=1
 ############################# Zookeeper #############################
 
 zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
 
+# Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
+
 kafka.metrics.polling.interval.secs=5
 kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter
@@ -63,6 +63,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}
 
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################
 
 # Messages are immediately written to the filesystem but by default we only fsync() to sync
@@ -116,3 +120,5 @@ zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
 
 # Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
@@ -35,6 +35,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}
 
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################
 
 log.flush.interval.messages=10000
@@ -56,4 +60,8 @@ log.cleaner.enable=false
 # You can also append an optional chroot string to the urls to specify the
 # root directory for all kafka znodes.
 zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
+
+# Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
@@ -63,6 +63,10 @@ log.dirs={tmp_dir}/data
 num.partitions={partitions}
 default.replication.factor={replicas}
 
+## Short Replica Lag -- Drops failed brokers out of ISR
+replica.lag.time.max.ms=1000
+replica.socket.timeout.ms=1000
+
 ############################# Log Flush Policy #############################
 
 # Messages are immediately written to the filesystem but by default we only fsync() to sync
@@ -116,3 +120,5 @@ zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}
 
 # Timeout in ms for connecting to zookeeper
 zookeeper.connection.timeout.ms=1000000
+# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
+zookeeper.session.timeout.ms=500
124
servers/0.8.2.1/resources/kafka.properties
Normal file
@@ -0,0 +1,124 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.server.KafkaConfig for additional details and defaults

############################# Server Basics #############################

# The id of the broker. This must be set to a unique integer for each broker.
broker.id={broker_id}

############################# Socket Server Settings #############################

# The port the socket server listens on
port={port}

# Hostname the broker will bind to. If not set, the server will bind to all interfaces
host.name={host}

# Hostname the broker will advertise to producers and consumers. If not set, it uses the
# value for "host.name" if configured. Otherwise, it will use the value returned from
# java.net.InetAddress.getCanonicalHostName().
#advertised.host.name=<hostname routable by clients>

# The port to publish to ZooKeeper for clients to use. If this is not set,
# it will publish the same port that the broker binds to.
#advertised.port=<port accessible by clients>

# The number of threads handling network requests
num.network.threads=2

# The number of threads doing disk I/O
num.io.threads=8

# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=1048576

# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=1048576

# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600


############################# Log Basics #############################

# A comma seperated list of directories under which to store log files
log.dirs={tmp_dir}/data

# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions={partitions}
default.replication.factor={replicas}

## Short Replica Lag -- Drops failed brokers out of ISR
replica.lag.time.max.ms=1000
replica.socket.timeout.ms=1000

############################# Log Flush Policy #############################

# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
#    1. Durability: Unflushed data may be lost if you are not using replication.
#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.

# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000

# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000

############################# Log Retention Policy #############################

# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
# from the end of the log.

# The minimum age of a log file to be eligible for deletion
log.retention.hours=168

# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes.
#log.retention.bytes=1073741824

# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=536870912

# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=60000

# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
log.cleaner.enable=false

############################# Zookeeper #############################

# Zookeeper connection string (see zookeeper docs for details).
# This is a comma separated host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
zookeeper.connect={zk_host}:{zk_port}/{zk_chroot}

# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=1000000
# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly
zookeeper.session.timeout.ms=500
24
servers/0.8.2.1/resources/log4j.properties
Normal file
@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

log4j.rootLogger=INFO, stdout

log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n

log4j.logger.kafka=DEBUG, stdout
log4j.logger.org.I0Itec.zkclient.ZkClient=INFO, stdout
log4j.logger.org.apache.zookeeper=INFO, stdout
21
servers/0.8.2.1/resources/zookeeper.properties
Normal file
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# the directory where the snapshot is stored.
dataDir={tmp_dir}
# the port at which the clients will connect
clientPort={port}
clientPortAddress={host}
# disable the per-ip limit on the number of connections since this is a non-production config
maxClientCnxns=0
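The {broker_id}, {port}, {tmp_dir} and similar fields in these server resource files are Python str.format-style placeholders that the test fixtures fill in at runtime. A hand-rolled sketch of that substitution (the values below are illustrative, not taken from the diff):

# Render one of the templates above with example values.
import tempfile

template = open('servers/0.8.2.1/resources/zookeeper.properties').read()
rendered = template.format(tmp_dir=tempfile.mkdtemp(), port=2181, host='127.0.0.1')
print(rendered)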
124
servers/0.8.2.2/resources/kafka.properties
Normal file
@@ -0,0 +1,124 @@
(content identical to servers/0.8.2.1/resources/kafka.properties above)

24
servers/0.8.2.2/resources/log4j.properties
Normal file
@@ -0,0 +1,24 @@
(content identical to servers/0.8.2.1/resources/log4j.properties above)

21
servers/0.8.2.2/resources/zookeeper.properties
Normal file
@@ -0,0 +1,21 @@
(content identical to servers/0.8.2.1/resources/zookeeper.properties above)

124
servers/0.9.0.0/resources/kafka.properties
Normal file
@@ -0,0 +1,124 @@
(content identical to servers/0.8.2.1/resources/kafka.properties above)

24
servers/0.9.0.0/resources/log4j.properties
Normal file
@@ -0,0 +1,24 @@
(content identical to servers/0.8.2.1/resources/log4j.properties above)

21
servers/0.9.0.0/resources/zookeeper.properties
Normal file
@@ -0,0 +1,21 @@
(content identical to servers/0.8.2.1/resources/zookeeper.properties above)
31
setup.py
@@ -1,10 +1,10 @@
 import sys
+import os
 from setuptools import setup, Command
 
-with open('VERSION', 'r') as v:
-    __version__ = v.read().rstrip()
+# Pull version from source without importing
+# since we can't import something we haven't built yet :)
+exec(open('kafka/version.py').read())
 
 class Tox(Command):
 
@@ -26,6 +26,10 @@ test_require = ['tox', 'mock']
 if sys.version_info < (2, 7):
     test_require.append('unittest2')
 
+here = os.path.abspath(os.path.dirname(__file__))
+
+with open(os.path.join(here, 'README.rst')) as f:
+    README = f.read()
 
 setup(
     name="kafka-python",
@@ -41,20 +45,15 @@ setup(
         "kafka.producer",
     ],
 
-    author="David Arthur",
-    author_email="mumrah@gmail.com",
-    url="https://github.com/mumrah/kafka-python",
+    author="Dana Powers",
+    author_email="dana.powers@gmail.com",
+    url="https://github.com/dpkp/kafka-python",
     license="Apache License 2.0",
     description="Pure Python client for Apache Kafka",
-    long_description="""
-This module provides low-level protocol support for Apache Kafka as well as
-high-level consumer and producer classes. Request batching is supported by the
-protocol as well as broker-aware request routing. Gzip and Snappy compression
-is also supported for message sets.
-""",
+    long_description=README,
     keywords="apache kafka",
     install_requires=['six'],
-    classifiers = [
+    classifiers=[
         "Development Status :: 4 - Beta",
         "Intended Audience :: Developers",
         "License :: OSI Approved :: Apache Software License",
@@ -62,6 +61,10 @@ is also supported for message sets.
         "Programming Language :: Python :: 2",
         "Programming Language :: Python :: 2.6",
         "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: Implementation :: PyPy",
         "Topic :: Software Development :: Libraries :: Python Modules",
     ]
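The setup.py change above switches to the single-source version pattern: kafka/version.py is executed rather than imported, so the version string is available before the package is built. A stripped-down sketch of the same pattern (only name and version shown; the real setup() call carries many more arguments):

# kafka/version.py contains exactly one line: __version__ = '0.9.5'
from setuptools import setup

exec(open('kafka/version.py').read())  # defines __version__ in this namespace

setup(
    name='kafka-python',
    version=__version__,
)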
@@ -4,13 +4,18 @@ import os.path
 import shutil
 import subprocess
 import tempfile
+import time
 from six.moves import urllib
 import uuid
 
-from six.moves.urllib.parse import urlparse  # pylint: disable-msg=E0611
+from six.moves.urllib.parse import urlparse  # pylint: disable-msg=E0611,F0401
 from test.service import ExternalService, SpawnedService
 from test.testutil import get_open_port
 
+log = logging.getLogger(__name__)
+
 
 class Fixture(object):
     kafka_version = os.environ.get('KAFKA_VERSION', '0.8.0')
     scala_version = os.environ.get("SCALA_VERSION", '2.8.0')
@@ -35,21 +40,21 @@ class Fixture(object):
         output_file = os.path.join(output_dir, distfile + '.tgz')
 
         if os.path.isfile(output_file):
-            logging.info("Found file already on disk: %s", output_file)
+            log.info("Found file already on disk: %s", output_file)
             return output_file
 
         # New tarballs are .tgz, older ones are sometimes .tar.gz
         try:
             url = url_base + distfile + '.tgz'
-            logging.info("Attempting to download %s", url)
+            log.info("Attempting to download %s", url)
             response = urllib.request.urlopen(url)
         except urllib.error.HTTPError:
-            logging.exception("HTTP Error")
+            log.exception("HTTP Error")
             url = url_base + distfile + '.tar.gz'
-            logging.info("Attempting to download %s", url)
+            log.info("Attempting to download %s", url)
             response = urllib.request.urlopen(url)
 
-        logging.info("Saving distribution file to %s", output_file)
+        log.info("Saving distribution file to %s", output_file)
         with open(output_file, 'w') as output_file_fd:
             output_file_fd.write(response.read())
 
@@ -101,14 +106,14 @@ class ZookeeperFixture(Fixture):
         self.child = None
 
     def out(self, message):
-        logging.info("*** Zookeeper [%s:%d]: %s", self.host, self.port, message)
+        log.info("*** Zookeeper [%s:%d]: %s", self.host, self.port, message)
 
     def open(self):
         self.tmp_dir = tempfile.mkdtemp()
         self.out("Running local instance...")
-        logging.info("  host    = %s", self.host)
-        logging.info("  port    = %s", self.port)
-        logging.info("  tmp_dir = %s", self.tmp_dir)
+        log.info("  host    = %s", self.host)
+        log.info("  port    = %s", self.port)
+        log.info("  tmp_dir = %s", self.tmp_dir)
 
         # Generate configs
         template = self.test_resource("zookeeper.properties")
@@ -118,12 +123,21 @@ class ZookeeperFixture(Fixture):
         # Configure Zookeeper child process
         args = self.kafka_run_class_args("org.apache.zookeeper.server.quorum.QuorumPeerMain", properties)
         env = self.kafka_run_class_env()
-        self.child = SpawnedService(args, env)
 
         # Party!
         self.out("Starting...")
-        self.child.start()
-        self.child.wait_for(r"binding to port")
+        timeout = 5
+        max_timeout = 30
+        backoff = 1
+        while True:
+            self.child = SpawnedService(args, env)
+            self.child.start()
+            timeout = min(timeout, max_timeout)
+            if self.child.wait_for(r"binding to port", timeout=timeout):
+                break
+            self.child.stop()
+            timeout *= 2
+            time.sleep(backoff)
         self.out("Done!")
 
     def close(self):
@@ -167,7 +181,7 @@ class KafkaFixture(Fixture):
         self.running = False
 
     def out(self, message):
-        logging.info("*** Kafka [%s:%d]: %s", self.host, self.port, message)
+        log.info("*** Kafka [%s:%d]: %s", self.host, self.port, message)
 
     def open(self):
         if self.running:
@@ -176,15 +190,15 @@ class KafkaFixture(Fixture):
 
         self.tmp_dir = tempfile.mkdtemp()
         self.out("Running local instance...")
-        logging.info("  host       = %s", self.host)
-        logging.info("  port       = %s", self.port)
-        logging.info("  broker_id  = %s", self.broker_id)
-        logging.info("  zk_host    = %s", self.zk_host)
-        logging.info("  zk_port    = %s", self.zk_port)
-        logging.info("  zk_chroot  = %s", self.zk_chroot)
-        logging.info("  replicas   = %s", self.replicas)
-        logging.info("  partitions = %s", self.partitions)
-        logging.info("  tmp_dir    = %s", self.tmp_dir)
+        log.info("  host       = %s", self.host)
+        log.info("  port       = %s", self.port)
+        log.info("  broker_id  = %s", self.broker_id)
+        log.info("  zk_host    = %s", self.zk_host)
+        log.info("  zk_port    = %s", self.zk_port)
+        log.info("  zk_chroot  = %s", self.zk_chroot)
+        log.info("  replicas   = %s", self.replicas)
+        log.info("  partitions = %s", self.partitions)
+        log.info("  tmp_dir    = %s", self.tmp_dir)
 
         # Create directories
         os.mkdir(os.path.join(self.tmp_dir, "logs"))
@@ -195,11 +209,6 @@ class KafkaFixture(Fixture):
         properties = os.path.join(self.tmp_dir, "kafka.properties")
         self.render_template(template, properties, vars(self))
 
-        # Configure Kafka child process
-        args = self.kafka_run_class_args("kafka.Kafka", properties)
-        env = self.kafka_run_class_env()
-        self.child = SpawnedService(args, env)
-
         # Party!
         self.out("Creating Zookeeper chroot node...")
         args = self.kafka_run_class_args("org.apache.zookeeper.ZooKeeperMain",
@@ -218,8 +227,24 @@ class KafkaFixture(Fixture):
         self.out("Done!")
 
         self.out("Starting...")
-        self.child.start()
-        self.child.wait_for(r"\[Kafka Server %d\], Started" % self.broker_id)
+        # Configure Kafka child process
+        args = self.kafka_run_class_args("kafka.Kafka", properties)
+        env = self.kafka_run_class_env()
+
+        timeout = 5
+        max_timeout = 30
+        backoff = 1
+        while True:
+            self.child = SpawnedService(args, env)
+            self.child.start()
+            timeout = min(timeout, max_timeout)
+            if self.child.wait_for(r"\[Kafka Server %d\], Started" %
+                                   self.broker_id, timeout=timeout):
+                break
+            self.child.stop()
+            timeout *= 2
+            time.sleep(backoff)
        self.out("Done!")
        self.running = True
 
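Both fixtures now start their child process in a retry loop: wait for the readiness pattern with a bounded timeout and, if it does not appear, stop the child, double the timeout, and respawn. A generic sketch of that pattern (spawn and wait_ready are hypothetical callables, not names from the diff):

import time

def start_with_retries(spawn, wait_ready, timeout=5, max_timeout=30, backoff=1):
    # Respawn with an exponentially growing readiness timeout until the
    # service reports that it is up, sleeping briefly between attempts.
    while True:
        child = spawn()
        if wait_ready(child, timeout=min(timeout, max_timeout)):
            return child
        child.stop()
        timeout *= 2
        time.sleep(backoff)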
@@ -11,9 +11,13 @@ __all__ = [
|
|||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ExternalService(object):
|
class ExternalService(object):
|
||||||
def __init__(self, host, port):
|
def __init__(self, host, port):
|
||||||
logging.info("Using already running service at %s:%d", host, port)
|
log.info("Using already running service at %s:%d", host, port)
|
||||||
self.host = host
|
self.host = host
|
||||||
self.port = port
|
self.port = port
|
||||||
|
|
||||||
@@ -36,19 +40,38 @@ class SpawnedService(threading.Thread):
|
|||||||
self.captured_stderr = []
|
self.captured_stderr = []
|
||||||
|
|
||||||
self.should_die = threading.Event()
|
self.should_die = threading.Event()
|
||||||
|
self.child = None
|
||||||
|
self.alive = False
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.run_with_handles()
|
self.run_with_handles()
|
||||||
|
|
||||||
def run_with_handles(self):
|
def _spawn(self):
|
||||||
|
if self.alive: return
|
||||||
|
if self.child and self.child.poll() is None: return
|
||||||
|
|
||||||
self.child = subprocess.Popen(
|
self.child = subprocess.Popen(
|
||||||
self.args,
|
self.args,
|
||||||
env=self.env,
|
env=self.env,
|
||||||
bufsize=1,
|
bufsize=1,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
alive = True
|
self.alive = True
|
||||||
|
|
||||||
|
def _despawn(self):
|
||||||
|
if self.child.poll() is None:
|
||||||
|
self.child.terminate()
|
||||||
|
self.alive = False
|
||||||
|
for _ in range(50):
|
||||||
|
if self.child.poll() is not None:
|
||||||
|
self.child = None
|
||||||
|
break
|
||||||
|
time.sleep(0.1)
|
||||||
|
else:
|
||||||
|
self.child.kill()
|
||||||
|
|
||||||
|
def run_with_handles(self):
|
||||||
|
self._spawn()
|
||||||
while True:
|
while True:
|
||||||
(rds, _, _) = select.select([self.child.stdout, self.child.stderr], [], [], 1)
|
(rds, _, _) = select.select([self.child.stdout, self.child.stderr], [], [], 1)
|
||||||
|
|
||||||
@@ -60,26 +83,22 @@ class SpawnedService(threading.Thread):
|
|||||||
line = self.child.stderr.readline()
|
line = self.child.stderr.readline()
|
||||||
self.captured_stderr.append(line.decode('utf-8'))
|
self.captured_stderr.append(line.decode('utf-8'))
|
||||||
|
|
||||||
if self.should_die.is_set():
|
if self.child.poll() is not None:
|
||||||
self.child.terminate()
|
self.dump_logs()
|
||||||
alive = False
|
self._spawn()
|
||||||
|
|
||||||
poll_results = self.child.poll()
|
if self.should_die.is_set():
|
||||||
if poll_results is not None:
|
self._despawn()
|
||||||
if not alive:
|
break
|
||||||
break
|
|
||||||
else:
|
|
||||||
self.dump_logs()
|
|
||||||
raise RuntimeError("Subprocess has died. Aborting. (args=%s)" % ' '.join(str(x) for x in self.args))
|
|
||||||
|
|
||||||
def dump_logs(self):
|
def dump_logs(self):
|
||||||
logging.critical('stderr')
|
log.critical('stderr')
|
||||||
for line in self.captured_stderr:
|
for line in self.captured_stderr:
|
||||||
logging.critical(line.rstrip())
|
log.critical(line.rstrip())
|
||||||
|
|
||||||
logging.critical('stdout')
|
log.critical('stdout')
|
||||||
for line in self.captured_stdout:
|
for line in self.captured_stdout:
|
||||||
logging.critical(line.rstrip())
|
log.critical(line.rstrip())
|
||||||
|
|
||||||
def wait_for(self, pattern, timeout=30):
|
def wait_for(self, pattern, timeout=30):
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
@@ -89,17 +108,18 @@ class SpawnedService(threading.Thread):
|
|||||||
try:
|
try:
|
||||||
self.child.kill()
|
self.child.kill()
|
||||||
except:
|
except:
|
||||||
logging.exception("Received exception when killing child process")
|
log.exception("Received exception when killing child process")
|
||||||
self.dump_logs()
|
self.dump_logs()
|
||||||
|
|
||||||
raise RuntimeError("Waiting for %r timed out after %d seconds" % (pattern, timeout))
|
log.error("Waiting for %r timed out after %d seconds", pattern, timeout)
|
||||||
|
return False
|
||||||
|
|
||||||
if re.search(pattern, '\n'.join(self.captured_stdout), re.IGNORECASE) is not None:
|
if re.search(pattern, '\n'.join(self.captured_stdout), re.IGNORECASE) is not None:
|
||||||
logging.info("Found pattern %r in %d seconds via stdout", pattern, (t2 - t1))
|
log.info("Found pattern %r in %d seconds via stdout", pattern, (t2 - t1))
|
||||||
return
|
return True
|
||||||
if re.search(pattern, '\n'.join(self.captured_stderr), re.IGNORECASE) is not None:
|
if re.search(pattern, '\n'.join(self.captured_stderr), re.IGNORECASE) is not None:
|
||||||
logging.info("Found pattern %r in %d seconds via stderr", pattern, (t2 - t1))
|
log.info("Found pattern %r in %d seconds via stderr", pattern, (t2 - t1))
|
||||||
return
|
return True
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
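A minimal sketch of the terminate-then-kill shutdown that the _despawn() hunk above implements, using only the standard library; the 50 x 0.1s polling budget mirrors the test harness, while the helper name stop_child and the 'sleep' child process are illustrative assumptions.

import subprocess
import time

def stop_child(child, poll_attempts=50, poll_interval=0.1):
    """Ask a subprocess to exit politely, then force-kill it if it lingers."""
    if child.poll() is None:
        child.terminate()              # polite request first (SIGTERM)
    for _ in range(poll_attempts):
        if child.poll() is not None:   # process has exited
            return True
        time.sleep(poll_interval)
    child.kill()                       # SIGKILL as a last resort
    return False

# Example usage; POSIX 'sleep' stands in for the spawned Kafka broker process.
proc = subprocess.Popen(['sleep', '30'])
stop_child(proc)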
|||||||
@@ -117,21 +117,21 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
topics = [
|
topics = [
|
||||||
TopicMetadata('topic_1', NO_ERROR, [
|
TopicMetadata(b'topic_1', NO_ERROR, [
|
||||||
PartitionMetadata('topic_1', 0, 1, [1, 2], [1, 2], NO_ERROR)
|
PartitionMetadata(b'topic_1', 0, 1, [1, 2], [1, 2], NO_ERROR)
|
||||||
]),
|
]),
|
||||||
TopicMetadata('topic_noleader', NO_ERROR, [
|
TopicMetadata(b'topic_noleader', NO_ERROR, [
|
||||||
PartitionMetadata('topic_noleader', 0, -1, [], [],
|
PartitionMetadata(b'topic_noleader', 0, -1, [], [],
|
||||||
NO_LEADER),
|
NO_LEADER),
|
||||||
PartitionMetadata('topic_noleader', 1, -1, [], [],
|
PartitionMetadata(b'topic_noleader', 1, -1, [], [],
|
||||||
NO_LEADER),
|
NO_LEADER),
|
||||||
]),
|
]),
|
||||||
TopicMetadata('topic_no_partitions', NO_LEADER, []),
|
TopicMetadata(b'topic_no_partitions', NO_LEADER, []),
|
||||||
TopicMetadata('topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
|
TopicMetadata(b'topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
|
||||||
TopicMetadata('topic_3', NO_ERROR, [
|
TopicMetadata(b'topic_3', NO_ERROR, [
|
||||||
PartitionMetadata('topic_3', 0, 0, [0, 1], [0, 1], NO_ERROR),
|
PartitionMetadata(b'topic_3', 0, 0, [0, 1], [0, 1], NO_ERROR),
|
||||||
PartitionMetadata('topic_3', 1, 1, [1, 0], [1, 0], NO_ERROR),
|
PartitionMetadata(b'topic_3', 1, 1, [1, 0], [1, 0], NO_ERROR),
|
||||||
PartitionMetadata('topic_3', 2, 0, [0, 1], [0, 1], NO_ERROR)
|
PartitionMetadata(b'topic_3', 2, 0, [0, 1], [0, 1], NO_ERROR)
|
||||||
])
|
])
|
||||||
]
|
]
|
||||||
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
||||||
@@ -139,12 +139,12 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
# client loads metadata at init
|
# client loads metadata at init
|
||||||
client = KafkaClient(hosts=['broker_1:4567'])
|
client = KafkaClient(hosts=['broker_1:4567'])
|
||||||
self.assertDictEqual({
|
self.assertDictEqual({
|
||||||
TopicAndPartition('topic_1', 0): brokers[1],
|
TopicAndPartition(b'topic_1', 0): brokers[1],
|
||||||
TopicAndPartition('topic_noleader', 0): None,
|
TopicAndPartition(b'topic_noleader', 0): None,
|
||||||
TopicAndPartition('topic_noleader', 1): None,
|
TopicAndPartition(b'topic_noleader', 1): None,
|
||||||
TopicAndPartition('topic_3', 0): brokers[0],
|
TopicAndPartition(b'topic_3', 0): brokers[0],
|
||||||
TopicAndPartition('topic_3', 1): brokers[1],
|
TopicAndPartition(b'topic_3', 1): brokers[1],
|
||||||
TopicAndPartition('topic_3', 2): brokers[0]},
|
TopicAndPartition(b'topic_3', 2): brokers[0]},
|
||||||
client.topics_to_brokers)
|
client.topics_to_brokers)
|
||||||
|
|
||||||
# if we ask for metadata explicitly, it should raise errors
|
# if we ask for metadata explicitly, it should raise errors
|
||||||
@@ -156,6 +156,7 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
|
|
||||||
# This should not raise
|
# This should not raise
|
||||||
client.load_metadata_for_topics('topic_no_leader')
|
client.load_metadata_for_topics('topic_no_leader')
|
||||||
|
client.load_metadata_for_topics(b'topic_no_leader')
|
||||||
|
|
||||||
@patch('kafka.client.KafkaConnection')
|
@patch('kafka.client.KafkaConnection')
|
||||||
@patch('kafka.client.KafkaProtocol')
|
@patch('kafka.client.KafkaProtocol')
|
||||||
@@ -169,11 +170,11 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
topics = [
|
topics = [
|
||||||
TopicMetadata('topic_still_creating', NO_LEADER, []),
|
TopicMetadata(b'topic_still_creating', NO_LEADER, []),
|
||||||
TopicMetadata('topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
|
TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
|
||||||
TopicMetadata('topic_noleaders', NO_ERROR, [
|
TopicMetadata(b'topic_noleaders', NO_ERROR, [
|
||||||
PartitionMetadata('topic_noleaders', 0, -1, [], [], NO_LEADER),
|
PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
|
||||||
PartitionMetadata('topic_noleaders', 1, -1, [], [], NO_LEADER),
|
PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
||||||
@@ -188,8 +189,8 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
|
self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
|
||||||
|
|
||||||
@patch('kafka.client.KafkaConnection')
|
@patch('kafka.client.KafkaConnection')
|
||||||
@patch('kafka.client.KafkaProtocol')
|
@patch('kafka.client.KafkaProtocol.decode_metadata_response')
|
||||||
def test_ensure_topic_exists(self, protocol, conn):
|
def test_ensure_topic_exists(self, decode_metadata_response, conn):
|
||||||
|
|
||||||
conn.recv.return_value = 'response' # anything but None
|
conn.recv.return_value = 'response' # anything but None
|
||||||
|
|
||||||
@@ -199,14 +200,14 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
topics = [
|
topics = [
|
||||||
TopicMetadata('topic_still_creating', NO_LEADER, []),
|
TopicMetadata(b'topic_still_creating', NO_LEADER, []),
|
||||||
TopicMetadata('topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
|
TopicMetadata(b'topic_doesnt_exist', UNKNOWN_TOPIC_OR_PARTITION, []),
|
||||||
TopicMetadata('topic_noleaders', NO_ERROR, [
|
TopicMetadata(b'topic_noleaders', NO_ERROR, [
|
||||||
PartitionMetadata('topic_noleaders', 0, -1, [], [], NO_LEADER),
|
PartitionMetadata(b'topic_noleaders', 0, -1, [], [], NO_LEADER),
|
||||||
PartitionMetadata('topic_noleaders', 1, -1, [], [], NO_LEADER),
|
PartitionMetadata(b'topic_noleaders', 1, -1, [], [], NO_LEADER),
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
||||||
|
|
||||||
client = KafkaClient(hosts=['broker_1:4567'])
|
client = KafkaClient(hosts=['broker_1:4567'])
|
||||||
|
|
||||||
@@ -218,6 +219,7 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
|
|
||||||
# This should not raise
|
# This should not raise
|
||||||
client.ensure_topic_exists('topic_noleaders', timeout=1)
|
client.ensure_topic_exists('topic_noleaders', timeout=1)
|
||||||
|
client.ensure_topic_exists(b'topic_noleaders', timeout=1)
|
||||||
|
|
||||||
@patch('kafka.client.KafkaConnection')
|
@patch('kafka.client.KafkaConnection')
|
||||||
@patch('kafka.client.KafkaProtocol')
|
@patch('kafka.client.KafkaProtocol')
|
||||||
@@ -269,8 +271,8 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
topics = [
|
topics = [
|
||||||
TopicMetadata('topic_no_partitions', NO_LEADER, []),
|
TopicMetadata(b'topic_no_partitions', NO_LEADER, []),
|
||||||
TopicMetadata('topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
|
TopicMetadata(b'topic_unknown', UNKNOWN_TOPIC_OR_PARTITION, []),
|
||||||
]
|
]
|
||||||
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
|
||||||
|
|
||||||
@@ -279,10 +281,10 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
self.assertDictEqual({}, client.topics_to_brokers)
|
self.assertDictEqual({}, client.topics_to_brokers)
|
||||||
|
|
||||||
with self.assertRaises(LeaderNotAvailableError):
|
with self.assertRaises(LeaderNotAvailableError):
|
||||||
client._get_leader_for_partition('topic_no_partitions', 0)
|
client._get_leader_for_partition(b'topic_no_partitions', 0)
|
||||||
|
|
||||||
with self.assertRaises(UnknownTopicOrPartitionError):
|
with self.assertRaises(UnknownTopicOrPartitionError):
|
||||||
client._get_leader_for_partition('topic_unknown', 0)
|
client._get_leader_for_partition(b'topic_unknown', 0)
|
||||||
|
|
||||||
@patch('kafka.client.KafkaConnection')
|
@patch('kafka.client.KafkaConnection')
|
||||||
@patch('kafka.client.KafkaProtocol')
|
@patch('kafka.client.KafkaProtocol')
|
||||||
@@ -401,3 +403,11 @@ class TestKafkaClient(unittest.TestCase):
|
|||||||
with self.assertRaises(ConnectionError):
|
with self.assertRaises(ConnectionError):
|
||||||
KafkaConnection("nowhere", 1234, 1.0)
|
KafkaConnection("nowhere", 1234, 1.0)
|
||||||
self.assertGreaterEqual(t.interval, 1.0)
|
self.assertGreaterEqual(t.interval, 1.0)
|
||||||
|
|
||||||
|
def test_correlation_rollover(self):
|
||||||
|
with patch.object(KafkaClient, 'load_metadata_for_topics'):
|
||||||
|
big_num = 2**31 - 3
|
||||||
|
client = KafkaClient(hosts=[], correlation_id=big_num)
|
||||||
|
self.assertEqual(big_num + 1, client._next_id())
|
||||||
|
self.assertEqual(big_num + 2, client._next_id())
|
||||||
|
self.assertEqual(0, client._next_id())
|
||||||
|
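The new test_correlation_rollover asserts that correlation ids count up to 2**31 - 1 and then wrap to 0. Below is a minimal generator with that behaviour; the wrap point is taken from the test's expectations, not from the client source, and the function name is hypothetical.

MAX_CORRELATION_ID = 2**31 - 1  # highest value the test expects before wrapping

def correlation_ids(start=0):
    """Yield start, start+1, ..., MAX_CORRELATION_ID, 0, 1, ... forever."""
    current = start
    while True:
        yield current
        current = 0 if current == MAX_CORRELATION_ID else current + 1

ids = correlation_ids(2**31 - 3)
assert [next(ids) for _ in range(4)] == [2**31 - 3, 2**31 - 2, 2**31 - 1, 0]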
|||||||
@@ -2,13 +2,13 @@ import os
|
|||||||
|
|
||||||
from kafka.common import (
|
from kafka.common import (
|
||||||
FetchRequest, OffsetCommitRequest, OffsetFetchRequest,
|
FetchRequest, OffsetCommitRequest, OffsetFetchRequest,
|
||||||
KafkaTimeoutError
|
KafkaTimeoutError, ProduceRequest
|
||||||
)
|
)
|
||||||
|
from kafka.protocol import create_message
|
||||||
|
|
||||||
from test.fixtures import ZookeeperFixture, KafkaFixture
|
from test.fixtures import ZookeeperFixture, KafkaFixture
|
||||||
from test.testutil import (
|
from test.testutil import KafkaIntegrationTestCase, kafka_versions
|
||||||
KafkaIntegrationTestCase, kafka_versions
|
|
||||||
)
|
|
||||||
|
|
||||||
class TestKafkaClientIntegration(KafkaIntegrationTestCase):
|
class TestKafkaClientIntegration(KafkaIntegrationTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -29,11 +29,11 @@ class TestKafkaClientIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_consume_none(self):
|
def test_consume_none(self):
|
||||||
fetch = FetchRequest(self.topic, 0, 0, 1024)
|
fetch = FetchRequest(self.bytes_topic, 0, 0, 1024)
|
||||||
|
|
||||||
fetch_resp, = self.client.send_fetch_request([fetch])
|
fetch_resp, = self.client.send_fetch_request([fetch])
|
||||||
self.assertEqual(fetch_resp.error, 0)
|
self.assertEqual(fetch_resp.error, 0)
|
||||||
self.assertEqual(fetch_resp.topic, self.topic)
|
self.assertEqual(fetch_resp.topic, self.bytes_topic)
|
||||||
self.assertEqual(fetch_resp.partition, 0)
|
self.assertEqual(fetch_resp.partition, 0)
|
||||||
|
|
||||||
messages = list(fetch_resp.messages)
|
messages = list(fetch_resp.messages)
|
||||||
@@ -50,17 +50,46 @@ class TestKafkaClientIntegration(KafkaIntegrationTestCase):
|
|||||||
with self.assertRaises(KafkaTimeoutError):
|
with self.assertRaises(KafkaTimeoutError):
|
||||||
self.client.ensure_topic_exists(b"this_topic_doesnt_exist", timeout=0)
|
self.client.ensure_topic_exists(b"this_topic_doesnt_exist", timeout=0)
|
||||||
|
|
||||||
|
@kafka_versions('all')
|
||||||
|
def test_send_produce_request_maintains_request_response_order(self):
|
||||||
|
|
||||||
|
self.client.ensure_topic_exists(b'foo')
|
||||||
|
self.client.ensure_topic_exists(b'bar')
|
||||||
|
|
||||||
|
requests = [
|
||||||
|
ProduceRequest(
|
||||||
|
b'foo', 0,
|
||||||
|
[create_message(b'a'), create_message(b'b')]),
|
||||||
|
ProduceRequest(
|
||||||
|
b'bar', 1,
|
||||||
|
[create_message(b'a'), create_message(b'b')]),
|
||||||
|
ProduceRequest(
|
||||||
|
b'foo', 1,
|
||||||
|
[create_message(b'a'), create_message(b'b')]),
|
||||||
|
ProduceRequest(
|
||||||
|
b'bar', 0,
|
||||||
|
[create_message(b'a'), create_message(b'b')]),
|
||||||
|
]
|
||||||
|
|
||||||
|
responses = self.client.send_produce_request(requests)
|
||||||
|
while len(responses):
|
||||||
|
request = requests.pop()
|
||||||
|
response = responses.pop()
|
||||||
|
self.assertEqual(request.topic, response.topic)
|
||||||
|
self.assertEqual(request.partition, response.partition)
|
||||||
|
|
||||||
|
|
||||||
####################
|
####################
|
||||||
# Offset Tests #
|
# Offset Tests #
|
||||||
####################
|
####################
|
||||||
|
|
||||||
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
def test_commit_fetch_offsets(self):
|
def test_commit_fetch_offsets(self):
|
||||||
req = OffsetCommitRequest(self.topic, 0, 42, b"metadata")
|
req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
|
||||||
(resp,) = self.client.send_offset_commit_request(b"group", [req])
|
(resp,) = self.client.send_offset_commit_request(b"group", [req])
|
||||||
self.assertEqual(resp.error, 0)
|
self.assertEqual(resp.error, 0)
|
||||||
|
|
||||||
req = OffsetFetchRequest(self.topic, 0)
|
req = OffsetFetchRequest(self.bytes_topic, 0)
|
||||||
(resp,) = self.client.send_offset_fetch_request(b"group", [req])
|
(resp,) = self.client.send_offset_fetch_request(b"group", [req])
|
||||||
self.assertEqual(resp.error, 0)
|
self.assertEqual(resp.error, 0)
|
||||||
self.assertEqual(resp.offset, 42)
|
self.assertEqual(resp.offset, 42)
|
||||||
|
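The ordering test above pops matching entries off the ends of the request and response lists. An equivalent pairwise check with zip, assuming only that each response carries topic and partition fields as in the test:

def assert_same_order(requests, responses):
    """Check that the i-th response answers the i-th request."""
    assert len(requests) == len(responses)
    for request, response in zip(requests, responses):
        assert request.topic == response.topic
        assert request.partition == response.partition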
|||||||
@@ -13,16 +13,16 @@ from test.testutil import random_string
|
|||||||
class TestCodec(unittest.TestCase):
|
class TestCodec(unittest.TestCase):
|
||||||
def test_gzip(self):
|
def test_gzip(self):
|
||||||
for i in xrange(1000):
|
for i in xrange(1000):
|
||||||
s1 = random_string(100)
|
b1 = random_string(100).encode('utf-8')
|
||||||
s2 = gzip_decode(gzip_encode(s1))
|
b2 = gzip_decode(gzip_encode(b1))
|
||||||
self.assertEqual(s1, s2)
|
self.assertEqual(b1, b2)
|
||||||
|
|
||||||
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
def test_snappy(self):
|
def test_snappy(self):
|
||||||
for i in xrange(1000):
|
for i in xrange(1000):
|
||||||
s1 = random_string(100)
|
b1 = random_string(100).encode('utf-8')
|
||||||
s2 = snappy_decode(snappy_encode(s1))
|
b2 = snappy_decode(snappy_encode(b1))
|
||||||
self.assertEqual(s1, s2)
|
self.assertEqual(b1, b2)
|
||||||
|
|
||||||
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
def test_snappy_detect_xerial(self):
|
def test_snappy_detect_xerial(self):
|
||||||
|
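The codec tests now encode the random strings to bytes before compressing, because under Python 3 the compression codecs operate on bytes rather than str. The same bytes-in/bytes-out roundtrip property can be checked with the standard-library zlib module alone; this illustrates the contract being tested, not kafka.codec itself.

import random
import string
import zlib

def random_bytes(n):
    """Build n ASCII letters and encode them, mirroring the test's .encode('utf-8')."""
    return ''.join(random.choice(string.ascii_letters) for _ in range(n)).encode('utf-8')

for _ in range(1000):
    original = random_bytes(100)
    assert zlib.decompress(zlib.compress(original)) == original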
|||||||
@@ -1,5 +1,7 @@
|
|||||||
|
import logging
|
||||||
import socket
|
import socket
|
||||||
import struct
|
import struct
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
from . import unittest
|
from . import unittest
|
||||||
@@ -9,6 +11,10 @@ from kafka.conn import KafkaConnection, collect_hosts, DEFAULT_SOCKET_TIMEOUT_SE
|
|||||||
|
|
||||||
class ConnTest(unittest.TestCase):
|
class ConnTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
||||||
|
# kafka.conn debug logging is verbose, so only enable in conn tests
|
||||||
|
logging.getLogger('kafka.conn').setLevel(logging.DEBUG)
|
||||||
|
|
||||||
self.config = {
|
self.config = {
|
||||||
'host': 'localhost',
|
'host': 'localhost',
|
||||||
'port': 9090,
|
'port': 9090,
|
||||||
@@ -44,6 +50,11 @@ class ConnTest(unittest.TestCase):
|
|||||||
# Reset any mock counts caused by __init__
|
# Reset any mock counts caused by __init__
|
||||||
self.MockCreateConn.reset_mock()
|
self.MockCreateConn.reset_mock()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
# Return connection logging to INFO
|
||||||
|
logging.getLogger('kafka.conn').setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
def test_collect_hosts__happy_path(self):
|
def test_collect_hosts__happy_path(self):
|
||||||
hosts = "localhost:1234,localhost"
|
hosts = "localhost:1234,localhost"
|
||||||
results = collect_hosts(hosts)
|
results = collect_hosts(hosts)
|
||||||
@@ -154,6 +165,23 @@ class ConnTest(unittest.TestCase):
|
|||||||
self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload'])
|
self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload'])
|
||||||
self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload2'])
|
self.assertEqual(self.conn.recv(self.config['request_id']), self.config['payload2'])
|
||||||
|
|
||||||
|
def test_get_connected_socket(self):
|
||||||
|
s = self.conn.get_connected_socket()
|
||||||
|
|
||||||
|
self.assertEqual(s, self.MockCreateConn())
|
||||||
|
|
||||||
|
def test_get_connected_socket_on_dirty_conn(self):
|
||||||
|
# Dirty the connection
|
||||||
|
try:
|
||||||
|
self.conn._raise_connection_error()
|
||||||
|
except ConnectionError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Test that get_connected_socket tries to connect
|
||||||
|
self.assertEqual(self.MockCreateConn.call_count, 0)
|
||||||
|
self.conn.get_connected_socket()
|
||||||
|
self.assertEqual(self.MockCreateConn.call_count, 1)
|
||||||
|
|
||||||
def test_close__object_is_reusable(self):
|
def test_close__object_is_reusable(self):
|
||||||
|
|
||||||
# test that sending to a closed connection
|
# test that sending to a closed connection
|
||||||
@@ -162,3 +190,54 @@ class ConnTest(unittest.TestCase):
|
|||||||
self.conn.send(self.config['request_id'], self.config['payload'])
|
self.conn.send(self.config['request_id'], self.config['payload'])
|
||||||
self.assertEqual(self.MockCreateConn.call_count, 1)
|
self.assertEqual(self.MockCreateConn.call_count, 1)
|
||||||
self.conn._sock.sendall.assert_called_with(self.config['payload'])
|
self.conn._sock.sendall.assert_called_with(self.config['payload'])
|
||||||
|
|
||||||
|
|
||||||
|
class TestKafkaConnection(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
# kafka.conn debug logging is verbose, so only enable in conn tests
|
||||||
|
logging.getLogger('kafka.conn').setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
# Return connection logging to INFO
|
||||||
|
logging.getLogger('kafka.conn').setLevel(logging.INFO)
|
||||||
|
|
||||||
|
@mock.patch('socket.create_connection')
|
||||||
|
def test_copy(self, socket):
|
||||||
|
"""KafkaConnection copies work as expected"""
|
||||||
|
|
||||||
|
conn = KafkaConnection('kafka', 9092)
|
||||||
|
self.assertEqual(socket.call_count, 1)
|
||||||
|
|
||||||
|
copy = conn.copy()
|
||||||
|
self.assertEqual(socket.call_count, 1)
|
||||||
|
self.assertEqual(copy.host, 'kafka')
|
||||||
|
self.assertEqual(copy.port, 9092)
|
||||||
|
self.assertEqual(copy._sock, None)
|
||||||
|
|
||||||
|
copy.reinit()
|
||||||
|
self.assertEqual(socket.call_count, 2)
|
||||||
|
self.assertNotEqual(copy._sock, None)
|
||||||
|
|
||||||
|
@mock.patch('socket.create_connection')
|
||||||
|
def test_copy_thread(self, socket):
|
||||||
|
"""KafkaConnection copies work in other threads"""
|
||||||
|
|
||||||
|
err = []
|
||||||
|
copy = KafkaConnection('kafka', 9092).copy()
|
||||||
|
|
||||||
|
def thread_func(err, copy):
|
||||||
|
try:
|
||||||
|
self.assertEqual(copy.host, 'kafka')
|
||||||
|
self.assertEqual(copy.port, 9092)
|
||||||
|
self.assertNotEqual(copy._sock, None)
|
||||||
|
except Exception as e:
|
||||||
|
err.append(e)
|
||||||
|
else:
|
||||||
|
err.append(None)
|
||||||
|
thread = Thread(target=thread_func, args=(err, copy))
|
||||||
|
thread.start()
|
||||||
|
thread.join()
|
||||||
|
|
||||||
|
self.assertEqual(err, [None])
|
||||||
|
self.assertEqual(socket.call_count, 2)
|
||||||
|
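test_copy_thread above collects failures from the worker thread into a list because an exception raised inside a Thread would otherwise be swallowed and the test would pass silently. A generic sketch of that pattern; run_in_thread is a hypothetical helper, not part of the library.

import threading

def run_in_thread(fn, *args):
    """Run fn in a thread and re-raise any exception in the caller."""
    errors = []

    def wrapper():
        try:
            fn(*args)
        except Exception as e:      # capture instead of losing the failure
            errors.append(e)

    t = threading.Thread(target=wrapper)
    t.start()
    t.join()
    if errors:
        raise errors[0]

# Example: the callable runs in another thread but still fails the caller on error.
run_in_thread(lambda: None)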
|||||||
@@ -1,9 +1,14 @@
|
|||||||
|
|
||||||
from mock import MagicMock
|
from mock import MagicMock, patch
|
||||||
from . import unittest
|
from . import unittest
|
||||||
|
|
||||||
from kafka import SimpleConsumer, KafkaConsumer
|
from kafka import SimpleConsumer, KafkaConsumer, MultiProcessConsumer
|
||||||
from kafka.common import KafkaConfigurationError
|
from kafka.common import (
|
||||||
|
KafkaConfigurationError, FetchResponse, OffsetFetchResponse,
|
||||||
|
FailedPayloadsError, OffsetAndMessage,
|
||||||
|
NotLeaderForPartitionError, UnknownTopicOrPartitionError
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestKafkaConsumer(unittest.TestCase):
|
class TestKafkaConsumer(unittest.TestCase):
|
||||||
def test_non_integer_partitions(self):
|
def test_non_integer_partitions(self):
|
||||||
@@ -13,3 +18,120 @@ class TestKafkaConsumer(unittest.TestCase):
|
|||||||
def test_broker_list_required(self):
|
def test_broker_list_required(self):
|
||||||
with self.assertRaises(KafkaConfigurationError):
|
with self.assertRaises(KafkaConfigurationError):
|
||||||
KafkaConsumer()
|
KafkaConsumer()
|
||||||
|
|
||||||
|
|
||||||
|
class TestMultiProcessConsumer(unittest.TestCase):
|
||||||
|
def test_partition_list(self):
|
||||||
|
client = MagicMock()
|
||||||
|
partitions = (0,)
|
||||||
|
with patch.object(MultiProcessConsumer, 'fetch_last_known_offsets') as fetch_last_known_offsets:
|
||||||
|
MultiProcessConsumer(client, 'testing-group', 'testing-topic', partitions=partitions)
|
||||||
|
self.assertEqual(fetch_last_known_offsets.call_args[0], (partitions,) )
|
||||||
|
self.assertEqual(client.get_partition_ids_for_topic.call_count, 0) # pylint: disable=no-member
|
||||||
|
|
||||||
|
class TestSimpleConsumer(unittest.TestCase):
|
||||||
|
def test_simple_consumer_failed_payloads(self):
|
||||||
|
client = MagicMock()
|
||||||
|
consumer = SimpleConsumer(client, group=None,
|
||||||
|
topic='topic', partitions=[0, 1],
|
||||||
|
auto_commit=False)
|
||||||
|
|
||||||
|
def failed_payloads(payload):
|
||||||
|
return FailedPayloadsError(payload)
|
||||||
|
|
||||||
|
client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
consumer.get_messages(5)
|
||||||
|
|
||||||
|
def test_simple_consumer_leader_change(self):
|
||||||
|
client = MagicMock()
|
||||||
|
consumer = SimpleConsumer(client, group=None,
|
||||||
|
topic='topic', partitions=[0, 1],
|
||||||
|
auto_commit=False)
|
||||||
|
|
||||||
|
# Mock so that only the first request gets a valid response
|
||||||
|
def not_leader(request):
|
||||||
|
return FetchResponse(request.topic, request.partition,
|
||||||
|
NotLeaderForPartitionError.errno, -1, ())
|
||||||
|
|
||||||
|
client.send_fetch_request.side_effect = self.fail_requests_factory(not_leader)
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
consumer.get_messages(20)
|
||||||
|
|
||||||
|
# client should have updated metadata
|
||||||
|
self.assertGreaterEqual(client.reset_topic_metadata.call_count, 1)
|
||||||
|
self.assertGreaterEqual(client.load_metadata_for_topics.call_count, 1)
|
||||||
|
|
||||||
|
def test_simple_consumer_unknown_topic_partition(self):
|
||||||
|
client = MagicMock()
|
||||||
|
consumer = SimpleConsumer(client, group=None,
|
||||||
|
topic='topic', partitions=[0, 1],
|
||||||
|
auto_commit=False)
|
||||||
|
|
||||||
|
# Mock so that only the first request gets a valid response
|
||||||
|
def unknown_topic_partition(request):
|
||||||
|
return FetchResponse(request.topic, request.partition,
|
||||||
|
UnknownTopicOrPartitionError.errno, -1, ())
|
||||||
|
|
||||||
|
client.send_fetch_request.side_effect = self.fail_requests_factory(unknown_topic_partition)
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
with self.assertRaises(UnknownTopicOrPartitionError):
|
||||||
|
consumer.get_messages(20)
|
||||||
|
|
||||||
|
def test_simple_consumer_commit_does_not_raise(self):
|
||||||
|
client = MagicMock()
|
||||||
|
client.get_partition_ids_for_topic.return_value = [0, 1]
|
||||||
|
|
||||||
|
def mock_offset_fetch_request(group, payloads, **kwargs):
|
||||||
|
return [OffsetFetchResponse(p.topic, p.partition, 0, b'', 0) for p in payloads]
|
||||||
|
|
||||||
|
client.send_offset_fetch_request.side_effect = mock_offset_fetch_request
|
||||||
|
|
||||||
|
def mock_offset_commit_request(group, payloads, **kwargs):
|
||||||
|
raise FailedPayloadsError(payloads[0])
|
||||||
|
|
||||||
|
client.send_offset_commit_request.side_effect = mock_offset_commit_request
|
||||||
|
|
||||||
|
consumer = SimpleConsumer(client, group='foobar',
|
||||||
|
topic='topic', partitions=[0, 1],
|
||||||
|
auto_commit=False)
|
||||||
|
|
||||||
|
# Mock internal commit check
|
||||||
|
consumer.count_since_commit = 10
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
self.assertFalse(consumer.commit(partitions=[0, 1]))
|
||||||
|
|
||||||
|
def test_simple_consumer_reset_partition_offset(self):
|
||||||
|
client = MagicMock()
|
||||||
|
|
||||||
|
def mock_offset_request(payloads, **kwargs):
|
||||||
|
raise FailedPayloadsError(payloads[0])
|
||||||
|
|
||||||
|
client.send_offset_request.side_effect = mock_offset_request
|
||||||
|
|
||||||
|
consumer = SimpleConsumer(client, group='foobar',
|
||||||
|
topic='topic', partitions=[0, 1],
|
||||||
|
auto_commit=False)
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
self.assertEqual(consumer.reset_partition_offset(0), None)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def fail_requests_factory(error_factory):
|
||||||
|
# Mock so that only the first request gets a valid response
|
||||||
|
def fail_requests(payloads, **kwargs):
|
||||||
|
responses = [
|
||||||
|
FetchResponse(payloads[0].topic, payloads[0].partition, 0, 0,
|
||||||
|
(OffsetAndMessage(
|
||||||
|
payloads[0].offset + i,
|
||||||
|
"msg %d" % (payloads[0].offset + i))
|
||||||
|
for i in range(10))),
|
||||||
|
]
|
||||||
|
for failure in payloads[1:]:
|
||||||
|
responses.append(error_factory(failure))
|
||||||
|
return responses
|
||||||
|
return fail_requests
|
||||||
|
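fail_requests_factory above builds a side_effect callable so that a mocked send_fetch_request returns one good response and wraps the remaining payloads in whatever error each test chooses. A stripped-down sketch of the same mock technique using unittest.mock (or the mock backport on Python 2); the payload and response shapes here are simplified stand-ins, not the kafka-python structures.

from collections import namedtuple
from unittest.mock import MagicMock

Payload = namedtuple('Payload', 'topic partition')
Response = namedtuple('Response', 'topic partition error')

def make_side_effect(error_code):
    def fake_fetch(payloads, **kwargs):
        # First payload succeeds, the rest come back with the injected error code.
        good = [Response(payloads[0].topic, payloads[0].partition, 0)]
        bad = [Response(p.topic, p.partition, error_code) for p in payloads[1:]]
        return good + bad
    return fake_fetch

client = MagicMock()
client.send_fetch_request.side_effect = make_side_effect(error_code=6)
resps = client.send_fetch_request([Payload('t', 0), Payload('t', 1)])
assert [r.error for r in resps] == [0, 6]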
|||||||
@@ -3,9 +3,12 @@ import os
|
|||||||
|
|
||||||
from six.moves import xrange
|
from six.moves import xrange
|
||||||
|
|
||||||
from kafka import SimpleConsumer, MultiProcessConsumer, KafkaConsumer, create_message
|
from kafka import (
|
||||||
|
KafkaConsumer, MultiProcessConsumer, SimpleConsumer, create_message
|
||||||
|
)
|
||||||
from kafka.common import (
|
from kafka.common import (
|
||||||
ProduceRequest, ConsumerFetchSizeTooSmall, ConsumerTimeout
|
ProduceRequest, ConsumerFetchSizeTooSmall, ConsumerTimeout,
|
||||||
|
OffsetOutOfRangeError
|
||||||
)
|
)
|
||||||
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
|
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
|
||||||
|
|
||||||
@@ -14,6 +17,7 @@ from test.testutil import (
|
|||||||
KafkaIntegrationTestCase, kafka_versions, random_string, Timer
|
KafkaIntegrationTestCase, kafka_versions, random_string, Timer
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestConsumerIntegration(KafkaIntegrationTestCase):
|
class TestConsumerIntegration(KafkaIntegrationTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
@@ -37,7 +41,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
def send_messages(self, partition, messages):
|
def send_messages(self, partition, messages):
|
||||||
messages = [ create_message(self.msg(str(msg))) for msg in messages ]
|
messages = [ create_message(self.msg(str(msg))) for msg in messages ]
|
||||||
produce = ProduceRequest(self.topic, partition, messages = messages)
|
produce = ProduceRequest(self.bytes_topic, partition, messages = messages)
|
||||||
resp, = self.client.send_produce_request([produce])
|
resp, = self.client.send_produce_request([produce])
|
||||||
self.assertEqual(resp.error, 0)
|
self.assertEqual(resp.error, 0)
|
||||||
|
|
||||||
@@ -53,6 +57,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
def consumer(self, **kwargs):
|
def consumer(self, **kwargs):
|
||||||
if os.environ['KAFKA_VERSION'] == "0.8.0":
|
if os.environ['KAFKA_VERSION'] == "0.8.0":
|
||||||
# Kafka 0.8.0 simply doesn't support offset requests, so hard code it being off
|
# Kafka 0.8.0 simply doesn't support offset requests, so hard code it being off
|
||||||
|
kwargs['group'] = None
|
||||||
kwargs['auto_commit'] = False
|
kwargs['auto_commit'] = False
|
||||||
else:
|
else:
|
||||||
kwargs.setdefault('auto_commit', True)
|
kwargs.setdefault('auto_commit', True)
|
||||||
@@ -61,7 +66,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
group = kwargs.pop('group', self.id().encode('utf-8'))
|
group = kwargs.pop('group', self.id().encode('utf-8'))
|
||||||
topic = kwargs.pop('topic', self.topic)
|
topic = kwargs.pop('topic', self.topic)
|
||||||
|
|
||||||
if consumer_class == SimpleConsumer:
|
if consumer_class in [SimpleConsumer, MultiProcessConsumer]:
|
||||||
kwargs.setdefault('iter_timeout', 0)
|
kwargs.setdefault('iter_timeout', 0)
|
||||||
|
|
||||||
return consumer_class(self.client, group, topic, **kwargs)
|
return consumer_class(self.client, group, topic, **kwargs)
|
||||||
@@ -69,7 +74,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
def kafka_consumer(self, **configs):
|
def kafka_consumer(self, **configs):
|
||||||
brokers = '%s:%d' % (self.server.host, self.server.port)
|
brokers = '%s:%d' % (self.server.host, self.server.port)
|
||||||
consumer = KafkaConsumer(self.topic,
|
consumer = KafkaConsumer(self.topic,
|
||||||
metadata_broker_list=brokers,
|
bootstrap_servers=brokers,
|
||||||
**configs)
|
**configs)
|
||||||
return consumer
|
return consumer
|
||||||
|
|
||||||
@@ -85,6 +90,65 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
consumer.stop()
|
consumer.stop()
|
||||||
|
|
||||||
|
@kafka_versions('all')
|
||||||
|
def test_simple_consumer_smallest_offset_reset(self):
|
||||||
|
self.send_messages(0, range(0, 100))
|
||||||
|
self.send_messages(1, range(100, 200))
|
||||||
|
|
||||||
|
consumer = self.consumer(auto_offset_reset='smallest')
|
||||||
|
# Move fetch offset ahead of 300 message (out of range)
|
||||||
|
consumer.seek(300, 2)
|
||||||
|
# Since auto_offset_reset is set to smallest we should read all 200
|
||||||
|
# messages from beginning.
|
||||||
|
self.assert_message_count([message for message in consumer], 200)
|
||||||
|
|
||||||
|
@kafka_versions('all')
|
||||||
|
def test_simple_consumer_largest_offset_reset(self):
|
||||||
|
self.send_messages(0, range(0, 100))
|
||||||
|
self.send_messages(1, range(100, 200))
|
||||||
|
|
||||||
|
# Default largest
|
||||||
|
consumer = self.consumer()
|
||||||
|
# Move fetch offset ahead of 300 message (out of range)
|
||||||
|
consumer.seek(300, 2)
|
||||||
|
# Since auto_offset_reset is set to largest we should not read any
|
||||||
|
# messages.
|
||||||
|
self.assert_message_count([message for message in consumer], 0)
|
||||||
|
# Send 200 new messages to the queue
|
||||||
|
self.send_messages(0, range(200, 300))
|
||||||
|
self.send_messages(1, range(300, 400))
|
||||||
|
# Since the offset is set to largest we should read all the new messages.
|
||||||
|
self.assert_message_count([message for message in consumer], 200)
|
||||||
|
|
||||||
|
@kafka_versions('all')
|
||||||
|
def test_simple_consumer_no_reset(self):
|
||||||
|
self.send_messages(0, range(0, 100))
|
||||||
|
self.send_messages(1, range(100, 200))
|
||||||
|
|
||||||
|
# Default largest
|
||||||
|
consumer = self.consumer(auto_offset_reset=None)
|
||||||
|
# Move fetch offset ahead of 300 message (out of range)
|
||||||
|
consumer.seek(300, 2)
|
||||||
|
with self.assertRaises(OffsetOutOfRangeError):
|
||||||
|
consumer.get_message()
|
||||||
|
|
||||||
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
|
def test_simple_consumer_load_initial_offsets(self):
|
||||||
|
self.send_messages(0, range(0, 100))
|
||||||
|
self.send_messages(1, range(100, 200))
|
||||||
|
|
||||||
|
# Create 1st consumer and change offsets
|
||||||
|
consumer = self.consumer()
|
||||||
|
self.assertEqual(consumer.offsets, {0: 0, 1: 0})
|
||||||
|
consumer.offsets.update({0:51, 1:101})
|
||||||
|
# Update counter after manual offsets update
|
||||||
|
consumer.count_since_commit += 1
|
||||||
|
consumer.commit()
|
||||||
|
|
||||||
|
# Create 2nd consumer and check initial offsets
|
||||||
|
consumer = self.consumer(auto_commit=False)
|
||||||
|
self.assertEqual(consumer.offsets, {0: 51, 1: 101})
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_simple_consumer__seek(self):
|
def test_simple_consumer__seek(self):
|
||||||
self.send_messages(0, range(0, 100))
|
self.send_messages(0, range(0, 100))
|
||||||
@@ -100,17 +164,31 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
consumer.seek(-13, 2)
|
consumer.seek(-13, 2)
|
||||||
self.assert_message_count([ message for message in consumer ], 13)
|
self.assert_message_count([ message for message in consumer ], 13)
|
||||||
|
|
||||||
|
# Set absolute offset
|
||||||
|
consumer.seek(100)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 0)
|
||||||
|
consumer.seek(100, partition=0)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 0)
|
||||||
|
consumer.seek(101, partition=1)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 0)
|
||||||
|
consumer.seek(90, partition=0)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 10)
|
||||||
|
consumer.seek(20, partition=1)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 80)
|
||||||
|
consumer.seek(0, partition=1)
|
||||||
|
self.assert_message_count([ message for message in consumer ], 100)
|
||||||
|
|
||||||
consumer.stop()
|
consumer.stop()
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_simple_consumer_blocking(self):
|
def test_simple_consumer_blocking(self):
|
||||||
consumer = self.consumer()
|
consumer = self.consumer()
|
||||||
|
|
||||||
# Ask for 5 messages, nothing in queue, block 5 seconds
|
# Ask for 5 messages, nothing in queue, block 1 second
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
messages = consumer.get_messages(block=True, timeout=5)
|
messages = consumer.get_messages(block=True, timeout=1)
|
||||||
self.assert_message_count(messages, 0)
|
self.assert_message_count(messages, 0)
|
||||||
self.assertGreaterEqual(t.interval, 5)
|
self.assertGreaterEqual(t.interval, 1)
|
||||||
|
|
||||||
self.send_messages(0, range(0, 10))
|
self.send_messages(0, range(0, 10))
|
||||||
|
|
||||||
@@ -120,11 +198,19 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assert_message_count(messages, 5)
|
self.assert_message_count(messages, 5)
|
||||||
self.assertLessEqual(t.interval, 1)
|
self.assertLessEqual(t.interval, 1)
|
||||||
|
|
||||||
# Ask for 10 messages, get 5 back, block 5 seconds
|
# Ask for 10 messages, get 5 back, block 1 second
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
messages = consumer.get_messages(count=10, block=True, timeout=5)
|
messages = consumer.get_messages(count=10, block=True, timeout=1)
|
||||||
self.assert_message_count(messages, 5)
|
self.assert_message_count(messages, 5)
|
||||||
self.assertGreaterEqual(t.interval, 5)
|
self.assertGreaterEqual(t.interval, 1)
|
||||||
|
|
||||||
|
# Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
|
||||||
|
# second, get 5 back, no blocking
|
||||||
|
self.send_messages(0, range(0, 5))
|
||||||
|
with Timer() as t:
|
||||||
|
messages = consumer.get_messages(count=10, block=1, timeout=1)
|
||||||
|
self.assert_message_count(messages, 5)
|
||||||
|
self.assertLessEqual(t.interval, 1)
|
||||||
|
|
||||||
consumer.stop()
|
consumer.stop()
|
||||||
|
|
||||||
@@ -172,12 +258,12 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
def test_multi_process_consumer_blocking(self):
|
def test_multi_process_consumer_blocking(self):
|
||||||
consumer = self.consumer(consumer = MultiProcessConsumer)
|
consumer = self.consumer(consumer = MultiProcessConsumer)
|
||||||
|
|
||||||
# Ask for 5 messages, No messages in queue, block 5 seconds
|
# Ask for 5 messages, No messages in queue, block 1 second
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
messages = consumer.get_messages(block=True, timeout=5)
|
messages = consumer.get_messages(block=True, timeout=1)
|
||||||
self.assert_message_count(messages, 0)
|
self.assert_message_count(messages, 0)
|
||||||
|
|
||||||
self.assertGreaterEqual(t.interval, 5)
|
self.assertGreaterEqual(t.interval, 1)
|
||||||
|
|
||||||
# Send 10 messages
|
# Send 10 messages
|
||||||
self.send_messages(0, range(0, 10))
|
self.send_messages(0, range(0, 10))
|
||||||
@@ -188,11 +274,21 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assert_message_count(messages, 5)
|
self.assert_message_count(messages, 5)
|
||||||
self.assertLessEqual(t.interval, 1)
|
self.assertLessEqual(t.interval, 1)
|
||||||
|
|
||||||
# Ask for 10 messages, 5 in queue, block 5 seconds
|
# Ask for 10 messages, 5 in queue, block 1 second
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
messages = consumer.get_messages(count=10, block=True, timeout=5)
|
messages = consumer.get_messages(count=10, block=True, timeout=1)
|
||||||
self.assert_message_count(messages, 5)
|
self.assert_message_count(messages, 5)
|
||||||
self.assertGreaterEqual(t.interval, 4.95)
|
self.assertGreaterEqual(t.interval, 1)
|
||||||
|
|
||||||
|
# Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
|
||||||
|
# second, get at least one back, no blocking
|
||||||
|
self.send_messages(0, range(0, 5))
|
||||||
|
with Timer() as t:
|
||||||
|
messages = consumer.get_messages(count=10, block=1, timeout=1)
|
||||||
|
received_message_count = len(messages)
|
||||||
|
self.assertGreaterEqual(received_message_count, 1)
|
||||||
|
self.assert_message_count(messages, received_message_count)
|
||||||
|
self.assertLessEqual(t.interval, 1)
|
||||||
|
|
||||||
consumer.stop()
|
consumer.stop()
|
||||||
|
|
||||||
@@ -201,7 +297,10 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.send_messages(0, range(0, 10))
|
self.send_messages(0, range(0, 10))
|
||||||
self.send_messages(1, range(10, 20))
|
self.send_messages(1, range(10, 20))
|
||||||
|
|
||||||
consumer = MultiProcessConsumer(self.client, "group1", self.topic, auto_commit=False)
|
# set group to None and auto_commit to False to avoid interactions w/
|
||||||
|
# offset commit/fetch apis
|
||||||
|
consumer = MultiProcessConsumer(self.client, None, self.topic,
|
||||||
|
auto_commit=False, iter_timeout=0)
|
||||||
|
|
||||||
self.assertEqual(consumer.pending(), 20)
|
self.assertEqual(consumer.pending(), 20)
|
||||||
self.assertEqual(consumer.pending(partitions=[0]), 10)
|
self.assertEqual(consumer.pending(partitions=[0]), 10)
|
||||||
@@ -209,6 +308,24 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
consumer.stop()
|
consumer.stop()
|
||||||
|
|
||||||
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
|
def test_multi_process_consumer_load_initial_offsets(self):
|
||||||
|
self.send_messages(0, range(0, 10))
|
||||||
|
self.send_messages(1, range(10, 20))
|
||||||
|
|
||||||
|
# Create 1st consumer and change offsets
|
||||||
|
consumer = self.consumer()
|
||||||
|
self.assertEqual(consumer.offsets, {0: 0, 1: 0})
|
||||||
|
consumer.offsets.update({0:5, 1:15})
|
||||||
|
# Update counter after manual offsets update
|
||||||
|
consumer.count_since_commit += 1
|
||||||
|
consumer.commit()
|
||||||
|
|
||||||
|
# Create 2nd consumer and check initial offsets
|
||||||
|
consumer = self.consumer(consumer = MultiProcessConsumer,
|
||||||
|
auto_commit=False)
|
||||||
|
self.assertEqual(consumer.offsets, {0: 5, 1: 15})
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_large_messages(self):
|
def test_large_messages(self):
|
||||||
# Produce 10 "normal" size messages
|
# Produce 10 "normal" size messages
|
||||||
@@ -257,7 +374,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
big_consumer.stop()
|
big_consumer.stop()
|
||||||
|
|
||||||
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
def test_offset_behavior__resuming_behavior(self):
|
def test_offset_behavior__resuming_behavior(self):
|
||||||
self.send_messages(0, range(0, 100))
|
self.send_messages(0, range(0, 100))
|
||||||
self.send_messages(1, range(100, 200))
|
self.send_messages(1, range(100, 200))
|
||||||
@@ -284,6 +401,41 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
consumer1.stop()
|
consumer1.stop()
|
||||||
consumer2.stop()
|
consumer2.stop()
|
||||||
|
|
||||||
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
|
def test_multi_process_offset_behavior__resuming_behavior(self):
|
||||||
|
self.send_messages(0, range(0, 100))
|
||||||
|
self.send_messages(1, range(100, 200))
|
||||||
|
|
||||||
|
# Start a consumer
|
||||||
|
consumer1 = self.consumer(
|
||||||
|
consumer=MultiProcessConsumer,
|
||||||
|
auto_commit_every_t = None,
|
||||||
|
auto_commit_every_n = 20,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Grab the first 195 messages
|
||||||
|
output_msgs1 = []
|
||||||
|
idx = 0
|
||||||
|
for message in consumer1:
|
||||||
|
output_msgs1.append(message.message.value)
|
||||||
|
idx += 1
|
||||||
|
if idx >= 195:
|
||||||
|
break
|
||||||
|
self.assert_message_count(output_msgs1, 195)
|
||||||
|
|
||||||
|
# The total offset across both partitions should be at 180
|
||||||
|
consumer2 = self.consumer(
|
||||||
|
consumer=MultiProcessConsumer,
|
||||||
|
auto_commit_every_t = None,
|
||||||
|
auto_commit_every_n = 20,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 181-200
|
||||||
|
self.assert_message_count([ message for message in consumer2 ], 20)
|
||||||
|
|
||||||
|
consumer1.stop()
|
||||||
|
consumer2.stop()
|
||||||
|
|
||||||
# TODO: Make this a unit test -- should not require integration
|
# TODO: Make this a unit test -- should not require integration
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_fetch_buffer_size(self):
|
def test_fetch_buffer_size(self):
|
||||||
@@ -330,7 +482,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
consumer = self.kafka_consumer(auto_offset_reset='smallest',
|
consumer = self.kafka_consumer(auto_offset_reset='smallest',
|
||||||
consumer_timeout_ms=TIMEOUT_MS)
|
consumer_timeout_ms=TIMEOUT_MS)
|
||||||
|
|
||||||
# Ask for 5 messages, nothing in queue, block 5 seconds
|
# Ask for 5 messages, nothing in queue, block 500ms
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
with self.assertRaises(ConsumerTimeout):
|
with self.assertRaises(ConsumerTimeout):
|
||||||
msg = consumer.next()
|
msg = consumer.next()
|
||||||
@@ -347,7 +499,7 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(len(messages), 5)
|
self.assertEqual(len(messages), 5)
|
||||||
self.assertLess(t.interval, TIMEOUT_MS / 1000.0 )
|
self.assertLess(t.interval, TIMEOUT_MS / 1000.0 )
|
||||||
|
|
||||||
# Ask for 10 messages, get 5 back, block 5 seconds
|
# Ask for 10 messages, get 5 back, block 500ms
|
||||||
messages = set()
|
messages = set()
|
||||||
with Timer() as t:
|
with Timer() as t:
|
||||||
with self.assertRaises(ConsumerTimeout):
|
with self.assertRaises(ConsumerTimeout):
|
||||||
@@ -357,9 +509,9 @@ class TestConsumerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(len(messages), 5)
|
self.assertEqual(len(messages), 5)
|
||||||
self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 )
|
self.assertGreaterEqual(t.interval, TIMEOUT_MS / 1000.0 )
|
||||||
|
|
||||||
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1")
|
||||||
def test_kafka_consumer__offset_commit_resume(self):
|
def test_kafka_consumer__offset_commit_resume(self):
|
||||||
GROUP_ID = random_string(10)
|
GROUP_ID = random_string(10).encode('utf-8')
|
||||||
|
|
||||||
self.send_messages(0, range(0, 100))
|
self.send_messages(0, range(0, 100))
|
||||||
self.send_messages(1, range(100, 200))
|
self.send_messages(1, range(100, 200))
|
||||||
|
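The KafkaConsumer tests above rely on consumer_timeout_ms making next() raise ConsumerTimeout once nothing more arrives within the window. A hedged sketch of draining a consumer that behaves this way; consumer construction is elided and only the exception-driven loop is shown.

from kafka.common import ConsumerTimeout

def drain(consumer):
    """Collect messages until the consumer times out waiting for more."""
    messages = []
    try:
        while True:
            messages.append(consumer.next())
    except ConsumerTimeout:
        pass  # nothing arrived within consumer_timeout_ms
    return messages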
|||||||
@@ -2,11 +2,10 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from . import unittest
|
from kafka import KafkaClient, SimpleConsumer, KeyedProducer
|
||||||
|
|
||||||
from kafka import KafkaClient, SimpleConsumer
|
|
||||||
from kafka.common import TopicAndPartition, FailedPayloadsError, ConnectionError
|
from kafka.common import TopicAndPartition, FailedPayloadsError, ConnectionError
|
||||||
from kafka.producer.base import Producer
|
from kafka.producer.base import Producer
|
||||||
|
from kafka.util import kafka_bytestring
|
||||||
|
|
||||||
from test.fixtures import ZookeeperFixture, KafkaFixture
|
from test.fixtures import ZookeeperFixture, KafkaFixture
|
||||||
from test.testutil import (
|
from test.testutil import (
|
||||||
@@ -14,46 +13,56 @@ from test.testutil import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class TestFailover(KafkaIntegrationTestCase):
|
class TestFailover(KafkaIntegrationTestCase):
|
||||||
create_client = False
|
create_client = False
|
||||||
|
|
||||||
@classmethod
|
def setUp(self):
|
||||||
def setUpClass(cls): # noqa
|
|
||||||
if not os.environ.get('KAFKA_VERSION'):
|
if not os.environ.get('KAFKA_VERSION'):
|
||||||
return
|
return
|
||||||
|
|
||||||
zk_chroot = random_string(10)
|
zk_chroot = random_string(10)
|
||||||
replicas = 2
|
replicas = 3
|
||||||
partitions = 2
|
partitions = 3
|
||||||
|
|
||||||
# mini zookeeper, 2 kafka brokers
|
# mini zookeeper, 3 kafka brokers
|
||||||
cls.zk = ZookeeperFixture.instance()
|
self.zk = ZookeeperFixture.instance()
|
||||||
kk_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
|
kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
|
||||||
cls.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]
|
self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]
|
||||||
|
|
||||||
hosts = ['%s:%d' % (b.host, b.port) for b in cls.brokers]
|
hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
|
||||||
cls.client = KafkaClient(hosts)
|
self.client = KafkaClient(hosts)
|
||||||
|
super(TestFailover, self).setUp()
|
||||||
|
|
||||||
@classmethod
|
def tearDown(self):
|
||||||
def tearDownClass(cls):
|
super(TestFailover, self).tearDown()
|
||||||
if not os.environ.get('KAFKA_VERSION'):
|
if not os.environ.get('KAFKA_VERSION'):
|
||||||
return
|
return
|
||||||
|
|
||||||
cls.client.close()
|
self.client.close()
|
||||||
for broker in cls.brokers:
|
for broker in self.brokers:
|
||||||
broker.close()
|
broker.close()
|
||||||
cls.zk.close()
|
self.zk.close()
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_switch_leader(self):
|
def test_switch_leader(self):
|
||||||
topic = self.topic
|
topic = self.topic
|
||||||
partition = 0
|
partition = 0
|
||||||
|
|
||||||
# Test the base class Producer -- send_messages to a specific partition
|
# Testing the base Producer class here so that we can easily send
|
||||||
|
# messages to a specific partition, kill the leader for that partition
|
||||||
|
# and check that after another broker takes leadership the producer
|
||||||
|
# is able to resume sending messages
|
||||||
|
|
||||||
|
# require that the server commit messages to all in-sync replicas
|
||||||
|
# so that failover doesn't lose any messages on server-side
|
||||||
|
# and we can assert that server-side message count equals client-side
|
||||||
producer = Producer(self.client, async=False,
|
producer = Producer(self.client, async=False,
|
||||||
req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)
|
req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)
|
||||||
|
|
||||||
# Send 10 random messages
|
# Send 100 random messages to a specific partition
|
||||||
self._send_random_messages(producer, topic, partition, 100)
|
self._send_random_messages(producer, topic, partition, 100)
|
||||||
|
|
||||||
# kill leader for partition
|
# kill leader for partition
|
||||||
@@ -65,12 +74,12 @@ class TestFailover(KafkaIntegrationTestCase):
|
|||||||
timeout = 60
|
timeout = 60
|
||||||
while not recovered and (time.time() - started) < timeout:
|
while not recovered and (time.time() - started) < timeout:
|
||||||
try:
|
try:
|
||||||
logging.debug("attempting to send 'success' message after leader killed")
|
log.debug("attempting to send 'success' message after leader killed")
|
||||||
producer.send_messages(topic, partition, b'success')
|
producer.send_messages(topic, partition, b'success')
|
||||||
logging.debug("success!")
|
log.debug("success!")
|
||||||
recovered = True
|
recovered = True
|
||||||
except (FailedPayloadsError, ConnectionError):
|
except (FailedPayloadsError, ConnectionError):
|
||||||
logging.debug("caught exception sending message -- will retry")
|
log.debug("caught exception sending message -- will retry")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Verify we successfully sent the message
|
# Verify we successfully sent the message
|
||||||
@@ -80,63 +89,132 @@ class TestFailover(KafkaIntegrationTestCase):
|
|||||||
self._send_random_messages(producer, topic, partition, 100)
|
self._send_random_messages(producer, topic, partition, 100)
|
||||||
|
|
||||||
# count number of messages
|
# count number of messages
|
||||||
# Should be equal to 10 before + 1 recovery + 10 after
|
# Should be equal to 100 before + 1 recovery + 100 after
|
||||||
self.assert_message_count(topic, 201, partitions=(partition,))
|
# at_least=True because exactly once delivery isn't really a thing
|
||||||
|
self.assert_message_count(topic, 201, partitions=(partition,),
|
||||||
|
at_least=True)
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
#@kafka_versions("all")
|
|
||||||
@unittest.skip("async producer does not support reliable failover yet")
|
|
||||||
def test_switch_leader_async(self):
|
def test_switch_leader_async(self):
|
||||||
topic = self.topic
|
topic = self.topic
|
||||||
partition = 0
|
partition = 0
|
||||||
|
|
||||||
# Test the base class Producer -- send_messages to a specific partition
|
# Test the base class Producer -- send_messages to a specific partition
|
||||||
producer = Producer(self.client, async=True)
|
producer = Producer(self.client, async=True,
|
||||||
|
batch_send_every_n=15,
|
||||||
|
batch_send_every_t=3,
|
||||||
|
req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
|
||||||
|
async_log_messages_on_error=False)
|
||||||
|
|
||||||
# Send 10 random messages
|
# Send 10 random messages
|
||||||
self._send_random_messages(producer, topic, partition, 10)
|
self._send_random_messages(producer, topic, partition, 10)
|
||||||
|
self._send_random_messages(producer, topic, partition + 1, 10)
|
||||||
|
|
||||||
# kill leader for partition
|
# kill leader for partition
|
||||||
self._kill_leader(topic, partition)
|
self._kill_leader(topic, partition)
|
||||||
|
|
||||||
logging.debug("attempting to send 'success' message after leader killed")
|
log.debug("attempting to send 'success' message after leader killed")
|
||||||
|
|
||||||
# in async mode, this should return immediately
|
# in async mode, this should return immediately
|
||||||
producer.send_messages(topic, partition, 'success')
|
producer.send_messages(topic, partition, b'success')
|
||||||
|
producer.send_messages(topic, partition + 1, b'success')
|
||||||
|
|
||||||
# send to new leader
|
# send to new leader
|
||||||
self._send_random_messages(producer, topic, partition, 10)
|
self._send_random_messages(producer, topic, partition, 10)
|
||||||
|
self._send_random_messages(producer, topic, partition + 1, 10)
|
||||||
|
|
||||||
# wait until producer queue is empty
|
# Stop the producer and wait for it to shutdown
|
||||||
while not producer.queue.empty():
|
|
||||||
time.sleep(0.1)
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
started = time.time()
|
||||||
|
timeout = 60
|
||||||
|
while (time.time() - started) < timeout:
|
||||||
|
if not producer.thread.is_alive():
|
||||||
|
break
|
||||||
|
time.sleep(0.1)
|
||||||
|
else:
|
||||||
|
self.fail('timeout waiting for producer queue to empty')
|
||||||
|
|
||||||
# count number of messages
|
# count number of messages
|
||||||
# Should be equal to 10 before + 1 recovery + 10 after
|
# Should be equal to 10 before + 1 recovery + 10 after
|
||||||
self.assert_message_count(topic, 21, partitions=(partition,))
|
# at_least=True because exactly once delivery isn't really a thing
|
||||||
|
self.assert_message_count(topic, 21, partitions=(partition,),
|
||||||
|
at_least=True)
|
||||||
|
self.assert_message_count(topic, 21, partitions=(partition + 1,),
|
||||||
|
at_least=True)
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_switch_leader_keyed_producer(self):
|
||||||
|
topic = self.topic
|
||||||
|
|
||||||
|
producer = KeyedProducer(self.client, async=False)
|
||||||
|
|
||||||
|
# Send 10 random messages
|
||||||
|
for _ in range(10):
|
||||||
|
key = random_string(3).encode('utf-8')
|
||||||
|
msg = random_string(10).encode('utf-8')
|
||||||
|
producer.send_messages(topic, key, msg)
|
||||||
|
|
||||||
|
# kill leader for partition 0
|
||||||
|
self._kill_leader(topic, 0)
|
||||||
|
|
||||||
|
recovered = False
|
||||||
|
started = time.time()
|
||||||
|
timeout = 60
|
||||||
|
while not recovered and (time.time() - started) < timeout:
|
||||||
|
try:
|
||||||
|
key = random_string(3).encode('utf-8')
|
||||||
|
msg = random_string(10).encode('utf-8')
|
||||||
|
producer.send_messages(topic, key, msg)
|
||||||
|
if producer.partitioners[kafka_bytestring(topic)].partition(key) == 0:
|
||||||
|
recovered = True
|
||||||
|
except (FailedPayloadsError, ConnectionError):
|
||||||
|
log.debug("caught exception sending message -- will retry")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Verify we successfully sent the message
|
||||||
|
self.assertTrue(recovered)
|
||||||
|
|
||||||
|
# send some more messages just to make sure no more exceptions
|
||||||
|
for _ in range(10):
|
||||||
|
key = random_string(3).encode('utf-8')
|
||||||
|
msg = random_string(10).encode('utf-8')
|
||||||
|
producer.send_messages(topic, key, msg)
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_switch_leader_simple_consumer(self):
|
||||||
|
producer = Producer(self.client, async=False)
|
||||||
|
consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
|
||||||
|
self._send_random_messages(producer, self.topic, 0, 2)
|
||||||
|
consumer.get_messages()
|
||||||
|
self._kill_leader(self.topic, 0)
|
||||||
|
consumer.get_messages()
|
||||||
|
|
||||||
    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            logging.debug('_send_random_message to %s:%d -- try %d', topic, partition, j)
            resp = producer.send_messages(topic, partition, random_string(10))
            if len(resp) > 0:
                self.assertEqual(resp[0].error, 0)
            logging.debug('_send_random_message to %s:%d -- try %d success', topic, partition, j)
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except:
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        leader = self.client.topics_to_brokers[TopicAndPartition(topic, partition)]
        leader = self.client.topics_to_brokers[TopicAndPartition(kafka_bytestring(topic), partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self, topic, check_count, timeout=10, partitions=None):
    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = KafkaClient(hosts)
        group = random_string(10)
        consumer = SimpleConsumer(client, group, topic,
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)
@@ -145,10 +223,17 @@ class TestFailover(KafkaIntegrationTestCase):
        pending = consumer.pending(partitions)

        # Keep checking if it isn't immediately correct, subject to timeout
        while pending != check_count and (time.time() - started_at < timeout):
        while pending < check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)
            time.sleep(0.5)

        consumer.stop()
        client.close()

        self.assertEqual(pending, check_count)
        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
23
test/test_partitioner.py
Normal file
@@ -0,0 +1,23 @@
import six
from . import unittest

from kafka.partitioner import (Murmur2Partitioner)

class TestMurmurPartitioner(unittest.TestCase):
    def test_hash_bytes(self):
        p = Murmur2Partitioner(range(1000))
        self.assertEqual(p.partition(bytearray(b'test')), p.partition(b'test'))

    def test_hash_encoding(self):
        p = Murmur2Partitioner(range(1000))
        self.assertEqual(p.partition('test'), p.partition(u'test'))

    def test_murmur2_java_compatibility(self):
        p = Murmur2Partitioner(range(1000))
        # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
        self.assertEqual(681, p.partition(b''))
        self.assertEqual(524, p.partition(b'a'))
        self.assertEqual(434, p.partition(b'ab'))
        self.assertEqual(107, p.partition(b'abc'))
        self.assertEqual(566, p.partition(b'123456789'))
        self.assertEqual(742, p.partition(b'\x00 '))
@@ -1,11 +1,29 @@
# -*- coding: utf-8 -*-

import collections
import logging
import time

from mock import MagicMock
from mock import MagicMock, patch
from . import unittest

from kafka.producer.base import Producer
from kafka import KafkaClient, SimpleProducer, KeyedProducer
from kafka.common import (
    AsyncProducerQueueFull, FailedPayloadsError, NotLeaderForPartitionError,
    ProduceResponse, RetryOptions, TopicAndPartition
)
from kafka.producer.base import Producer, _send_upstream
from kafka.protocol import CODEC_NONE

import threading
try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue
try:
    xrange
except NameError:
    xrange = range


class TestKafkaProducer(unittest.TestCase):
@@ -15,7 +33,8 @@ class TestKafkaProducer(unittest.TestCase):
        topic = b"test-topic"
        partition = 0

        bad_data_types = (u'你怎么样?', 12, ['a', 'list'], ('a', 'tuple'), {'a': 'dict'})
        bad_data_types = (u'你怎么样?', 12, ['a', 'list'],
                          ('a', 'tuple'), {'a': 'dict'}, None,)
        for m in bad_data_types:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s", type(m))
@@ -26,9 +45,26 @@ class TestKafkaProducer(unittest.TestCase):
                # This should not raise an exception
                producer.send_messages(topic, partition, m)

    def test_topic_message_types(self):
    def test_keyedproducer_message_types(self):
        from kafka.producer.simple import SimpleProducer
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]
        producer = KeyedProducer(client)
        topic = b"test-topic"
        key = b"testkey"

        bad_data_types = (u'你怎么样?', 12, ['a', 'list'],
                          ('a', 'tuple'), {'a': 'dict'},)
        for m in bad_data_types:
            with self.assertRaises(TypeError):
                logging.debug("attempting to send message of type %s", type(m))
                producer.send_messages(topic, key, m)

        good_data_types = (b'a string!', None,)
        for m in good_data_types:
            # This should not raise an exception
            producer.send_messages(topic, key, m)

    def test_topic_message_types(self):
        client = MagicMock()

        def partitions(topic):
@@ -40,3 +76,188 @@ class TestKafkaProducer(unittest.TestCase):
        topic = b"test-topic"
        producer.send_messages(topic, b'hi')
        assert client.send_produce_request.called

    @patch('kafka.producer.base._send_upstream')
    def test_producer_async_queue_overfilled(self, mock):
        queue_size = 2
        producer = Producer(MagicMock(), async=True,
                            async_queue_maxsize=queue_size)

        topic = b'test-topic'
        partition = 0
        message = b'test-message'

        with self.assertRaises(AsyncProducerQueueFull):
            message_list = [message] * (queue_size + 1)
            producer.send_messages(topic, partition, *message_list)
        self.assertEqual(producer.queue.qsize(), queue_size)
        for _ in xrange(producer.queue.qsize()):
            producer.queue.get()

    def test_producer_sync_fail_on_error(self):
        error = FailedPayloadsError('failure')
        with patch.object(KafkaClient, 'load_metadata_for_topics'):
            with patch.object(KafkaClient, 'get_partition_ids_for_topic', return_value=[0, 1]):
                with patch.object(KafkaClient, '_send_broker_aware_request', return_value = [error]):

                    client = KafkaClient(MagicMock())
                    producer = SimpleProducer(client, async=False, sync_fail_on_error=False)

                    # This should not raise
                    (response,) = producer.send_messages('foobar', b'test message')
                    self.assertEqual(response, error)

                    producer = SimpleProducer(client, async=False, sync_fail_on_error=True)
                    with self.assertRaises(FailedPayloadsError):
                        producer.send_messages('foobar', b'test message')

    def test_cleanup_is_not_called_on_stopped_producer(self):
        producer = Producer(MagicMock(), async=True)
        producer.stopped = True
        with patch.object(producer, 'stop') as mocked_stop:
            producer._cleanup_func(producer)
            self.assertEqual(mocked_stop.call_count, 0)

    def test_cleanup_is_called_on_running_producer(self):
        producer = Producer(MagicMock(), async=True)
        producer.stopped = False
        with patch.object(producer, 'stop') as mocked_stop:
            producer._cleanup_func(producer)
            self.assertEqual(mocked_stop.call_count, 1)


class TestKafkaProducerSendUpstream(unittest.TestCase):

    def setUp(self):
        self.client = MagicMock()
        self.queue = Queue()

    def _run_process(self, retries_limit=3, sleep_timeout=1):
        # run _send_upstream process with the queue
        stop_event = threading.Event()
        retry_options = RetryOptions(limit=retries_limit,
                                     backoff_ms=50,
                                     retry_on_timeouts=False)
        self.thread = threading.Thread(
            target=_send_upstream,
            args=(self.queue, self.client, CODEC_NONE,
                  0.3, # batch time (seconds)
                  3, # batch length
                  Producer.ACK_AFTER_LOCAL_WRITE,
                  Producer.DEFAULT_ACK_TIMEOUT,
                  retry_options,
                  stop_event))
        self.thread.daemon = True
        self.thread.start()
        time.sleep(sleep_timeout)
        stop_event.set()

    def test_wo_retries(self):

        # lets create a queue and add 10 messages for 1 partition
        for i in range(10):
            self.queue.put((TopicAndPartition("test", 0), "msg %i", "key %i"))

        self._run_process()

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 4 non-void calls:
        # 3 batches of 3 msgs each + 1 batch of 1 message
        self.assertEqual(self.client.send_produce_request.call_count, 4)

    def test_first_send_failed(self):

        # lets create a queue and add 10 messages for 10 different partitions
        # to show how retries should work ideally
        for i in range(10):
            self.queue.put((TopicAndPartition("test", i), "msg %i", "key %i"))

        # Mock offsets counter for closure
        offsets = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
        self.client.is_first_time = True
        def send_side_effect(reqs, *args, **kwargs):
            if self.client.is_first_time:
                self.client.is_first_time = False
                return [FailedPayloadsError(req) for req in reqs]
            responses = []
            for req in reqs:
                offset = offsets[req.topic][req.partition]
                offsets[req.topic][req.partition] += len(req.messages)
                responses.append(
                    ProduceResponse(req.topic, req.partition, 0, offset)
                )
            return responses

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(2)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 5 non-void calls: 1st failed batch of 3 msgs
        # plus 3 batches of 3 msgs each + 1 batch of 1 message
        self.assertEqual(self.client.send_produce_request.call_count, 5)

    def test_with_limited_retries(self):

        # lets create a queue and add 10 messages for 10 different partitions
        # to show how retries should work ideally
        for i in range(10):
            self.queue.put((TopicAndPartition("test", i), "msg %i" % i, "key %i" % i))

        def send_side_effect(reqs, *args, **kwargs):
            return [FailedPayloadsError(req) for req in reqs]

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(3, 3)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 16 non-void calls:
        # 3 initial batches of 3 msgs each + 1 initial batch of 1 msg +
        # 3 retries of the batches above = (1 + 3 retries) * 4 batches = 16
        self.assertEqual(self.client.send_produce_request.call_count, 16)

    def test_async_producer_not_leader(self):

        for i in range(10):
            self.queue.put((TopicAndPartition("test", i), "msg %i", "key %i"))

        # Mock offsets counter for closure
        offsets = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
        self.client.is_first_time = True
        def send_side_effect(reqs, *args, **kwargs):
            if self.client.is_first_time:
                self.client.is_first_time = False
                return [ProduceResponse(req.topic, req.partition,
                                        NotLeaderForPartitionError.errno, -1)
                        for req in reqs]

            responses = []
            for req in reqs:
                offset = offsets[req.topic][req.partition]
                offsets[req.topic][req.partition] += len(req.messages)
                responses.append(
                    ProduceResponse(req.topic, req.partition, 0, offset)
                )
            return responses

        self.client.send_produce_request.side_effect = send_side_effect

        self._run_process(2)

        # the queue should be void at the end of the test
        self.assertEqual(self.queue.empty(), True)

        # there should be 5 non-void calls: 1st failed batch of 3 msgs
        # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
        self.assertEqual(self.client.send_produce_request.call_count, 5)

    def tearDown(self):
        for _ in xrange(self.queue.qsize()):
            self.queue.get()
@@ -14,12 +14,13 @@ from kafka.common import (
|
|||||||
FetchRequest, ProduceRequest,
|
FetchRequest, ProduceRequest,
|
||||||
UnknownTopicOrPartitionError, LeaderNotAvailableError
|
UnknownTopicOrPartitionError, LeaderNotAvailableError
|
||||||
)
|
)
|
||||||
|
from kafka.producer.base import Producer
|
||||||
|
|
||||||
from test.fixtures import ZookeeperFixture, KafkaFixture
|
from test.fixtures import ZookeeperFixture, KafkaFixture
|
||||||
from test.testutil import KafkaIntegrationTestCase, kafka_versions
|
from test.testutil import KafkaIntegrationTestCase, kafka_versions
|
||||||
|
|
||||||
|
|
||||||
class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
||||||
topic = b'produce_topic'
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls): # noqa
|
def setUpClass(cls): # noqa
|
||||||
@@ -71,9 +72,9 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
start_offset = self.current_offset(self.topic, 0)
|
start_offset = self.current_offset(self.topic, 0)
|
||||||
|
|
||||||
message1 = create_gzip_message([
|
message1 = create_gzip_message([
|
||||||
("Gzipped 1 %d" % i).encode('utf-8') for i in range(100)])
|
(("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)])
|
||||||
message2 = create_gzip_message([
|
message2 = create_gzip_message([
|
||||||
("Gzipped 2 %d" % i).encode('utf-8') for i in range(100)])
|
(("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)])
|
||||||
|
|
||||||
self.assert_produce_request(
|
self.assert_produce_request(
|
||||||
[ message1, message2 ],
|
[ message1, message2 ],
|
||||||
@@ -87,8 +88,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
start_offset = self.current_offset(self.topic, 0)
|
start_offset = self.current_offset(self.topic, 0)
|
||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_snappy_message(["Snappy 1 %d" % i for i in range(100)]),
|
create_snappy_message([("Snappy 1 %d" % i, None) for i in range(100)]),
|
||||||
create_snappy_message(["Snappy 2 %d" % i for i in range(100)]),
|
create_snappy_message([("Snappy 2 %d" % i, None) for i in range(100)]),
|
||||||
],
|
],
|
||||||
start_offset,
|
start_offset,
|
||||||
200,
|
200,
|
||||||
@@ -102,13 +103,13 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
messages = [
|
messages = [
|
||||||
create_message(b"Just a plain message"),
|
create_message(b"Just a plain message"),
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped %d" % i).encode('utf-8') for i in range(100)]),
|
(("Gzipped %d" % i).encode('utf-8'), None) for i in range(100)]),
|
||||||
]
|
]
|
||||||
|
|
||||||
# All snappy integration tests fail with nosnappyjava
|
# All snappy integration tests fail with nosnappyjava
|
||||||
if False and has_snappy():
|
if False and has_snappy():
|
||||||
msg_count += 100
|
msg_count += 100
|
||||||
messages.append(create_snappy_message(["Snappy %d" % i for i in range(100)]))
|
messages.append(create_snappy_message([("Snappy %d" % i, None) for i in range(100)]))
|
||||||
|
|
||||||
self.assert_produce_request(messages, start_offset, msg_count)
|
self.assert_produce_request(messages, start_offset, msg_count)
|
||||||
|
|
||||||
@@ -118,7 +119,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped batch 1, message %d" % i).encode('utf-8')
|
(("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
|
||||||
for i in range(50000)])
|
for i in range(50000)])
|
||||||
],
|
],
|
||||||
start_offset,
|
start_offset,
|
||||||
@@ -127,7 +128,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
self.assert_produce_request([
|
self.assert_produce_request([
|
||||||
create_gzip_message([
|
create_gzip_message([
|
||||||
("Gzipped batch 1, message %d" % i).encode('utf-8')
|
(("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
|
||||||
for i in range(50000)])
|
for i in range(50000)])
|
||||||
],
|
],
|
||||||
start_offset+50000,
|
start_offset+50000,
|
||||||
@@ -140,25 +141,26 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_simple_producer(self):
|
def test_simple_producer(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
start_offset1 = self.current_offset(self.topic, 1)
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
producer = SimpleProducer(self.client, random_start=False)
|
producer = SimpleProducer(self.client, random_start=False)
|
||||||
|
|
||||||
# Goes to first partition, randomly.
|
# Goes to first partition, randomly.
|
||||||
resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
|
resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
|
||||||
self.assert_produce_response(resp, start_offset0)
|
self.assert_produce_response(resp, start_offsets[0])
|
||||||
|
|
||||||
# Goes to the next partition, randomly.
|
# Goes to the next partition, randomly.
|
||||||
resp = producer.send_messages(self.topic, self.msg("three"))
|
resp = producer.send_messages(self.topic, self.msg("three"))
|
||||||
self.assert_produce_response(resp, start_offset1)
|
self.assert_produce_response(resp, start_offsets[1])
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two") ])
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two") ])
|
||||||
self.assert_fetch_offset(1, start_offset1, [ self.msg("three") ])
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("three") ])
|
||||||
|
|
||||||
# Goes back to the first partition because there's only two partitions
|
# Goes back to the first partition because there's only two partitions
|
||||||
resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
|
resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
|
||||||
self.assert_produce_response(resp, start_offset0+2)
|
self.assert_produce_response(resp, start_offsets[0]+2)
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five") ])
|
||||||
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
@@ -194,111 +196,38 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(resp3[0].partition, 0)
|
self.assertEqual(resp3[0].partition, 0)
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_round_robin_partitioner(self):
|
def test_async_simple_producer(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partition = self.client.get_partition_ids_for_topic(self.topic)[0]
|
||||||
start_offset1 = self.current_offset(self.topic, 1)
|
start_offset = self.current_offset(self.topic, partition)
|
||||||
|
|
||||||
producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
|
producer = SimpleProducer(self.client, async=True, random_start=False)
|
||||||
resp1 = producer.send(self.topic, self.key("key1"), self.msg("one"))
|
|
||||||
resp2 = producer.send(self.topic, self.key("key2"), self.msg("two"))
|
|
||||||
resp3 = producer.send(self.topic, self.key("key3"), self.msg("three"))
|
|
||||||
resp4 = producer.send(self.topic, self.key("key4"), self.msg("four"))
|
|
||||||
|
|
||||||
self.assert_produce_response(resp1, start_offset0+0)
|
|
||||||
self.assert_produce_response(resp2, start_offset1+0)
|
|
||||||
self.assert_produce_response(resp3, start_offset0+1)
|
|
||||||
self.assert_produce_response(resp4, start_offset1+1)
|
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one"), self.msg("three") ])
|
|
||||||
self.assert_fetch_offset(1, start_offset1, [ self.msg("two"), self.msg("four") ])
|
|
||||||
|
|
||||||
producer.stop()
|
|
||||||
|
|
||||||
@kafka_versions("all")
|
|
||||||
def test_hashed_partitioner(self):
|
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
|
||||||
start_offset1 = self.current_offset(self.topic, 1)
|
|
||||||
|
|
||||||
producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
|
|
||||||
resp1 = producer.send(self.topic, self.key("1"), self.msg("one"))
|
|
||||||
resp2 = producer.send(self.topic, self.key("2"), self.msg("two"))
|
|
||||||
resp3 = producer.send(self.topic, self.key("3"), self.msg("three"))
|
|
||||||
resp4 = producer.send(self.topic, self.key("3"), self.msg("four"))
|
|
||||||
resp5 = producer.send(self.topic, self.key("4"), self.msg("five"))
|
|
||||||
|
|
||||||
offsets = {0: start_offset0, 1: start_offset1}
|
|
||||||
messages = {0: [], 1: []}
|
|
||||||
|
|
||||||
keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
|
|
||||||
resps = [resp1, resp2, resp3, resp4, resp5]
|
|
||||||
msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]
|
|
||||||
|
|
||||||
for key, resp, msg in zip(keys, resps, msgs):
|
|
||||||
k = hash(key) % 2
|
|
||||||
offset = offsets[k]
|
|
||||||
self.assert_produce_response(resp, offset)
|
|
||||||
offsets[k] += 1
|
|
||||||
messages[k].append(msg)
|
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, messages[0])
|
|
||||||
self.assert_fetch_offset(1, start_offset1, messages[1])
|
|
||||||
|
|
||||||
producer.stop()
|
|
||||||
|
|
||||||
@kafka_versions("all")
|
|
||||||
def test_acks_none(self):
|
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
|
||||||
|
|
||||||
producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_NOT_REQUIRED,
|
|
||||||
random_start=False)
|
|
||||||
resp = producer.send_messages(self.topic, self.msg("one"))
|
resp = producer.send_messages(self.topic, self.msg("one"))
|
||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
|
# flush messages
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
@kafka_versions("all")
|
self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
|
||||||
def test_acks_local_write(self):
|
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
|
||||||
|
|
||||||
producer = SimpleProducer(self.client, req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
|
|
||||||
random_start=False)
|
|
||||||
resp = producer.send_messages(self.topic, self.msg("one"))
|
|
||||||
|
|
||||||
self.assert_produce_response(resp, start_offset0)
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
|
|
||||||
|
|
||||||
producer.stop()
|
|
||||||
|
|
||||||
@kafka_versions("all")
|
|
||||||
def test_acks_cluster_commit(self):
|
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
|
||||||
|
|
||||||
producer = SimpleProducer(
|
|
||||||
self.client,
|
|
||||||
req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT,
|
|
||||||
random_start=False)
|
|
||||||
|
|
||||||
resp = producer.send_messages(self.topic, self.msg("one"))
|
|
||||||
self.assert_produce_response(resp, start_offset0)
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
|
|
||||||
|
|
||||||
producer.stop()
|
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_batched_simple_producer__triggers_by_message(self):
|
def test_batched_simple_producer__triggers_by_message(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
start_offset1 = self.current_offset(self.topic, 1)
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
|
# Configure batch producer
|
||||||
|
batch_messages = 5
|
||||||
|
batch_interval = 5
|
||||||
producer = SimpleProducer(
|
producer = SimpleProducer(
|
||||||
self.client,
|
self.client,
|
||||||
batch_send=True,
|
async=True,
|
||||||
batch_send_every_n=5,
|
batch_send_every_n=batch_messages,
|
||||||
batch_send_every_t=20,
|
batch_send_every_t=batch_interval,
|
||||||
random_start=False)
|
random_start=False)
|
||||||
|
|
||||||
# Send 5 messages and do a fetch
|
# Send 4 messages -- should not trigger a batch
|
||||||
resp = producer.send_messages(self.topic,
|
resp = producer.send_messages(
|
||||||
|
self.topic,
|
||||||
self.msg("one"),
|
self.msg("one"),
|
||||||
self.msg("two"),
|
self.msg("two"),
|
||||||
self.msg("three"),
|
self.msg("three"),
|
||||||
@@ -309,10 +238,12 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
# It hasn't sent yet
|
# It hasn't sent yet
|
||||||
self.assert_fetch_offset(0, start_offset0, [])
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [])
|
||||||
self.assert_fetch_offset(1, start_offset1, [])
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [])
|
||||||
|
|
||||||
resp = producer.send_messages(self.topic,
|
# send 3 more messages -- should trigger batch on first 5
|
||||||
|
resp = producer.send_messages(
|
||||||
|
self.topic,
|
||||||
self.msg("five"),
|
self.msg("five"),
|
||||||
self.msg("six"),
|
self.msg("six"),
|
||||||
self.msg("seven"),
|
self.msg("seven"),
|
||||||
@@ -321,34 +252,48 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
# Batch mode is async. No ack
|
# Batch mode is async. No ack
|
||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [
|
# Wait until producer has pulled all messages from internal queue
|
||||||
|
# this should signal that the first batch was sent, and the producer
|
||||||
|
# is now waiting for enough messages to batch again (or a timeout)
|
||||||
|
timeout = 5
|
||||||
|
start = time.time()
|
||||||
|
while not producer.queue.empty():
|
||||||
|
if time.time() - start > timeout:
|
||||||
|
self.fail('timeout waiting for producer queue to empty')
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
# send messages groups all *msgs in a single call to the same partition
|
||||||
|
# so we should see all messages from the first call in one partition
|
||||||
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [
|
||||||
self.msg("one"),
|
self.msg("one"),
|
||||||
self.msg("two"),
|
self.msg("two"),
|
||||||
self.msg("three"),
|
self.msg("three"),
|
||||||
self.msg("four"),
|
self.msg("four"),
|
||||||
])
|
])
|
||||||
|
|
||||||
self.assert_fetch_offset(1, start_offset1, [
|
# Because we are batching every 5 messages, we should only see one
|
||||||
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [
|
||||||
self.msg("five"),
|
self.msg("five"),
|
||||||
# self.msg("six"),
|
|
||||||
# self.msg("seven"),
|
|
||||||
])
|
])
|
||||||
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_batched_simple_producer__triggers_by_time(self):
|
def test_batched_simple_producer__triggers_by_time(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
start_offset1 = self.current_offset(self.topic, 1)
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
producer = SimpleProducer(self.client,
|
batch_interval = 5
|
||||||
batch_send=True,
|
producer = SimpleProducer(
|
||||||
|
self.client,
|
||||||
|
async=True,
|
||||||
batch_send_every_n=100,
|
batch_send_every_n=100,
|
||||||
batch_send_every_t=5,
|
batch_send_every_t=batch_interval,
|
||||||
random_start=False)
|
random_start=False)
|
||||||
|
|
||||||
# Send 5 messages and do a fetch
|
# Send 5 messages and do a fetch
|
||||||
resp = producer.send_messages(self.topic,
|
resp = producer.send_messages(
|
||||||
|
self.topic,
|
||||||
self.msg("one"),
|
self.msg("one"),
|
||||||
self.msg("two"),
|
self.msg("two"),
|
||||||
self.msg("three"),
|
self.msg("three"),
|
||||||
@@ -359,8 +304,8 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
# It hasn't sent yet
|
# It hasn't sent yet
|
||||||
self.assert_fetch_offset(0, start_offset0, [])
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [])
|
||||||
self.assert_fetch_offset(1, start_offset1, [])
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [])
|
||||||
|
|
||||||
resp = producer.send_messages(self.topic,
|
resp = producer.send_messages(self.topic,
|
||||||
self.msg("five"),
|
self.msg("five"),
|
||||||
@@ -372,16 +317,16 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
# Wait the timeout out
|
# Wait the timeout out
|
||||||
time.sleep(5)
|
time.sleep(batch_interval)
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [
|
||||||
self.msg("one"),
|
self.msg("one"),
|
||||||
self.msg("two"),
|
self.msg("two"),
|
||||||
self.msg("three"),
|
self.msg("three"),
|
||||||
self.msg("four"),
|
self.msg("four"),
|
||||||
])
|
])
|
||||||
|
|
||||||
self.assert_fetch_offset(1, start_offset1, [
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [
|
||||||
self.msg("five"),
|
self.msg("five"),
|
||||||
self.msg("six"),
|
self.msg("six"),
|
||||||
self.msg("seven"),
|
self.msg("seven"),
|
||||||
@@ -389,40 +334,168 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
|
|
||||||
|
############################
|
||||||
|
# KeyedProducer Tests #
|
||||||
|
############################
|
||||||
|
|
||||||
|
@kafka_versions("0.8.1", "0.8.1.1", "0.8.2.0")
|
||||||
|
def test_keyedproducer_null_payload(self):
|
||||||
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
|
producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
|
||||||
|
key = "test"
|
||||||
|
|
||||||
|
resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
|
||||||
|
self.assert_produce_response(resp, start_offsets[0])
|
||||||
|
resp = producer.send_messages(self.topic, self.key("key2"), None)
|
||||||
|
self.assert_produce_response(resp, start_offsets[1])
|
||||||
|
resp = producer.send_messages(self.topic, self.key("key3"), None)
|
||||||
|
self.assert_produce_response(resp, start_offsets[0]+1)
|
||||||
|
resp = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
|
||||||
|
self.assert_produce_response(resp, start_offsets[1]+1)
|
||||||
|
|
||||||
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), None ])
|
||||||
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [ None, self.msg("four") ])
|
||||||
|
|
||||||
|
producer.stop()
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_async_simple_producer(self):
|
def test_round_robin_partitioner(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
producer = SimpleProducer(self.client, async=True, random_start=False)
|
producer = KeyedProducer(self.client, partitioner=RoundRobinPartitioner)
|
||||||
resp = producer.send_messages(self.topic, self.msg("one"))
|
resp1 = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
|
||||||
self.assertEqual(len(resp), 0)
|
resp2 = producer.send_messages(self.topic, self.key("key2"), self.msg("two"))
|
||||||
|
resp3 = producer.send_messages(self.topic, self.key("key3"), self.msg("three"))
|
||||||
|
resp4 = producer.send_messages(self.topic, self.key("key4"), self.msg("four"))
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
|
self.assert_produce_response(resp1, start_offsets[0]+0)
|
||||||
|
self.assert_produce_response(resp2, start_offsets[1]+0)
|
||||||
|
self.assert_produce_response(resp3, start_offsets[0]+1)
|
||||||
|
self.assert_produce_response(resp4, start_offsets[1]+1)
|
||||||
|
|
||||||
|
self.assert_fetch_offset(partitions[0], start_offsets[0], [ self.msg("one"), self.msg("three") ])
|
||||||
|
self.assert_fetch_offset(partitions[1], start_offsets[1], [ self.msg("two"), self.msg("four") ])
|
||||||
|
|
||||||
|
producer.stop()
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_hashed_partitioner(self):
|
||||||
|
partitions = self.client.get_partition_ids_for_topic(self.topic)
|
||||||
|
start_offsets = [self.current_offset(self.topic, p) for p in partitions]
|
||||||
|
|
||||||
|
producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
|
||||||
|
resp1 = producer.send_messages(self.topic, self.key("1"), self.msg("one"))
|
||||||
|
resp2 = producer.send_messages(self.topic, self.key("2"), self.msg("two"))
|
||||||
|
resp3 = producer.send_messages(self.topic, self.key("3"), self.msg("three"))
|
||||||
|
resp4 = producer.send_messages(self.topic, self.key("3"), self.msg("four"))
|
||||||
|
resp5 = producer.send_messages(self.topic, self.key("4"), self.msg("five"))
|
||||||
|
|
||||||
|
offsets = {partitions[0]: start_offsets[0], partitions[1]: start_offsets[1]}
|
||||||
|
messages = {partitions[0]: [], partitions[1]: []}
|
||||||
|
|
||||||
|
keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
|
||||||
|
resps = [resp1, resp2, resp3, resp4, resp5]
|
||||||
|
msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]
|
||||||
|
|
||||||
|
for key, resp, msg in zip(keys, resps, msgs):
|
||||||
|
k = hash(key) % 2
|
||||||
|
partition = partitions[k]
|
||||||
|
offset = offsets[partition]
|
||||||
|
self.assert_produce_response(resp, offset)
|
||||||
|
offsets[partition] += 1
|
||||||
|
messages[partition].append(msg)
|
||||||
|
|
||||||
|
self.assert_fetch_offset(partitions[0], start_offsets[0], messages[partitions[0]])
|
||||||
|
self.assert_fetch_offset(partitions[1], start_offsets[1], messages[partitions[1]])
|
||||||
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
@kafka_versions("all")
|
@kafka_versions("all")
|
||||||
def test_async_keyed_producer(self):
|
def test_async_keyed_producer(self):
|
||||||
start_offset0 = self.current_offset(self.topic, 0)
|
partition = self.client.get_partition_ids_for_topic(self.topic)[0]
|
||||||
|
start_offset = self.current_offset(self.topic, partition)
|
||||||
|
|
||||||
producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)
|
producer = KeyedProducer(self.client, partitioner = RoundRobinPartitioner, async=True)
|
||||||
|
|
||||||
resp = producer.send(self.topic, self.key("key1"), self.msg("one"))
|
resp = producer.send_messages(self.topic, self.key("key1"), self.msg("one"))
|
||||||
self.assertEqual(len(resp), 0)
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
self.assert_fetch_offset(0, start_offset0, [ self.msg("one") ])
|
# wait for the server to report a new highwatermark
|
||||||
|
while self.current_offset(self.topic, partition) == start_offset:
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
|
||||||
|
|
||||||
producer.stop()
|
producer.stop()
|
||||||
|
|
||||||
def assert_produce_request(self, messages, initial_offset, message_ct):
|
############################
|
||||||
produce = ProduceRequest(self.topic, 0, messages=messages)
|
# Producer ACK Tests #
|
||||||
|
############################
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_acks_none(self):
|
||||||
|
partition = self.client.get_partition_ids_for_topic(self.topic)[0]
|
||||||
|
start_offset = self.current_offset(self.topic, partition)
|
||||||
|
|
||||||
|
producer = Producer(
|
||||||
|
self.client,
|
||||||
|
req_acks=Producer.ACK_NOT_REQUIRED,
|
||||||
|
)
|
||||||
|
resp = producer.send_messages(self.topic, partition, self.msg("one"))
|
||||||
|
|
||||||
|
# No response from produce request with no acks required
|
||||||
|
self.assertEqual(len(resp), 0)
|
||||||
|
|
||||||
|
# But the message should still have been delivered
|
||||||
|
self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
|
||||||
|
producer.stop()
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_acks_local_write(self):
|
||||||
|
partition = self.client.get_partition_ids_for_topic(self.topic)[0]
|
||||||
|
start_offset = self.current_offset(self.topic, partition)
|
||||||
|
|
||||||
|
producer = Producer(
|
||||||
|
self.client,
|
||||||
|
req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
|
||||||
|
)
|
||||||
|
resp = producer.send_messages(self.topic, partition, self.msg("one"))
|
||||||
|
|
||||||
|
self.assert_produce_response(resp, start_offset)
|
||||||
|
self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
|
||||||
|
|
||||||
|
producer.stop()
|
||||||
|
|
||||||
|
@kafka_versions("all")
|
||||||
|
def test_acks_cluster_commit(self):
|
||||||
|
partition = self.client.get_partition_ids_for_topic(self.topic)[0]
|
||||||
|
start_offset = self.current_offset(self.topic, partition)
|
||||||
|
|
||||||
|
producer = Producer(
|
||||||
|
self.client,
|
||||||
|
req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
|
||||||
|
)
|
||||||
|
|
||||||
|
resp = producer.send_messages(self.topic, partition, self.msg("one"))
|
||||||
|
self.assert_produce_response(resp, start_offset)
|
||||||
|
self.assert_fetch_offset(partition, start_offset, [ self.msg("one") ])
|
||||||
|
|
||||||
|
producer.stop()
|
||||||
|
|
||||||
|
def assert_produce_request(self, messages, initial_offset, message_ct,
|
||||||
|
partition=0):
|
||||||
|
produce = ProduceRequest(self.bytes_topic, partition, messages=messages)
|
||||||
|
|
||||||
# There should only be one response message from the server.
|
# There should only be one response message from the server.
|
||||||
# This will throw an exception if there's more than one.
|
# This will throw an exception if there's more than one.
|
||||||
resp = self.client.send_produce_request([ produce ])
|
resp = self.client.send_produce_request([ produce ])
|
||||||
self.assert_produce_response(resp, initial_offset)
|
self.assert_produce_response(resp, initial_offset)
|
||||||
|
|
||||||
self.assertEqual(self.current_offset(self.topic, 0), initial_offset + message_ct)
|
self.assertEqual(self.current_offset(self.topic, partition), initial_offset + message_ct)
|
||||||
|
|
||||||
def assert_produce_response(self, resp, initial_offset):
|
def assert_produce_response(self, resp, initial_offset):
|
||||||
self.assertEqual(len(resp), 1)
|
self.assertEqual(len(resp), 1)
|
||||||
@@ -433,7 +506,7 @@ class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
|
|||||||
# There should only be one response message from the server.
|
# There should only be one response message from the server.
|
||||||
# This will throw an exception if there's more than one.
|
# This will throw an exception if there's more than one.
|
||||||
|
|
||||||
resp, = self.client.send_fetch_request([ FetchRequest(self.topic, partition, start_offset, 1024) ])
|
resp, = self.client.send_fetch_request([ FetchRequest(self.bytes_topic, partition, start_offset, 1024) ])
|
||||||
|
|
||||||
self.assertEqual(resp.error, 0)
|
self.assertEqual(resp.error, 0)
|
||||||
self.assertEqual(resp.partition, partition)
|
self.assertEqual(resp.partition, partition)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from kafka.common import (
|
|||||||
ProduceResponse, FetchResponse, OffsetAndMessage,
|
ProduceResponse, FetchResponse, OffsetAndMessage,
|
||||||
BrokerMetadata, TopicMetadata, PartitionMetadata, TopicAndPartition,
|
BrokerMetadata, TopicMetadata, PartitionMetadata, TopicAndPartition,
|
||||||
KafkaUnavailableError, UnsupportedCodecError, ConsumerFetchSizeTooSmall,
|
KafkaUnavailableError, UnsupportedCodecError, ConsumerFetchSizeTooSmall,
|
||||||
ProtocolError
|
ProtocolError, ConsumerMetadataResponse
|
||||||
)
|
)
|
||||||
from kafka.protocol import (
|
from kafka.protocol import (
|
||||||
ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, KafkaProtocol,
|
ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, KafkaProtocol,
|
||||||
@@ -32,7 +32,7 @@ class TestProtocol(unittest.TestCase):
|
|||||||
self.assertEqual(msg.value, payload)
|
self.assertEqual(msg.value, payload)
|
||||||
|
|
||||||
def test_create_gzip(self):
|
def test_create_gzip(self):
|
||||||
payloads = [b"v1", b"v2"]
|
payloads = [(b"v1", None), (b"v2", None)]
|
||||||
msg = create_gzip_message(payloads)
|
msg = create_gzip_message(payloads)
|
||||||
self.assertEqual(msg.magic, 0)
|
self.assertEqual(msg.magic, 0)
|
||||||
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
||||||
@@ -59,9 +59,39 @@ class TestProtocol(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(decoded, expect)
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
|
def test_create_gzip_keyed(self):
|
||||||
|
payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
|
||||||
|
msg = create_gzip_message(payloads)
|
||||||
|
self.assertEqual(msg.magic, 0)
|
||||||
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
|
||||||
|
self.assertEqual(msg.key, None)
|
||||||
|
# Need to decode to check since gzipped payload is non-deterministic
|
||||||
|
decoded = gzip_decode(msg.value)
|
||||||
|
expect = b"".join([
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", 1474775406), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k1", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v1", # Value
|
||||||
|
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", -16383415), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k2", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v2", # Value
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
def test_create_snappy(self):
|
def test_create_snappy(self):
|
||||||
payloads = [b"v1", b"v2"]
|
payloads = [(b"v1", None), (b"v2", None)]
|
||||||
msg = create_snappy_message(payloads)
|
msg = create_snappy_message(payloads)
|
||||||
self.assertEqual(msg.magic, 0)
|
self.assertEqual(msg.magic, 0)
|
||||||
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
||||||
@@ -87,6 +117,36 @@ class TestProtocol(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(decoded, expect)
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
|
@unittest.skipUnless(has_snappy(), "Snappy not available")
|
||||||
|
def test_create_snappy_keyed(self):
|
||||||
|
payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
|
||||||
|
msg = create_snappy_message(payloads)
|
||||||
|
self.assertEqual(msg.magic, 0)
|
||||||
|
self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
|
||||||
|
self.assertEqual(msg.key, None)
|
||||||
|
decoded = snappy_decode(msg.value)
|
||||||
|
expect = b"".join([
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", 1474775406), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k1", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v1", # Value
|
||||||
|
|
||||||
|
struct.pack(">q", 0), # MsgSet Offset
|
||||||
|
struct.pack(">i", 18), # Msg Size
|
||||||
|
struct.pack(">i", -16383415), # CRC
|
||||||
|
struct.pack(">bb", 0, 0), # Magic, flags
|
||||||
|
struct.pack(">i", 2), # Length of key
|
||||||
|
b"k2", # Key
|
||||||
|
struct.pack(">i", 2), # Length of value
|
||||||
|
b"v2", # Value
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(decoded, expect)
|
||||||
|
|
||||||
def test_encode_message_header(self):
|
def test_encode_message_header(self):
|
||||||
expect = b"".join([
|
expect = b"".join([
|
||||||
struct.pack(">h", 10), # API Key
|
struct.pack(">h", 10), # API Key
|
||||||
@@ -500,6 +560,34 @@ class TestProtocol(unittest.TestCase):
|
|||||||
decoded = KafkaProtocol.decode_metadata_response(encoded)
|
decoded = KafkaProtocol.decode_metadata_response(encoded)
|
||||||
self.assertEqual(decoded, (node_brokers, topic_partitions))
|
self.assertEqual(decoded, (node_brokers, topic_partitions))
|
||||||
|
|
||||||
|
def test_encode_consumer_metadata_request(self):
|
||||||
|
expected = b"".join([
|
||||||
|
struct.pack(">i", 17), # Total length of the request
|
||||||
|
struct.pack('>h', 10), # API key consumer metadata
|
||||||
|
struct.pack('>h', 0), # API version
|
||||||
|
struct.pack('>i', 4), # Correlation ID
|
||||||
|
struct.pack('>h3s', 3, b"cid"),# The client ID
|
||||||
|
struct.pack('>h2s', 2, b"g1"), # Group "g1"
|
||||||
|
])
|
||||||
|
|
||||||
|
encoded = KafkaProtocol.encode_consumer_metadata_request(b"cid", 4, b"g1")
|
||||||
|
|
||||||
|
self.assertEqual(encoded, expected)
|
||||||
|
|
||||||
|
def test_decode_consumer_metadata_response(self):
|
||||||
|
encoded = b"".join([
|
||||||
|
struct.pack(">i", 42), # Correlation ID
|
||||||
|
struct.pack(">h", 0), # No Error
|
||||||
|
struct.pack(">i", 1), # Broker ID
|
||||||
|
struct.pack(">h23s", 23, b"brokers1.kafka.rdio.com"), # Broker Host
|
||||||
|
struct.pack(">i", 1000), # Broker Port
|
||||||
|
])
|
||||||
|
|
||||||
|
results = KafkaProtocol.decode_consumer_metadata_response(encoded)
|
||||||
|
self.assertEqual(results,
|
||||||
|
ConsumerMetadataResponse(error = 0, nodeId = 1, host = b'brokers1.kafka.rdio.com', port = 1000)
|
||||||
|
)
|
||||||
|
|
||||||
def test_encode_offset_request(self):
|
def test_encode_offset_request(self):
|
||||||
expected = b"".join([
|
expected = b"".join([
|
||||||
struct.pack(">i", 21), # Total length of the request
|
struct.pack(">i", 21), # Total length of the request
|
||||||
@@ -701,7 +789,7 @@ class TestProtocol(unittest.TestCase):
|
|||||||
yield
|
yield
|
||||||
|
|
||||||
def test_create_message_set(self):
|
def test_create_message_set(self):
|
||||||
messages = [1, 2, 3]
|
messages = [(1, "k1"), (2, "k2"), (3, "k3")]
|
||||||
|
|
||||||
# Default codec is CODEC_NONE. Expect list of regular messages.
|
# Default codec is CODEC_NONE. Expect list of regular messages.
|
||||||
expect = [sentinel.message] * len(messages)
|
expect = [sentinel.message] * len(messages)
|
||||||
|
|||||||
@@ -107,7 +107,6 @@ class UtilTest(unittest.TestCase):
|
|||||||
t = kafka.common.TopicAndPartition
|
t = kafka.common.TopicAndPartition
|
||||||
|
|
||||||
l = [
|
l = [
|
||||||
t("a", 1),
|
|
||||||
t("a", 1),
|
t("a", 1),
|
||||||
t("a", 2),
|
t("a", 2),
|
||||||
t("a", 3),
|
t("a", 3),
|
||||||
@@ -124,3 +123,8 @@ class UtilTest(unittest.TestCase):
|
|||||||
3: t("b", 3),
|
3: t("b", 3),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# should not be able to group duplicate topic-partitions
|
||||||
|
t1 = t("a", 1)
|
||||||
|
with self.assertRaises(AssertionError):
|
||||||
|
kafka.util.group_by_topic_and_partition([t1, t1])
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from . import unittest
|
|||||||
|
|
||||||
from kafka import KafkaClient
|
from kafka import KafkaClient
|
||||||
from kafka.common import OffsetRequest
|
from kafka.common import OffsetRequest
|
||||||
|
from kafka.util import kafka_bytestring
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'random_string',
|
'random_string',
|
||||||
@@ -22,8 +23,7 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
def random_string(l):
|
def random_string(l):
|
||||||
s = "".join(random.choice(string.ascii_letters) for i in xrange(l))
|
return "".join(random.choice(string.ascii_letters) for i in xrange(l))
|
||||||
return s.encode('utf-8')
|
|
||||||
|
|
||||||
def kafka_versions(*versions):
|
def kafka_versions(*versions):
|
||||||
def kafka_versions(func):
|
def kafka_versions(func):
|
||||||
@@ -50,6 +50,8 @@ def get_open_port():
|
|||||||
class KafkaIntegrationTestCase(unittest.TestCase):
|
class KafkaIntegrationTestCase(unittest.TestCase):
|
||||||
create_client = True
|
create_client = True
|
||||||
topic = None
|
topic = None
|
||||||
|
bytes_topic = None
|
||||||
|
zk = None
|
||||||
server = None
|
server = None
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@@ -58,8 +60,9 @@ class KafkaIntegrationTestCase(unittest.TestCase):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if not self.topic:
|
if not self.topic:
|
||||||
topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10).decode('utf-8'))
|
topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
|
||||||
self.topic = topic.encode('utf-8')
|
self.topic = topic
|
||||||
|
self.bytes_topic = topic.encode('utf-8')
|
||||||
|
|
||||||
if self.create_client:
|
if self.create_client:
|
||||||
self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))
|
self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))
|
||||||
@@ -77,8 +80,15 @@ class KafkaIntegrationTestCase(unittest.TestCase):
|
|||||||
self.client.close()
|
self.client.close()
|
||||||
|
|
||||||
def current_offset(self, topic, partition):
|
def current_offset(self, topic, partition):
|
||||||
offsets, = self.client.send_offset_request([ OffsetRequest(topic, partition, -1, 1) ])
|
try:
|
||||||
return offsets.offsets[0]
|
offsets, = self.client.send_offset_request([ OffsetRequest(kafka_bytestring(topic), partition, -1, 1) ])
|
||||||
|
except:
|
||||||
|
# XXX: We've seen some UnknownErrors here and cant debug w/o server logs
|
||||||
|
self.zk.child.dump_logs()
|
||||||
|
self.server.child.dump_logs()
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
return offsets.offsets[0]
|
||||||
|
|
||||||
def msgs(self, iterable):
|
def msgs(self, iterable):
|
||||||
return [ self.msg(x) for x in iterable ]
|
return [ self.msg(x) for x in iterable ]
|
||||||
@@ -103,3 +113,8 @@ class Timer(object):
|
|||||||
self.interval = self.end - self.start
|
self.interval = self.end - self.start
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
logging.getLogger('test.fixtures').setLevel(logging.ERROR)
|
||||||
|
logging.getLogger('test.service').setLevel(logging.ERROR)
|
||||||
|
|
||||||
|
# kafka.conn debug logging is verbose, disable in tests by default
|
||||||
|
logging.getLogger('kafka.conn').setLevel(logging.INFO)
|
||||||
|
|||||||
45
tox.ini
@@ -1,6 +1,21 @@
[tox]
envlist = lint, py26, py27, pypy, py33, py34
envlist = lint, py26, py27, pypy, py33, py34, py35, docs

[testenv]
deps =
    nose
    nose-timer
    coverage
    mock
    python-snappy
commands =
    nosetests {posargs:-v -x --with-id --id-file={envdir}/.noseids --with-timer --timer-top-n 10 --with-coverage --cover-erase --cover-package kafka}
setenv =
    NOSE_LOGFORMAT = %(asctime)s - %(thread)d - %(name)s - %(levelname)s - %(message)s
    PROJECT_ROOT = {toxinidir}
passenv = KAFKA_VERSION

[testenv:py26]
deps =
    six
    unittest2
@@ -9,21 +24,11 @@ deps =
    coverage
    mock
    python-snappy
commands =
    nosetests {posargs:-v --with-id --id-file={envdir}/.noseids --with-timer --timer-top-n 10 --with-coverage --cover-erase --cover-package kafka}
setenv =
    PROJECT_ROOT = {toxinidir}

[testenv:py33]
[testenv:py27]
deps =
    nose
    nose-timer
    coverage
    mock
    python-snappy

[testenv:py34]
deps =
    six
    unittest2
    nose
    nose-timer
    coverage
@@ -36,4 +41,14 @@ deps =
    unittest2
    mock
    pylint
commands = pylint {posargs: -E --ignore=queue.py kafka test}
commands = pylint --rcfile=pylint.rc {posargs: -E kafka test}

[testenv:docs]
deps =
    sphinxcontrib-napoleon
    sphinx_rtd_theme
    sphinx

commands =
    sphinx-apidoc -o docs/apidoc/ kafka/
    sphinx-build -b html docs/ docs/_build
@@ -3,6 +3,8 @@

if [ $1 == "pypy" ]; then
    echo "pypy"
elif [ $1 == "3.5" ]; then
    echo "py35"
elif [ $1 == "3.4" ]; then
    echo "py34"
elif [ $1 == "3.3" ]; then