4.6 KiB
4.6 KiB
Usage
KafkaConsumer
from kafka import KafkaConsumer
# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer('my-topic',
                         group_id='my-group',
                         bootstrap_servers=['localhost:9092'])
for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                          message.offset, message.key,
                                          message.value))
# consume earliest available messages, don't commit offsets
KafkaConsumer(auto_offset_reset='earliest', enable_auto_commit=False)
# consume json messages
KafkaConsumer(value_deserializer=lambda m: json.loads(m.decode('ascii')))
# consume msgpack
KafkaConsumer(value_deserializer=msgpack.unpackb)
# StopIteration if no message after 1sec
KafkaConsumer(consumer_timeout_ms=1000)
# Subscribe to a regex topic pattern
consumer = KafkaConsumer()
consumer.subscribe(pattern='^awesome.*')
# Use multiple consumers in parallel w/ 0.9 kafka brokers
# typically you would run each on a different server / process / CPU
consumer1 = KafkaConsumer('my-topic',
                          group_id='my-group',
                          bootstrap_servers='my.server.com')
consumer2 = KafkaConsumer('my-topic',
                          group_id='my-group',
                          bootstrap_servers='my.server.com')
There are many configuration options for the consumer class. See the
`kafka.KafkaConsumer` API documentation for more details.
SimpleProducer
Asynchronous Mode
from kafka import SimpleProducer, SimpleClient
# To send messages asynchronously
client = SimpleClient('localhost:9092')
producer = SimpleProducer(client, async=True)
producer.send_messages('my-topic', b'async message')
# To send messages in batch. You can use any of the available
# producers for doing this. The following producer will collect
# messages in batch and send them to Kafka after 20 messages are
# collected or every 60 seconds
# Notes:
# * If the producer dies before the messages are sent, there will be losses
# * Call producer.stop() to send the messages and cleanup
producer = SimpleProducer(client,
                          async=True,
                          batch_send_every_n=20,
                          batch_send_every_t=60)
Synchronous Mode
from kafka import SimpleProducer, SimpleClient
# To send messages synchronously
client = SimpleClient('localhost:9092')
producer = SimpleProducer(client, async=False)
# Note that the application is responsible for encoding messages to type bytes
producer.send_messages('my-topic', b'some message')
producer.send_messages('my-topic', b'this method', b'is variadic')
# Send unicode message
producer.send_messages('my-topic', u'你怎么样?'.encode('utf-8'))
# To wait for acknowledgements
# ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
# a local log before sending response
# ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
# by all in sync replicas before sending a response
producer = SimpleProducer(client,
                          async=False,
                          req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                          ack_timeout=2000,
                          sync_fail_on_error=False)

responses = producer.send_messages('my-topic', b'another message')
for r in responses:
    logging.info(r.offset)
KeyedProducer
from kafka import (
SimpleClient, KeyedProducer,
Murmur2Partitioner, RoundRobinPartitioner)
kafka = SimpleClient('localhost:9092')
# HashedPartitioner is default (currently uses python hash())
producer = KeyedProducer(kafka)
producer.send_messages(b'my-topic', b'key1', b'some message')
producer.send_messages(b'my-topic', b'key2', b'this methode')
# Murmur2Partitioner attempts to mirror the java client hashing
producer = KeyedProducer(kafka, partitioner=Murmur2Partitioner)
# Or just produce round-robin (or just use SimpleProducer)
producer = KeyedProducer(kafka, partitioner=RoundRobinPartitioner)