Update Partitioners for use with KafkaProducer (#827)
This commit is contained in:

committed by
Dana Powers

parent
5c784890b6
commit
b8717b4b79
2
.gitignore
vendored
2
.gitignore
vendored
@@ -12,3 +12,5 @@ servers/*/resources/ssl*
|
|||||||
docs/_build
|
docs/_build
|
||||||
.cache*
|
.cache*
|
||||||
.idea/
|
.idea/
|
||||||
|
integration-test/
|
||||||
|
tests-env/
|
@@ -5,22 +5,23 @@ class Partitioner(object):
|
|||||||
"""
|
"""
|
||||||
Base class for a partitioner
|
Base class for a partitioner
|
||||||
"""
|
"""
|
||||||
def __init__(self, partitions):
|
def __init__(self, partitions=None):
|
||||||
"""
|
"""
|
||||||
Initialize the partitioner
|
Initialize the partitioner
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
partitions: A list of available partitions (during startup)
|
partitions: A list of available partitions (during startup) OPTIONAL.
|
||||||
"""
|
"""
|
||||||
self.partitions = partitions
|
self.partitions = partitions
|
||||||
|
|
||||||
def partition(self, key, partitions=None):
|
def __call__(self, key, all_partitions=None, available_partitions=None):
|
||||||
"""
|
"""
|
||||||
Takes a string key and num_partitions as argument and returns
|
Takes a string key, num_partitions and available_partitions as argument and returns
|
||||||
a partition to be used for the message
|
a partition to be used for the message
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
key: the key to use for partitioning
|
key: the key to use for partitioning.
|
||||||
partitions: (optional) a list of partitions.
|
all_partitions: a list of the topic's partitions.
|
||||||
|
available_partitions: a list of the broker's currently avaliable partitions(optional).
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError('partition function has to be implemented')
|
raise NotImplementedError('partition function has to be implemented')
|
||||||
|
@@ -11,6 +11,11 @@ class Murmur2Partitioner(Partitioner):
|
|||||||
the hash of the key. Attempts to apply the same hashing
|
the hash of the key. Attempts to apply the same hashing
|
||||||
function as mainline java client.
|
function as mainline java client.
|
||||||
"""
|
"""
|
||||||
|
def __call__(self, key, partitions=None, available=None):
|
||||||
|
if available:
|
||||||
|
return self.partition(key, available)
|
||||||
|
return self.partition(key, partitions)
|
||||||
|
|
||||||
def partition(self, key, partitions=None):
|
def partition(self, key, partitions=None):
|
||||||
if not partitions:
|
if not partitions:
|
||||||
partitions = self.partitions
|
partitions = self.partitions
|
||||||
@@ -21,12 +26,15 @@ class Murmur2Partitioner(Partitioner):
|
|||||||
return partitions[idx]
|
return partitions[idx]
|
||||||
|
|
||||||
|
|
||||||
class LegacyPartitioner(Partitioner):
|
class LegacyPartitioner(object):
|
||||||
"""DEPRECATED -- See Issue 374
|
"""DEPRECATED -- See Issue 374
|
||||||
|
|
||||||
Implements a partitioner which selects the target partition based on
|
Implements a partitioner which selects the target partition based on
|
||||||
the hash of the key
|
the hash of the key
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, partitions):
|
||||||
|
self.partitions = partitions
|
||||||
|
|
||||||
def partition(self, key, partitions=None):
|
def partition(self, key, partitions=None):
|
||||||
if not partitions:
|
if not partitions:
|
||||||
partitions = self.partitions
|
partitions = self.partitions
|
||||||
|
@@ -1,26 +1,70 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
from itertools import cycle
|
|
||||||
|
|
||||||
from .base import Partitioner
|
from .base import Partitioner
|
||||||
|
|
||||||
|
|
||||||
class RoundRobinPartitioner(Partitioner):
|
class RoundRobinPartitioner(Partitioner):
|
||||||
"""
|
def __init__(self, partitions=None):
|
||||||
Implements a round robin partitioner which sends data to partitions
|
self.partitions_iterable = CachedPartitionCycler(partitions)
|
||||||
in a round robin fashion
|
if partitions:
|
||||||
"""
|
|
||||||
def __init__(self, partitions):
|
|
||||||
super(RoundRobinPartitioner, self).__init__(partitions)
|
|
||||||
self.iterpart = cycle(partitions)
|
|
||||||
|
|
||||||
def _set_partitions(self, partitions):
|
|
||||||
self.partitions = partitions
|
|
||||||
self.iterpart = cycle(partitions)
|
|
||||||
|
|
||||||
def partition(self, key, partitions=None):
|
|
||||||
# Refresh the partition list if necessary
|
|
||||||
if partitions and self.partitions != partitions:
|
|
||||||
self._set_partitions(partitions)
|
self._set_partitions(partitions)
|
||||||
|
else:
|
||||||
|
self.partitions = None
|
||||||
|
|
||||||
return next(self.iterpart)
|
def __call__(self, key, all_partitions=None, available_partitions=None):
|
||||||
|
if available_partitions:
|
||||||
|
cur_partitions = available_partitions
|
||||||
|
else:
|
||||||
|
cur_partitions = all_partitions
|
||||||
|
if not self.partitions:
|
||||||
|
self._set_partitions(cur_partitions)
|
||||||
|
elif cur_partitions != self.partitions_iterable.partitions and cur_partitions is not None:
|
||||||
|
self._set_partitions(cur_partitions)
|
||||||
|
return next(self.partitions_iterable)
|
||||||
|
|
||||||
|
def _set_partitions(self, available_partitions):
|
||||||
|
self.partitions = available_partitions
|
||||||
|
self.partitions_iterable.set_partitions(available_partitions)
|
||||||
|
|
||||||
|
def partition(self, key, all_partitions=None, available_partitions=None):
|
||||||
|
return self.__call__(key, all_partitions, available_partitions)
|
||||||
|
|
||||||
|
|
||||||
|
class CachedPartitionCycler(object):
|
||||||
|
def __init__(self, partitions=None):
|
||||||
|
self.partitions = partitions
|
||||||
|
if partitions:
|
||||||
|
assert type(partitions) is list
|
||||||
|
self.cur_pos = None
|
||||||
|
|
||||||
|
def __next__(self):
|
||||||
|
return self.next()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _index_available(cur_pos, partitions):
|
||||||
|
return cur_pos < len(partitions)
|
||||||
|
|
||||||
|
def set_partitions(self, partitions):
|
||||||
|
if self.cur_pos:
|
||||||
|
if not self._index_available(self.cur_pos, partitions):
|
||||||
|
self.cur_pos = 0
|
||||||
|
self.partitions = partitions
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.partitions = partitions
|
||||||
|
next_item = self.partitions[self.cur_pos]
|
||||||
|
if next_item in partitions:
|
||||||
|
self.cur_pos = partitions.index(next_item)
|
||||||
|
else:
|
||||||
|
self.cur_pos = 0
|
||||||
|
return None
|
||||||
|
self.partitions = partitions
|
||||||
|
|
||||||
|
def next(self):
|
||||||
|
assert self.partitions is not None
|
||||||
|
if self.cur_pos is None or not self._index_available(self.cur_pos, self.partitions):
|
||||||
|
self.cur_pos = 1
|
||||||
|
return self.partitions[0]
|
||||||
|
cur_item = self.partitions[self.cur_pos]
|
||||||
|
self.cur_pos += 1
|
||||||
|
return cur_item
|
||||||
|
@@ -3,6 +3,7 @@ import six
|
|||||||
|
|
||||||
from kafka.partitioner import Murmur2Partitioner
|
from kafka.partitioner import Murmur2Partitioner
|
||||||
from kafka.partitioner.default import DefaultPartitioner
|
from kafka.partitioner.default import DefaultPartitioner
|
||||||
|
from kafka.partitioner import RoundRobinPartitioner
|
||||||
|
|
||||||
|
|
||||||
def test_default_partitioner():
|
def test_default_partitioner():
|
||||||
@@ -22,6 +23,38 @@ def test_default_partitioner():
|
|||||||
assert partitioner(None, all_partitions, []) in all_partitions
|
assert partitioner(None, all_partitions, []) in all_partitions
|
||||||
|
|
||||||
|
|
||||||
|
def test_roundrobin_partitioner():
|
||||||
|
partitioner = RoundRobinPartitioner()
|
||||||
|
all_partitions = list(range(100))
|
||||||
|
available = all_partitions
|
||||||
|
# partitioner should cycle between partitions
|
||||||
|
i = 0
|
||||||
|
max_partition = all_partitions[len(all_partitions) - 1]
|
||||||
|
while i <= max_partition:
|
||||||
|
assert i == partitioner(None, all_partitions, available)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while i <= int(max_partition / 2):
|
||||||
|
assert i == partitioner(None, all_partitions, available)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# test dynamic partition re-assignment
|
||||||
|
available = available[:-25]
|
||||||
|
|
||||||
|
while i <= max(available):
|
||||||
|
assert i == partitioner(None, all_partitions, available)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
all_partitions = list(range(200))
|
||||||
|
available = all_partitions
|
||||||
|
|
||||||
|
max_partition = all_partitions[len(all_partitions) - 1]
|
||||||
|
while i <= max_partition:
|
||||||
|
assert i == partitioner(None, all_partitions, available)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
|
||||||
def test_hash_bytes():
|
def test_hash_bytes():
|
||||||
p = Murmur2Partitioner(range(1000))
|
p = Murmur2Partitioner(range(1000))
|
||||||
assert p.partition(bytearray(b'test')) == p.partition(b'test')
|
assert p.partition(bytearray(b'test')) == p.partition(b'test')
|
||||||
|
Reference in New Issue
Block a user