Remove dependency on clandestined and use pure python murmur3

This commit is contained in:
John Anderson
2015-07-02 10:39:43 -07:00
parent a935261830
commit cff5abfad5
5 changed files with 311 additions and 8 deletions

View File

@@ -3,16 +3,11 @@ import time
import logging
from pymemcache.client.base import Client, PooledClient, _check_key
from clandestined import RendezvousHash as RH
from pymemcache.client.rendezvous import RendezvousHash
logger = logging.getLogger(__name__)
class RendezvousHash(RH):
def get_node(self, key):
return self.find_node(key)
class HashClient(object):
"""
A client for communicating with a cluster of memcached servers
@@ -57,6 +52,11 @@ class HashClient(object):
Further arguments are interpreted as for :py:class:`.Client`
constructor.
The default ``hasher`` is using a pure python implementation that can be
significantly improved performance wise by switching to a C based
version. We recommend using ``python-clandestined`` if having a C
dependency is acceptable.
"""
self.clients = {}
self.retry_attempts = retry_attempts

View File

@@ -0,0 +1,51 @@
def murmur3_32(data, seed=0):
"""MurmurHash3 was written by Austin Appleby, and is placed in the
public domain. The author hereby disclaims copyright to this source
code."""
c1 = 0xcc9e2d51
c2 = 0x1b873593
length = len(data)
h1 = seed
roundedEnd = (length & 0xfffffffc) # round down to 4 byte block
for i in range(0, roundedEnd, 4):
# little endian load order
k1 = (ord(data[i]) & 0xff) | ((ord(data[i + 1]) & 0xff) << 8) | \
((ord(data[i + 2]) & 0xff) << 16) | (ord(data[i + 3]) << 24)
k1 *= c1
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
k1 *= c2
h1 ^= k1
h1 = (h1 << 13) | ((h1 & 0xffffffff) >> 19) # ROTL32(h1,13)
h1 = h1 * 5 + 0xe6546b64
# tail
k1 = 0
val = length & 0x03
if val == 3:
k1 = (ord(data[roundedEnd + 2]) & 0xff) << 16
# fallthrough
if val in [2, 3]:
k1 |= (ord(data[roundedEnd + 1]) & 0xff) << 8
# fallthrough
if val in [1, 2, 3]:
k1 |= ord(data[roundedEnd]) & 0xff
k1 *= c1
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
k1 *= c2
h1 ^= k1
# finalization
h1 ^= length
# fmix(h1)
h1 ^= ((h1 & 0xffffffff) >> 16)
h1 *= 0x85ebca6b
h1 ^= ((h1 & 0xffffffff) >> 13)
h1 *= 0xc2b2ae35
h1 ^= ((h1 & 0xffffffff) >> 16)
return h1 & 0xffffffff

View File

@@ -0,0 +1,46 @@
from pymemcache.client.murmur3 import murmur3_32
class RendezvousHash(object):
"""
Implements the Highest Random Weight (HRW) hashing algorithm most
commonly referred to as rendezvous hashing.
Originally developed as part of python-clandestined.
Copyright (c) 2014 Ernest W. Durbin III
"""
def __init__(self, nodes=None, seed=0, hash_function=murmur3_32):
"""
Constructor.
"""
self.nodes = []
self.seed = seed
if nodes is not None:
self.nodes = nodes
self.hash_function = lambda x: hash_function(x, seed)
def add_node(self, node):
if node not in self.nodes:
self.nodes.append(node)
def remove_node(self, node):
if node in self.nodes:
self.nodes.remove(node)
else:
raise ValueError("No such node %s to remove" % (node))
def get_node(self, key):
high_score = -1
winner = None
for node in self.nodes:
score = self.hash_function(
"%s-%s" % (str(node), str(key)))
if score > high_score:
(high_score, winner) = (score, node)
elif score == high_score:
(high_score, winner) = (score, max(str(node), str(winner)))
return winner

View File

@@ -0,0 +1,203 @@
from pymemcache.client.rendezvous import RendezvousHash
import pytest
@pytest.mark.unit()
def test_init_no_options():
rendezvous = RendezvousHash()
assert 0 == len(rendezvous.nodes)
assert 1361238019 == rendezvous.hash_function('6666')
@pytest.mark.unit()
def test_init():
nodes = ['0', '1', '2']
rendezvous = RendezvousHash(nodes=nodes)
assert 3 == len(rendezvous.nodes)
assert 1361238019 == rendezvous.hash_function('6666')
@pytest.mark.unit()
def test_seed():
rendezvous = RendezvousHash(seed=10)
assert 2981722772 == rendezvous.hash_function('6666')
@pytest.mark.unit()
def test_add_node():
rendezvous = RendezvousHash()
rendezvous.add_node('1')
assert 1 == len(rendezvous.nodes)
rendezvous.add_node('1')
assert 1 == len(rendezvous.nodes)
rendezvous.add_node('2')
assert 2 == len(rendezvous.nodes)
rendezvous.add_node('1')
assert 2 == len(rendezvous.nodes)
@pytest.mark.unit()
def test_remove_node():
nodes = ['0', '1', '2']
rendezvous = RendezvousHash(nodes=nodes)
rendezvous.remove_node('2')
assert 2 == len(rendezvous.nodes)
with pytest.raises(ValueError):
rendezvous.remove_node('2')
assert 2 == len(rendezvous.nodes)
rendezvous.remove_node('1')
assert 1 == len(rendezvous.nodes)
rendezvous.remove_node('0')
assert 0 == len(rendezvous.nodes)
@pytest.mark.unit()
def test_get_node():
nodes = ['0', '1', '2']
rendezvous = RendezvousHash(nodes=nodes)
assert '0' == rendezvous.get_node('ok')
assert '1' == rendezvous.get_node('mykey')
assert '2' == rendezvous.get_node('wat')
@pytest.mark.unit()
def test_get_node_after_removal():
nodes = ['0', '1', '2']
rendezvous = RendezvousHash(nodes=nodes)
rendezvous.remove_node('1')
assert '0' == rendezvous.get_node('ok')
assert '0' == rendezvous.get_node('mykey')
assert '2' == rendezvous.get_node('wat')
@pytest.mark.unit()
def test_get_node_after_addition():
nodes = ['0', '1', '2']
rendezvous = RendezvousHash(nodes=nodes)
assert '0' == rendezvous.get_node('ok')
assert '1' == rendezvous.get_node('mykey')
assert '2' == rendezvous.get_node('wat')
assert '2' == rendezvous.get_node('lol')
rendezvous.add_node('3')
assert '0' == rendezvous.get_node('ok')
assert '1' == rendezvous.get_node('mykey')
assert '2' == rendezvous.get_node('wat')
assert '3' == rendezvous.get_node('lol')
@pytest.mark.unit()
def test_grow():
rendezvous = RendezvousHash()
placements = {}
for i in range(10):
rendezvous.add_node(str(i))
placements[str(i)] = []
for i in range(1000):
node = rendezvous.get_node(str(i))
placements[node].append(i)
new_placements = {}
for i in range(20):
rendezvous.add_node(str(i))
new_placements[str(i)] = []
for i in range(1000):
node = rendezvous.get_node(str(i))
new_placements[node].append(i)
keys = [k for sublist in placements.values() for k in sublist]
new_keys = [k for sublist in new_placements.values() for k in sublist]
assert sorted(keys) == sorted(new_keys)
added = 0
removed = 0
for node, assignments in new_placements.items():
after = set(assignments)
before = set(placements.get(node, []))
removed += len(before.difference(after))
added += len(after.difference(before))
assert added == removed
assert 1062 == (added + removed)
@pytest.mark.unit()
def test_shrink():
rendezvous = RendezvousHash()
placements = {}
for i in range(10):
rendezvous.add_node(str(i))
placements[str(i)] = []
for i in range(1000):
node = rendezvous.get_node(str(i))
placements[node].append(i)
rendezvous.remove_node('9')
new_placements = {}
for i in range(9):
new_placements[str(i)] = []
for i in range(1000):
node = rendezvous.get_node(str(i))
new_placements[node].append(i)
keys = [k for sublist in placements.values() for k in sublist]
new_keys = [k for sublist in new_placements.values() for k in sublist]
assert sorted(keys) == sorted(new_keys)
added = 0
removed = 0
for node, assignments in placements.items():
after = set(assignments)
before = set(new_placements.get(node, []))
removed += len(before.difference(after))
added += len(after.difference(before))
assert added == removed
assert 202 == (added + removed)
def collide(key, seed):
return 1337
@pytest.mark.unit()
def test_rendezvous_collision():
nodes = ['c', 'b', 'a']
rendezvous = RendezvousHash(nodes, hash_function=collide)
for i in range(1000):
assert 'c' == rendezvous.get_node(i)
@pytest.mark.unit()
def test_rendezvous_names():
nodes = [1, 2, 3, 'a', 'b', 'lol.wat.com']
rendezvous = RendezvousHash(nodes, hash_function=collide)
for i in range(10):
assert 'lol.wat.com' == rendezvous.get_node(i)
nodes = [1, 'a', '0']
rendezvous = RendezvousHash(nodes, hash_function=collide)
for i in range(10):
assert 'a' == rendezvous.get_node(i)

View File

@@ -1,16 +1,16 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
from pymemcache import __version__
setup(
name='pymemcache',
version=__version__,
author='Charles Gordon',
author_email='charles@pinterest.com',
packages=find_packages(),
install_requires=['six', 'clandestined'],
install_requires=['six'],
description='A comprehensive, fast, pure Python memcached client',
long_description=open('README.md').read(),
license='Apache License 2.0',
@@ -20,6 +20,9 @@ setup(
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: PyPy',
'Programming Language :: Python :: PyPy3',
'License :: OSI Approved :: Apache Software License',
'Topic :: Database',
],