More comprehensive cython and numpy deserializer tests

This commit is contained in:
Mark Florisson
2015-08-08 15:58:53 +01:00
parent edd5463e7b
commit c8dfc48ff2
5 changed files with 111 additions and 41 deletions

View File

@@ -23,7 +23,7 @@ from cassandra import cqltypes
from cassandra.util import is_little_endian
import numpy as np
# import pandas as pd
cdef extern from "numpyFlags.h":
# Include 'numpyFlags.h' into the generated C code to disable the
@@ -74,8 +74,10 @@ cdef class NumpyParser(ColumnParser):
for i in range(rowcount):
unpack_row(reader, desc, arrs)
return [make_native_byteorder(arr) for arr in arrays]
# return pd.DataFrame(dict(zip(desc.colnames, arrays)))
arrays = [make_native_byteorder(arr) for arr in arrays]
result = dict(zip(desc.colnames, arrays))
# return pd.DataFrame(result)
return result
### Helper functions to create NumPy arrays and array descriptors

View File

@@ -107,10 +107,11 @@ class CustomProtocolHandlerTest(unittest.TestCase):
session.client_protocol_handler = CustomProtocolHandlerResultMessageTracked
session.row_factory = tuple_factory
columns_string = create_table_with_all_types("alltypes", session)
colnames = create_table_with_all_types("alltypes", session, 1)
columns_string = ", ".join(colnames)
# verify data
params = get_all_primitive_params()
params = get_all_primitive_params(0)
results = session.execute("SELECT {0} FROM alltypes WHERE primkey=0".format(columns_string))[0]
for expected, actual in zip(params, results):
self.assertEqual(actual, expected)

View File

@@ -7,23 +7,25 @@ try:
except ImportError:
import unittest
from cassandra.query import tuple_factory
from cassandra.cluster import Cluster
from cassandra.protocol import ProtocolHandler, LazyProtocolHandler, NumpyProtocolHandler
from tests.integration import use_singledc, PROTOCOL_VERSION
from tests.integration.datatype_utils import update_datatypes
from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params
from cassandra.cython_deps import HAVE_CYTHON
if not HAVE_CYTHON:
raise unittest.SkipTest("Skipping test, not compiled with Cython enabled")
from tests.integration.standard.utils import (
create_table_with_all_types, get_all_primitive_params, get_primitive_datatypes)
from tests.unit.cython.utils import cythontest, numpytest
def setup_module():
use_singledc()
update_datatypes()
class CustomProtocolHandlerTest(unittest.TestCase):
class CythonProtocolHandlerTest(unittest.TestCase):
N_ITEMS = 10
@classmethod
def setUpClass(cls):
@@ -32,39 +34,96 @@ class CustomProtocolHandlerTest(unittest.TestCase):
cls.session.execute("CREATE KEYSPACE testspace WITH replication = "
"{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}")
cls.session.set_keyspace("testspace")
create_table_with_all_types("test_table", cls.session)
cls.colnames = create_table_with_all_types("test_table", cls.session, cls.N_ITEMS)
@classmethod
def tearDownClass(cls):
cls.session.execute("DROP KEYSPACE testspace")
cls.cluster.shutdown()
@cythontest
def test_cython_parser(self):
"""
Test Cython-based parser that returns a list of tuples
"""
self.cython_parser(ProtocolHandler)
verify_iterator_data(self.assertEqual, get_data(ProtocolHandler))
@cythontest
def test_cython_lazy_parser(self):
"""
Test Cython-based parser that returns a list of tuples
Test Cython-based parser that returns an iterator of tuples
"""
self.cython_parser(LazyProtocolHandler)
verify_iterator_data(self.assertEqual, get_data(LazyProtocolHandler))
def cython_parser(self, protocol_handler):
cluster = Cluster(protocol_version=PROTOCOL_VERSION)
session = cluster.connect(keyspace="testspace")
@numpytest
def test_numpy_parser(self):
"""
Test Numpy-based parser that returns a NumPy array
"""
# arrays = { 'a': arr1, 'b': arr2, ... }
arrays = get_data(NumpyProtocolHandler)
# use our custom protocol handler
session.client_protocol_handler = protocol_handler
# session.row_factory = tuple_factory
colnames = self.colnames
datatypes = get_primitive_datatypes()
for colname, datatype in zip(colnames, datatypes):
arr = arrays[colname]
self.match_dtype(datatype, arr.dtype)
# verify data
params = get_all_primitive_params()
[first_result] = session.execute("SELECT * FROM test_table WHERE primkey=0")
self.assertEqual(len(params), len(first_result),
msg="Not the right number of columns?")
for expected, actual in zip(params, first_result):
self.assertEqual(actual, expected)
verify_iterator_data(self.assertEqual, arrays_to_list_of_tuples(arrays, colnames))
session.shutdown()
def match_dtype(self, datatype, dtype):
"""Match a string cqltype (e.g. 'int' or 'blob') with a numpy dtype"""
if datatype == 'smallint':
self.match_dtype_props(dtype, 'i', 2)
elif datatype == 'int':
self.match_dtype_props(dtype, 'i', 4)
elif datatype in ('bigint', 'counter'):
self.match_dtype_props(dtype, 'i', 8)
elif datatype == 'float':
self.match_dtype_props(dtype, 'f', 4)
elif datatype == 'double':
self.match_dtype_props(dtype, 'f', 8)
else:
self.assertEqual(dtype.kind, 'O', msg=(dtype, datatype))
def match_dtype_props(self, dtype, kind, size, signed=None):
self.assertEqual(dtype.kind, kind, msg=dtype)
self.assertEqual(dtype.itemsize, size, msg=dtype)
def arrays_to_list_of_tuples(arrays, colnames):
"""Convert a dict of arrays (as given by the numpy protocol handler) to a list of tuples"""
first_array = arrays[colnames[0]]
return [tuple(arrays[colname][i] for colname in colnames)
for i in range(len(first_array))]
def get_data(protocol_handler):
"""
Get some data from the test table.
:param key: if None, get all results (100.000 results), otherwise get only one result
"""
cluster = Cluster(protocol_version=PROTOCOL_VERSION)
session = cluster.connect(keyspace="testspace")
# use our custom protocol handler
session.client_protocol_handler = protocol_handler
session.row_factory = tuple_factory
results = session.execute("SELECT * FROM test_table")
session.shutdown()
return results
def verify_iterator_data(assertEqual, results):
"""
Check the result of get_data() when this is a list or
iterator of tuples
"""
for result in results:
params = get_all_primitive_params(result[0])
assertEqual(len(params), len(result),
msg="Not the right number of columns?")
for expected, actual in zip(params, result):
assertEqual(actual, expected)

View File

@@ -4,15 +4,16 @@ Helper module to populate a dummy Cassandra tables with data.
from tests.integration.datatype_utils import PRIMITIVE_DATATYPES, get_sample
def create_table_with_all_types(table_name, session):
def create_table_with_all_types(table_name, session, N):
"""
Method that given a table_name and session construct a table that contains
all possible primitive types.
:param table_name: Name of table to create
:param session: session to use for table creation
:return: a string containing the names of all the columns.
This can be used to query the table.
:param N: the number of items to insert into the table
:return: a list of column names
"""
# create table
alpha_type_list = ["primkey int PRIMARY KEY"]
@@ -26,21 +27,27 @@ def create_table_with_all_types(table_name, session):
table_name, ', '.join(alpha_type_list)), timeout=120)
# create the input
params = get_all_primitive_params()
# insert into table as a simple statement
columns_string = ', '.join(col_names)
placeholders = ', '.join(["%s"] * len(col_names))
session.execute("INSERT INTO {0} ({1}) VALUES ({2})".format(
table_name, columns_string, placeholders), params, timeout=120)
return columns_string
for key in range(N):
params = get_all_primitive_params(key)
# insert into table as a simple statement
columns_string = ', '.join(col_names)
placeholders = ', '.join(["%s"] * len(col_names))
session.execute("INSERT INTO {0} ({1}) VALUES ({2})".format(
table_name, columns_string, placeholders), params, timeout=120)
return col_names
def get_all_primitive_params():
def get_all_primitive_params(key):
"""
Simple utility method used to give back a list of all possible primitive data sample types.
"""
params = [0]
params = [key]
for datatype in PRIMITIVE_DATATYPES:
params.append(get_sample(datatype))
return params
def get_primitive_datatypes():
return ['int'] + list(PRIMITIVE_DATATYPES)

View File

@@ -17,6 +17,7 @@ def cyimport(import_path):
raise
return None
# @cythontest
# def test_something(self): ...
cythontest = unittest.skipUnless(HAVE_CYTHON, 'Cython is not available')