From 8afd853ae984bd5ef0fd2369dbd7d2a7c604e01f Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 15:43:57 +0100 Subject: [PATCH 01/70] Add typecodes to module with Cython-compatible .pxd file --- cassandra/protocol.py | 34 ++-------------------- cassandra/typecodes.pxd | 28 +++++++++++++++++++ cassandra/typecodes.py | 62 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 cassandra/typecodes.pxd create mode 100644 cassandra/typecodes.py diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 41439334..a6ce22ec 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -22,6 +22,7 @@ import six from six.moves import range import io +from cassandra import typecodes from cassandra import (Unavailable, WriteTimeout, ReadTimeout, WriteFailure, ReadFailure, FunctionFailure, AlreadyExists, InvalidRequest, Unauthorized, @@ -35,7 +36,7 @@ from cassandra.cqltypes import (AsciiType, BytesType, BooleanType, DoubleType, FloatType, Int32Type, InetAddressType, IntegerType, ListType, LongType, MapType, SetType, TimeUUIDType, - UTF8Type, UUIDType, UserType, + UTF8Type, VarcharType, UUIDType, UserType, TupleType, lookup_casstype, SimpleDateType, TimeType, ByteType, ShortType) from cassandra.policies import WriteType @@ -531,35 +532,6 @@ RESULT_KIND_SET_KEYSPACE = 0x0003 RESULT_KIND_PREPARED = 0x0004 RESULT_KIND_SCHEMA_CHANGE = 0x0005 -class CassandraTypeCodes(object): - CUSTOM_TYPE = 0x0000 - AsciiType = 0x0001 - LongType = 0x0002 - BytesType = 0x0003 - BooleanType = 0x0004 - CounterColumnType = 0x0005 - DecimalType = 0x0006 - DoubleType = 0x0007 - FloatType = 0x0008 - Int32Type = 0x0009 - UTF8Type = 0x000A - DateType = 0x000B - UUIDType = 0x000C - UTF8Type = 0x000D - IntegerType = 0x000E - TimeUUIDType = 0x000F - InetAddressType = 0x0010 - SimpleDateType = 0x0011 - TimeType = 0x0012 - ShortType = 0x0013 - ByteType = 0x0014 - ListType = 0x0020 - MapType = 0x0021 - SetType = 0x0022 - UserType = 0x0030 - TupleType = 0x0031 - - class ResultMessage(_MessageType): opcode = 0x08 name = 'RESULT' @@ -569,7 +541,7 @@ class ResultMessage(_MessageType): paging_state = None # Names match type name in module scope. Most are imported from cassandra.cqltypes (except CUSTOM_TYPE) - type_codes = _cqltypes_by_code = dict((v, globals()[k]) for k, v in CassandraTypeCodes.__dict__.items() if not k.startswith('_')) + type_codes = _cqltypes_by_code = dict((v, globals()[k]) for k, v in typecodes.__dict__.items() if not k.startswith('_')) _FLAGS_GLOBAL_TABLES_SPEC = 0x0001 _HAS_MORE_PAGES_FLAG = 0x0002 diff --git a/cassandra/typecodes.pxd b/cassandra/typecodes.pxd new file mode 100644 index 00000000..b0405284 --- /dev/null +++ b/cassandra/typecodes.pxd @@ -0,0 +1,28 @@ +cdef enum: + CUSTOM_TYPE + AsciiType + LongType + BytesType + BooleanType + CounterColumnType + DecimalType + DoubleType + FloatType + Int32Type + UTF8Type + DateType + UUIDType + VarcharType + IntegerType + TimeUUIDType + InetAddressType + SimpleDateType + TimeType + ShortType + ByteType + ListType + MapType + SetType + UserType + TupleType + diff --git a/cassandra/typecodes.py b/cassandra/typecodes.py new file mode 100644 index 00000000..651c58d7 --- /dev/null +++ b/cassandra/typecodes.py @@ -0,0 +1,62 @@ +""" +Module with constants for Cassandra type codes. 
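+
+The constant names deliberately match the type class names in
+cassandra.cqltypes, so that cassandra/protocol.py can map each numeric code
+to its class simply by looking the name up in module scope.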
+ +These constants are useful for + + a) mapping messages to cqltypes (cassandra/cqltypes.py) + b) optimizezd dispatching for (de)serialization (cassandra/encoding.py) + +Type codes are repeated here from the Cassandra binary protocol specification: + + 0x0000 Custom: the value is a [string], see above. + 0x0001 Ascii + 0x0002 Bigint + 0x0003 Blob + 0x0004 Boolean + 0x0005 Counter + 0x0006 Decimal + 0x0007 Double + 0x0008 Float + 0x0009 Int + 0x000A Text + 0x000B Timestamp + 0x000C Uuid + 0x000D Varchar + 0x000E Varint + 0x000F Timeuuid + 0x0010 Inet + 0x0020 List: the value is an [option], representing the type + of the elements of the list. + 0x0021 Map: the value is two [option], representing the types of the + keys and values of the map + 0x0022 Set: the value is an [option], representing the type + of the elements of the set +""" + +CUSTOM_TYPE = 0x0000 +AsciiType = 0x0001 +LongType = 0x0002 +BytesType = 0x0003 +BooleanType = 0x0004 +CounterColumnType = 0x0005 +DecimalType = 0x0006 +DoubleType = 0x0007 +FloatType = 0x0008 +Int32Type = 0x0009 +UTF8Type = 0x000A +DateType = 0x000B +UUIDType = 0x000C +VarcharType = 0x000D +IntegerType = 0x000E +TimeUUIDType = 0x000F +InetAddressType = 0x0010 +SimpleDateType = 0x0011 +TimeType = 0x0012 +ShortType = 0x0013 +ByteType = 0x0014 +ListType = 0x0020 +MapType = 0x0021 +SetType = 0x0022 +UserType = 0x0030 +TupleType = 0x0031 + From f0b360a9c718b5d7c74604788a6092870242efcb Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 15:45:28 +0100 Subject: [PATCH 02/70] Cythonize marshalling code --- cassandra/marshal.pxd | 29 ++++++ cassandra/marshal.pyx | 201 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 cassandra/marshal.pxd create mode 100644 cassandra/marshal.pyx diff --git a/cassandra/marshal.pxd b/cassandra/marshal.pxd new file mode 100644 index 00000000..ef7d9858 --- /dev/null +++ b/cassandra/marshal.pxd @@ -0,0 +1,29 @@ +from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, + uint8_t, uint16_t, uint32_t, uint64_t) + +cpdef bytes int64_pack(int64_t x) +cpdef bytes int32_pack(int32_t x) +cpdef bytes int16_pack(int16_t x) +cpdef bytes int8_pack(int8_t x) + +cpdef int64_t int64_unpack(const char *buf) +cpdef int32_t int32_unpack(const char *buf) +cpdef int16_t int16_unpack(const char *buf) +cpdef int8_t int8_unpack(const char *buf) + +cpdef bytes uint64_pack(uint64_t x) +cpdef bytes uint32_pack(uint32_t x) +cpdef bytes uint16_pack(uint16_t x) +cpdef bytes uint8_pack(uint8_t x) + +cpdef uint64_t uint64_unpack(const char *buf) +cpdef uint32_t uint32_unpack(const char *buf) +cpdef uint16_t uint16_unpack(const char *buf) +cpdef uint8_t uint8_unpack(const char *buf) + +cpdef bytes double_pack(double x) +cpdef bytes float_pack(float x) + +cpdef double double_unpack(const char *buf) +cpdef float float_unpack(const char *buf) + diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx new file mode 100644 index 00000000..48036861 --- /dev/null +++ b/cassandra/marshal.pyx @@ -0,0 +1,201 @@ +# cython: profile=True +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six +import sys +import struct +import math + +from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, + uint8_t, uint16_t, uint32_t, uint64_t) + +assert sys.byteorder in ('little', 'big') +is_little_endian = sys.byteorder == 'little' + +# cdef extern from "marshal.h": +# cdef str c_string_to_python(char *p, Py_ssize_t len) + +def _make_packer(format_string): + packer = struct.Struct(format_string) + pack = packer.pack + unpack = lambda s: packer.unpack(s)[0] + return pack, unpack + + +cdef inline bytes pack(char *buf, Py_ssize_t size): + """ + Pack a buffer, given as a char *, into Python bytes in byte order. + """ + if is_little_endian: + swap_order(buf, size) + return buf[:size] + + +cdef inline swap_order(char *buf, Py_ssize_t size): + """ + Swap the byteorder of `buf` in-place (reverse all the bytes). + There are functions ntohl etc, but these may be POSIX-dependent. + """ + cdef Py_ssize_t start, end + cdef char c + for i in range(size/2): + end = size - i - 1 + c = buf[i] + buf[i] = buf[end] + buf[end] = c + +### Packing and unpacking of signed integers + +cpdef inline bytes int64_pack(int64_t x): + return pack( &x, 8) + +cpdef inline int64_t int64_unpack(const char *buf): + # The 'const' makes sure the buffer is not mutated in-place! + cdef int64_t x = ( buf)[0] + swap_order( &x, 8) + return x + +cpdef inline bytes int32_pack(int32_t x): + return pack( &x, 4) + +cpdef inline int32_t int32_unpack(const char *buf): + cdef int32_t x = ( buf)[0] + swap_order( &x, 4) + return x + +cpdef inline bytes int16_pack(int16_t x): + return pack( &x, 2) + +cpdef inline int16_t int16_unpack(const char *buf): + cdef int16_t x = ( buf)[0] + swap_order( &x, 2) + return x + +cpdef inline bytes int8_pack(int8_t x): + return ( &x)[:1] + +cpdef inline int8_t int8_unpack(const char *buf): + return ( buf)[0] + +cpdef inline bytes uint64_pack(uint64_t x): + return pack( &x, 8) + +cpdef inline uint64_t uint64_unpack(const char *buf): + cdef uint64_t x = ( buf)[0] + swap_order( &x, 8) + return x + +cpdef inline bytes uint32_pack(uint32_t x): + return pack( &x, 4) + +cpdef inline uint32_t uint32_unpack(const char *buf): + cdef uint32_t x = ( buf)[0] + swap_order( &x, 4) + return x + +cpdef inline bytes uint16_pack(uint16_t x): + return pack( &x, 2) + +cpdef inline uint16_t uint16_unpack(const char *buf): + cdef uint16_t x = ( buf)[0] + swap_order( &x, 2) + return x + +cpdef inline bytes uint8_pack(uint8_t x): + return pack( &x, 1) + +cpdef inline uint8_t uint8_unpack(const char *buf): + return ( buf)[0] + +cpdef inline bytes double_pack(double x): + return pack( &x, 8) + +cpdef inline double double_unpack(const char *buf): + cdef double x = ( buf)[0] + swap_order( &x, 8) + return x + +cpdef inline bytes float_pack(float x): + return pack( &x, 4) + +cpdef inline float float_unpack(const char *buf): + cdef float x = ( buf)[0] + swap_order( &x, 4) + return x + +# int64_pack, int64_unpack = _make_packer('>q') +# int32_pack, int32_unpack = _make_packer('>i') +# int16_pack, int16_unpack = _make_packer('>h') +# int8_pack, int8_unpack = _make_packer('>b') +# uint64_pack, uint64_unpack = 
_make_packer('>Q') +# uint32_pack, uint32_unpack = _make_packer('>I') +# uint16_pack, uint16_unpack = _make_packer('>H') +# uint8_pack, uint8_unpack = _make_packer('>B') +# float_pack, float_unpack = _make_packer('>f') +# double_pack, double_unpack = _make_packer('>d') + +# Special case for cassandra header +header_struct = struct.Struct('>BBbB') +header_pack = header_struct.pack +header_unpack = header_struct.unpack + +# in protocol version 3 and higher, the stream ID is two bytes +v3_header_struct = struct.Struct('>BBhB') +v3_header_pack = v3_header_struct.pack +v3_header_unpack = v3_header_struct.unpack + + +if six.PY3: + def varint_unpack(term): + val = int(''.join("%02x" % i for i in term), 16) + if (term[0] & 128) != 0: + # There is a bug in Cython (0.20 - 0.22), where if we do + # '1 << (len(term) * 8)' Cython generates '1' directly into the + # C code, causing integer overflows. Treat it as an object for now + val -= ( 1L) << (len(term) * 8) + return val +else: + def varint_unpack(term): # noqa + val = int(term.encode('hex'), 16) + if (ord(term[0]) & 128) != 0: + val = val - (1 << (len(term) * 8)) + return val + + +def bitlength(n): + # return int(math.log2(n)) + 1 + bitlen = 0 + while n > 0: + n >>= 1 + bitlen += 1 + return bitlen + + +def varint_pack(big): + pos = True + if big == 0: + return b'\x00' + if big < 0: + bytelength = bitlength(abs(big) - 1) // 8 + 1 + big = (1 << bytelength * 8) + big + pos = False + revbytes = bytearray() + while big > 0: + revbytes.append(big & 0xff) + big >>= 8 + if pos and revbytes[-1] & 0x80: + revbytes.append(0) + revbytes.reverse() + return six.binary_type(revbytes) From ad7e4e08481b8cd48c5724256c955604713101eb Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 15:47:04 +0100 Subject: [PATCH 03/70] Start on Cython version of ProtocolHandler --- cassandra/bytesio.pxd | 7 ++ cassandra/bytesio.pyx | 56 ++++++++++ cassandra/cython_protocol_handler.pyx | 154 ++++++++++++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 cassandra/bytesio.pxd create mode 100644 cassandra/bytesio.pyx create mode 100644 cassandra/cython_protocol_handler.pyx diff --git a/cassandra/bytesio.pxd b/cassandra/bytesio.pxd new file mode 100644 index 00000000..349fd600 --- /dev/null +++ b/cassandra/bytesio.pxd @@ -0,0 +1,7 @@ +cdef class BytesIOReader: + cdef bytes buf + cdef char *buf_ptr + cdef Py_ssize_t pos + cdef Py_ssize_t size + cdef char *read(self, Py_ssize_t n = ?) + diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx new file mode 100644 index 00000000..505fe391 --- /dev/null +++ b/cassandra/bytesio.pyx @@ -0,0 +1,56 @@ +# ython profile=True + +cdef class BytesIOReader: + """ + This class provides efficient support for reading bytes from a 'bytes' buffer, + by returning char * values directly without allocating intermediate objects. + """ + + def __init__(self, bytes buf): + self.buf = buf + self.size = len(buf) + self.buf_ptr = self.buf + + cdef char *read(self, Py_ssize_t n = -1): + """Read at most size bytes from the file + (less if the read hits EOF before obtaining size bytes). + + If the size argument is negative or omitted, read all data until EOF + is reached. The bytes are returned as a string object. An empty + string is returned when EOF is encountered immediately. 
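+
+        Note: unlike the file-like read() this docstring describes, the cdef
+        version returns a raw 'char *' pointing into the internal buffer
+        rather than a new bytes object; the bytes-returning behaviour
+        described above is what PyBytesIOReader.read below provides.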
+ """ + cdef Py_ssize_t newpos = self.pos + n + cdef char *res + + if n < 0: + newpos = self.size + elif newpos > self.size: + self.pos = self.size + return b'' + else: + res = self.buf_ptr + self.pos + self.pos = newpos + return res + + +class PyBytesIOReader(BytesIOReader): + """ + Python-compatible BytesIOReader class + """ + + def read(self, n = -1): + """Read at most size bytes from the file + (less if the read hits EOF before obtaining size bytes). + + If the size argument is negative or omitted, read all data until EOF + is reached. The bytes are returned as a string object. An empty + string is returned when EOF is encountered immediately. + """ + if n is None or n < 0: + newpos = self.len + else: + newpos = min(self.pos+n, self.len) + r = self.buf[self.pos:newpos] + self.pos = newpos + return r + diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx new file mode 100644 index 00000000..85a5945a --- /dev/null +++ b/cassandra/cython_protocol_handler.pyx @@ -0,0 +1,154 @@ +# ython: profile=True + +from libc.stdint cimport int64_t, int32_t + +# from cassandra.marshal cimport (int8_pack, int8_unpack, int16_pack, int16_unpack, +# uint16_pack, uint16_unpack, uint32_pack, uint32_unpack, +# int32_pack, int32_unpack, int64_pack, int64_unpack, float_pack, float_unpack, double_pack, double_unpack) + +from cassandra.marshal import varint_pack, varint_unpack +from cassandra import util +from cassandra.cqltypes import EMPTY +from cassandra.protocol import ResultMessage, ProtocolHandler + +from cassandra.bytesio cimport BytesIOReader +from cassandra cimport typecodes + +import numpy as np + +include "marshal.pyx" + +class FastResultMessage(ResultMessage): + """ + Cython version of Result Message that has a faster implementation of + recv_results_row. + """ + # type_codes = ResultMessage.type_codes.copy() + code_to_type = dict((v, k) for k, v in ResultMessage.type_codes.items()) + + @classmethod + def recv_results_rows(cls, f, protocol_version, user_type_map): + paging_state, column_metadata = cls.recv_results_metadata(f, user_type_map) + + colnames = [c[2] for c in column_metadata] + coltypes = [c[3] for c in column_metadata] + colcodes = np.array( + [cls.code_to_type.get(coltype, -1) for coltype in coltypes], + dtype=np.dtype('i')) + parsed_rows = parse_rows(BytesIOReader(f.read()), colnames, + coltypes, colcodes, protocol_version) + return (paging_state, (colnames, parsed_rows)) + + +cdef parse_rows(BytesIOReader reader, list colnames, list coltypes, + int[::1] colcodes, protocol_version): + cdef Py_ssize_t i, rowcount + cdef char *raw_val + cdef int32_t raw_val_size + rowcount = read_int(reader) + # return RowIterator(reader, coltypes, colcodes, protocol_version, rowcount) + return [parse_row(reader, coltypes, colcodes, protocol_version) + for i in range(rowcount)] + + +cdef class RowIterator: + """ + Result iterator for a set of rows + + There seems to be an issue with generator expressions + memoryviews, so we + have a special iterator class instead. 
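+
+    Illustrative usage, mirroring the commented-out call in parse_rows above:
+
+        rows = RowIterator(reader, coltypes, colcodes, protocol_version,
+                           rowcount)
+        for row in rows:
+            ...  # each row is a list of deserialized column values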
+ """ + cdef list coltypes + cdef int[::1] colcodes + cdef Py_ssize_t rowcount, pos + cdef BytesIOReader reader + cdef object protocol_version + + def __init__(self, reader, coltypes, colcodes, protocol_version, rowcount): + self.reader = reader + self.coltypes = coltypes + self.colcodes = colcodes + self.protocol_version = protocol_version + self.rowcount = rowcount + self.pos = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.pos >= self.rowcount: + raise StopIteration + self.pos += 1 + return parse_row(self.reader, self.coltypes, self.colcodes, self.protocol_version) + + next = __next__ + + +cdef inline parse_row(BytesIOReader reader, list coltypes, int[::1] colcodes, + protocol_version): + cdef Py_ssize_t j + + row = [] + for j, ctype in enumerate(coltypes): + raw_val_size = read_int(reader) + if raw_val_size < 0: + val = None + else: + raw_val = reader.read(raw_val_size) + val = from_binary(ctype, colcodes[j], raw_val, + raw_val_size, protocol_version) + row.append(val) + + return row + + +class CythonProtocolHandler(ProtocolHandler): + """ + Use FastResultMessage to decode query result message messages. + """ + my_opcodes = ProtocolHandler.message_types_by_opcode.copy() + my_opcodes[FastResultMessage.opcode] = FastResultMessage + message_types_by_opcode = my_opcodes + + +cdef inline int32_t read_int(BytesIOReader reader): + return int32_unpack(reader.read(4)) + + +cdef inline from_binary(cqltype, int typecode, char *byts, int32_t size, protocol_version): + """ + Deserialize a bytestring into a value. See the deserialize() method + for more information. This method differs in that if None or the empty + string is passed in, None may be returned. + + This method provides a fast-path deserialization routine. + """ + if size == 0 and cqltype.empty_binary_ok: + return empty(cqltype) + return deserialize(cqltype, typecode, byts, size, protocol_version) + + +cdef empty(cqltype): + return EMPTY if cqltype.support_empty_values else None + + +def to_binary(cqltype, val, protocol_version): + """ + Serialize a value into a bytestring. See the serialize() method for + more information. This method differs in that if None is passed in, + the result is the empty string. 
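+
+    For example, to_binary(Int32Type, None, 3) returns b'', while a non-None
+    value is passed straight through to cqltype.serialize(val, protocol_version).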
+ """ + return b'' if val is None else cqltype.serialize(val, protocol_version) + + +cdef deserialize(cqltype, int typecode, char *byts, int32_t size, protocol_version): + if typecode == typecodes.LongType: + return int64_unpack(byts) + else: + return deserialize_generic(cqltype, typecode, byts, size, protocol_version) + +cdef deserialize_generic(cqltype, int typecode, char *byts, int32_t size, + protocol_version): + print("deserialize", cqltype) + return cqltype.deserialize(byts[:size], protocol_version) + From 39af4e15698081348dc97acdcaa4ffcd284f6ae2 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 16:03:23 +0100 Subject: [PATCH 04/70] Add Cython modules to setup.py --- cassandra/bytesio.pyx | 2 +- cassandra/cython_protocol_handler.pyx | 2 +- cassandra/marshal.pyx | 2 +- setup.py | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index 505fe391..82887f43 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -1,4 +1,4 @@ -# ython profile=True +# -- cython profile=True cdef class BytesIOReader: """ diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index 85a5945a..add1e9f5 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -1,4 +1,4 @@ -# ython: profile=True +# -- cython: profile=True from libc.stdint cimport int64_t, int32_t diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 48036861..0efbf705 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -1,4 +1,4 @@ -# cython: profile=True +# -- cython: profile=True # Copyright 2013-2015 DataStax, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/setup.py b/setup.py index 37899c2e..7083d7aa 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ from distutils.errors import (CCompilerError, DistutilsPlatformError, DistutilsExecError) from distutils.cmd import Command - try: import subprocess has_subprocess = True @@ -262,11 +261,13 @@ if "--no-libev" not in sys.argv and not is_windows: if "--no-cython" not in sys.argv: try: from Cython.Build import cythonize - cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'marshal', 'metadata', 'pool', 'protocol', 'query', 'util'] + cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', 'pool', 'protocol', 'query', 'util'] compile_args = [] if is_windows else ['-Wno-unused-function'] extensions.extend(cythonize( [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], extra_compile_args=compile_args) for m in cython_candidates], exclude_failures=True)) + + extensions.extend(cythonize("cassandra/*.pyx")) except ImportError: sys.stderr.write("Cython is not installed. Not compiling core driver files as extensions (optional).") From 92457198cca1a3832455fbefcc81e3fed351b33d Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 16:18:19 +0100 Subject: [PATCH 05/70] Use return type void for swap_order --- cassandra/marshal.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 0efbf705..8ffe3e46 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -43,7 +43,7 @@ cdef inline bytes pack(char *buf, Py_ssize_t size): return buf[:size] -cdef inline swap_order(char *buf, Py_ssize_t size): +cdef inline void swap_order(char *buf, Py_ssize_t size): """ Swap the byteorder of `buf` in-place (reverse all the bytes). 
There are functions ntohl etc, but these may be POSIX-dependent. From fe67aec185f63576e093d82a0491789a64301467 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 16:27:17 +0100 Subject: [PATCH 06/70] Make sure swap_order uses no PyObjects --- cassandra/marshal.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 8ffe3e46..529f45e7 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -48,9 +48,9 @@ cdef inline void swap_order(char *buf, Py_ssize_t size): Swap the byteorder of `buf` in-place (reverse all the bytes). There are functions ntohl etc, but these may be POSIX-dependent. """ - cdef Py_ssize_t start, end + cdef Py_ssize_t start, end, i cdef char c - for i in range(size/2): + for i in range(size//2): end = size - i - 1 c = buf[i] buf[i] = buf[end] From 2b7997830a3073a2e73942d36f2e3b22f7443a6c Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 23 Jul 2015 16:31:44 +0100 Subject: [PATCH 07/70] Check endianness before byte-swapping --- cassandra/marshal.pyx | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 529f45e7..2ecb0fa5 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -22,7 +22,7 @@ from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t) assert sys.byteorder in ('little', 'big') -is_little_endian = sys.byteorder == 'little' +cdef bint is_little_endian = sys.byteorder == 'little' # cdef extern from "marshal.h": # cdef str c_string_to_python(char *p, Py_ssize_t len) @@ -38,23 +38,25 @@ cdef inline bytes pack(char *buf, Py_ssize_t size): """ Pack a buffer, given as a char *, into Python bytes in byte order. """ - if is_little_endian: - swap_order(buf, size) + swap_order(buf, size) return buf[:size] cdef inline void swap_order(char *buf, Py_ssize_t size): """ - Swap the byteorder of `buf` in-place (reverse all the bytes). + Swap the byteorder of `buf` in-place on little-endian platforms + (reverse all the bytes). There are functions ntohl etc, but these may be POSIX-dependent. """ cdef Py_ssize_t start, end, i cdef char c - for i in range(size//2): - end = size - i - 1 - c = buf[i] - buf[i] = buf[end] - buf[end] = c + + if is_little_endian: + for i in range(size//2): + end = size - i - 1 + c = buf[i] + buf[i] = buf[end] + buf[end] = c ### Packing and unpacking of signed integers From 81ff98efc2cdc6cb256789806ff5bbbdd3c46e90 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Wed, 29 Jul 2015 10:38:15 +0100 Subject: [PATCH 08/70] Start on pluggable row parsers (e.g. 
tuple vs numpy record) --- cassandra/bytesio.pyx | 2 +- cassandra/cython_protocol_handler.pyx | 240 +++++++++++++++----------- cassandra/datatypes.pxd | 16 ++ cassandra/datatypes.pyx | 52 ++++++ cassandra/ioutils.pyx | 21 +++ cassandra/marshal.pyx | 8 + cassandra/rowparser.pxd | 5 + cassandra/rowparser.pyx | 70 ++++++++ setup.py | 14 +- 9 files changed, 319 insertions(+), 109 deletions(-) create mode 100644 cassandra/datatypes.pxd create mode 100644 cassandra/datatypes.pyx create mode 100644 cassandra/ioutils.pyx create mode 100644 cassandra/rowparser.pxd create mode 100644 cassandra/rowparser.pyx diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index 82887f43..d392b23f 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -1,4 +1,4 @@ -# -- cython profile=True +# -- cython: profile=True cdef class BytesIOReader: """ diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index add1e9f5..644985dc 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -8,16 +8,22 @@ from libc.stdint cimport int64_t, int32_t from cassandra.marshal import varint_pack, varint_unpack from cassandra import util -from cassandra.cqltypes import EMPTY +from cassandra.cqltypes import EMPTY, LongType from cassandra.protocol import ResultMessage, ProtocolHandler from cassandra.bytesio cimport BytesIOReader from cassandra cimport typecodes +from cassandra.datatypes cimport DataType +from cassandra.rowparser cimport RowParser -import numpy as np +from cassandra.rowparser import TupleRowParser +from cassandra.datatypes import Int64, GenericDataType + +from cython.view cimport array as cython_array include "marshal.pyx" + class FastResultMessage(ResultMessage): """ Cython version of Result Message that has a faster implementation of @@ -32,74 +38,24 @@ class FastResultMessage(ResultMessage): colnames = [c[2] for c in column_metadata] coltypes = [c[3] for c in column_metadata] - colcodes = np.array( - [cls.code_to_type.get(coltype, -1) for coltype in coltypes], - dtype=np.dtype('i')) - parsed_rows = parse_rows(BytesIOReader(f.read()), colnames, - coltypes, colcodes, protocol_version) + + cdef DataType[::1] datatypes + datatypes = obj_array( + [Int64() if coltype == LongType else GenericDataType(coltype) for coltype in coltypes]) + # [GenericDataType(coltype) for coltype in coltypes]) + + # parsed_rows = parse_rows2(BytesIOReader(f.read()), colnames, coltypes, protocol_version) + parsed_rows = parse_rows(BytesIOReader(f.read()), datatypes, protocol_version) return (paging_state, (colnames, parsed_rows)) -cdef parse_rows(BytesIOReader reader, list colnames, list coltypes, - int[::1] colcodes, protocol_version): - cdef Py_ssize_t i, rowcount - cdef char *raw_val - cdef int32_t raw_val_size - rowcount = read_int(reader) - # return RowIterator(reader, coltypes, colcodes, protocol_version, rowcount) - return [parse_row(reader, coltypes, colcodes, protocol_version) - for i in range(rowcount)] - - -cdef class RowIterator: - """ - Result iterator for a set of rows - - There seems to be an issue with generator expressions + memoryviews, so we - have a special iterator class instead. 
- """ - cdef list coltypes - cdef int[::1] colcodes - cdef Py_ssize_t rowcount, pos - cdef BytesIOReader reader - cdef object protocol_version - - def __init__(self, reader, coltypes, colcodes, protocol_version, rowcount): - self.reader = reader - self.coltypes = coltypes - self.colcodes = colcodes - self.protocol_version = protocol_version - self.rowcount = rowcount - self.pos = 0 - - def __iter__(self): - return self - - def __next__(self): - if self.pos >= self.rowcount: - raise StopIteration - self.pos += 1 - return parse_row(self.reader, self.coltypes, self.colcodes, self.protocol_version) - - next = __next__ - - -cdef inline parse_row(BytesIOReader reader, list coltypes, int[::1] colcodes, - protocol_version): - cdef Py_ssize_t j - - row = [] - for j, ctype in enumerate(coltypes): - raw_val_size = read_int(reader) - if raw_val_size < 0: - val = None - else: - raw_val = reader.read(raw_val_size) - val = from_binary(ctype, colcodes[j], raw_val, - raw_val_size, protocol_version) - row.append(val) - - return row +def obj_array(list objs): + cdef object[:] arr + arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") + # arr[:] = objs # This does not work (segmentation faults) + for i, obj in enumerate(objs): + arr[i] = obj + return arr class CythonProtocolHandler(ProtocolHandler): @@ -111,44 +67,120 @@ class CythonProtocolHandler(ProtocolHandler): message_types_by_opcode = my_opcodes +cdef parse_rows(BytesIOReader reader, DataType[::1] datatypes, protocol_version): + cdef Py_ssize_t i, rowcount + cdef RowParser parser = TupleRowParser(len(datatypes), datatypes) + rowcount = read_int(reader) + return [parser.unpack_row(reader, protocol_version) for i in range(rowcount)] + + cdef inline int32_t read_int(BytesIOReader reader): return int32_unpack(reader.read(4)) -cdef inline from_binary(cqltype, int typecode, char *byts, int32_t size, protocol_version): - """ - Deserialize a bytestring into a value. See the deserialize() method - for more information. This method differs in that if None or the empty - string is passed in, None may be returned. - - This method provides a fast-path deserialization routine. - """ - if size == 0 and cqltype.empty_binary_ok: - return empty(cqltype) - return deserialize(cqltype, typecode, byts, size, protocol_version) - - -cdef empty(cqltype): - return EMPTY if cqltype.support_empty_values else None - - -def to_binary(cqltype, val, protocol_version): - """ - Serialize a value into a bytestring. See the serialize() method for - more information. This method differs in that if None is passed in, - the result is the empty string. 
- """ - return b'' if val is None else cqltype.serialize(val, protocol_version) - - -cdef deserialize(cqltype, int typecode, char *byts, int32_t size, protocol_version): - if typecode == typecodes.LongType: - return int64_unpack(byts) - else: - return deserialize_generic(cqltype, typecode, byts, size, protocol_version) - -cdef deserialize_generic(cqltype, int typecode, char *byts, int32_t size, - protocol_version): - print("deserialize", cqltype) - return cqltype.deserialize(byts[:size], protocol_version) - +# cdef parse_rows2(BytesIOReader reader, list colnames, list coltypes, protocol_version): +# cdef Py_ssize_t i, rowcount +# cdef char *raw_val +# cdef int[::1] colcodes +# +# colcodes = np.array( +# [FastResultMessage.code_to_type.get(coltype, -1) for coltype in coltypes], +# dtype=np.dtype('i')) +# +# rowcount = read_int(reader) +# # return RowIterator(reader, coltypes, colcodes, protocol_version, rowcount) +# return [parse_row(reader, coltypes, colcodes, protocol_version) +# for i in range(rowcount)] +# +# +# cdef class RowIterator: +# """ +# Result iterator for a set of rows +# +# There seems to be an issue with generator expressions + memoryviews, so we +# have a special iterator class instead. +# """ +# +# cdef list coltypes +# cdef int[::1] colcodes +# cdef Py_ssize_t rowcount, pos +# cdef BytesIOReader reader +# cdef object protocol_version +# +# def __init__(self, reader, coltypes, colcodes, protocol_version, rowcount): +# self.reader = reader +# self.coltypes = coltypes +# self.colcodes = colcodes +# self.protocol_version = protocol_version +# self.rowcount = rowcount +# self.pos = 0 +# +# def __iter__(self): +# return self +# +# def __next__(self): +# if self.pos >= self.rowcount: +# raise StopIteration +# self.pos += 1 +# return parse_row(self.reader, self.coltypes, self.colcodes, self.protocol_version) +# +# next = __next__ +# +# +# cdef inline parse_row(BytesIOReader reader, list coltypes, int[::1] colcodes, +# protocol_version): +# cdef Py_ssize_t j +# +# row = [] +# for j, ctype in enumerate(coltypes): +# raw_val_size = read_int(reader) +# if raw_val_size < 0: +# val = None +# else: +# raw_val = reader.read(raw_val_size) +# val = from_binary(ctype, colcodes[j], raw_val, +# raw_val_size, protocol_version) +# row.append(val) +# +# return row +# +# +# cdef inline from_binary(cqltype, int typecode, char *byts, int32_t size, protocol_version): +# """ +# Deserialize a bytestring into a value. See the deserialize() method +# for more information. This method differs in that if None or the empty +# string is passed in, None may be returned. +# +# This method provides a fast-path deserialization routine. +# """ +# if size == 0 and cqltype.empty_binary_ok: +# return empty(cqltype) +# return deserialize(cqltype, typecode, byts, size, protocol_version) +# +# +# cdef empty(cqltype): +# return EMPTY if cqltype.support_empty_values else None +# +# +# def to_binary(cqltype, val, protocol_version): +# """ +# Serialize a value into a bytestring. See the serialize() method for +# more information. This method differs in that if None is passed in, +# the result is the empty string. 
+# """ +# return b'' if val is None else cqltype.serialize(val, protocol_version) +# +# cdef DataType obj = Int64() +# +# cdef deserialize(cqltype, int typecode, char *byts, int32_t size, protocol_version): +# # if typecode == typecodes.LongType: +# # # return int64_unpack(byts) +# # return obj.deserialize(byts, size, protocol_version) +# # else: +# # return deserialize_generic(cqltype, typecode, byts, size, protocol_version) +# return cqltype.deserialize(byts[:size], protocol_version) +# +# cdef deserialize_generic(cqltype, int typecode, char *byts, int32_t size, +# protocol_version): +# return cqltype.deserialize(byts[:size], protocol_version) +# \ No newline at end of file diff --git a/cassandra/datatypes.pxd b/cassandra/datatypes.pxd new file mode 100644 index 00000000..40f8d742 --- /dev/null +++ b/cassandra/datatypes.pxd @@ -0,0 +1,16 @@ +cdef class LLDataType: + """ + Low-level Cassandra datatype + """ + + cdef Py_ssize_t size + + cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, void *out, protocol_version) + +cdef class DataType: + cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version) + + +cdef class Int64(DataType): + pass + diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx new file mode 100644 index 00000000..5fc61cfa --- /dev/null +++ b/cassandra/datatypes.pyx @@ -0,0 +1,52 @@ +include 'marshal.pyx' + +from cassandra import cqltypes + + +cdef class LLDataType: + cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, + void *out, protocol_version): + pass + + +cdef class DataType: + cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): + pass + + +cdef class LLInt64(LLDataType): + """ + Low-level Cassandra datatype + """ + + cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, void *out, protocol_version): + cdef int64_t x = int64_unpack(buf) + ( out)[0] = x + + +cdef class Int64(DataType): + + cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): + cdef int64_t x = int64_unpack(buf) + return x + + def __str__(self): + return "int64" + + +cdef class GenericDataType(DataType): + """ + Wrap a generic datatype for deserialization + """ + + cdef object cqltype + + def __init__(self, cqltype): + self.cqltype = cqltype + + cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): + return self.cqltype.deserialize(buf[:size], protocol_version) + + def __str__(self): + return "GenericDataType(%s)" % (self.cqltype,) + diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx new file mode 100644 index 00000000..8749457b --- /dev/null +++ b/cassandra/ioutils.pyx @@ -0,0 +1,21 @@ +include 'marshal.pyx' +from libc.stdint cimport int32_t +from cassandra.bytesio cimport BytesIOReader + + +cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out): + """ + Get a pointer into the buffer provided by BytesIOReader for the + next data item in the stream of values. + """ + raw_val_size = read_int(reader) + size_out[0] = raw_val_size + if raw_val_size < 0: + return NULL + else: + return reader.read(raw_val_size) + + +cdef inline int32_t read_int(BytesIOReader reader): + return int32_unpack(reader.read(4)) + diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 2ecb0fa5..85c3504c 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -1,4 +1,5 @@ # -- cython: profile=True +# # Copyright 2013-2015 DataStax, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -66,6 +67,10 @@ cpdef inline bytes int64_pack(int64_t x): cpdef inline int64_t int64_unpack(const char *buf): # The 'const' makes sure the buffer is not mutated in-place! cdef int64_t x = ( buf)[0] + cdef char *p = &x + # if is_little_endian: + # p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7] = ( + # p[7], p[6], p[5], p[4], p[3], p[2], p[1], p[0]) swap_order( &x, 8) return x @@ -74,7 +79,10 @@ cpdef inline bytes int32_pack(int32_t x): cpdef inline int32_t int32_unpack(const char *buf): cdef int32_t x = ( buf)[0] + cdef char *p = &x swap_order( &x, 4) + # if is_little_endian: + # p[0], p[1], p[2], p[3] = p[3], p[2], p[1], p[0] return x cpdef inline bytes int16_pack(int16_t x): diff --git a/cassandra/rowparser.pxd b/cassandra/rowparser.pxd new file mode 100644 index 00000000..7597cca9 --- /dev/null +++ b/cassandra/rowparser.pxd @@ -0,0 +1,5 @@ +from cassandra.bytesio cimport BytesIOReader + +cdef class RowParser: + cpdef unpack_row(self, BytesIOReader reader, protocol_version) + diff --git a/cassandra/rowparser.pyx b/cassandra/rowparser.pyx new file mode 100644 index 00000000..d09bdf94 --- /dev/null +++ b/cassandra/rowparser.pyx @@ -0,0 +1,70 @@ +include "ioutils.pyx" + +from cpython.tuple cimport ( + PyTuple_New, + # Return value: New reference. + # Return a new tuple object of size len, or NULL on failure. + PyTuple_SET_ITEM, + # Like PyTuple_SetItem(), but does no error checking, and should + # only be used to fill in brand new tuples. Note: This function + # ``steals'' a reference to o. + ) + +from cpython.ref cimport ( + Py_INCREF + # void Py_INCREF(object o) + # Increment the reference count for object o. The object must not + # be NULL; if you aren't sure that it isn't NULL, use + # Py_XINCREF(). + ) + +from cassandra.bytesio cimport BytesIOReader +from cassandra.datatypes cimport DataType + + +cdef class RowParser: + cpdef unpack_row(self, BytesIOReader reader, protocol_version): + """ + Unpack a single row of data in a ResultMessage. 
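+
+        Concrete subclasses (such as TupleRowParser below) read one
+        length-prefixed value per column from `reader` and return the
+        decoded row.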
+ """ + raise NotImplementedError + + +cdef class TupleRowParser(RowParser): + """ + Parse a single returned row into a tuple of objects: + + (obj1, ..., objN) + + Attributes + =========== + datatypes: + this is a memoryview of N DataType objects that can deserialize bytes + into objects + """ + + cdef DataType[::1] datatypes + cdef Py_ssize_t size + + def __init__(self, Py_ssize_t n, DataType[::1] datatypes): + self.datatypes = datatypes + self.size = n + + cpdef unpack_row(self, BytesIOReader reader, protocol_version): + cdef char *buf + cdef Py_ssize_t i, bufsize, rowsize = self.size + cdef DataType dt + cdef tuple res = PyTuple_New(self.size) + + for i in range(rowsize): + buf = get_buf(reader, &bufsize) + if buf == NULL: + val = None + else: + dt = self.datatypes[i] + val = dt.deserialize(buf, bufsize, protocol_version) + + Py_INCREF(val) + PyTuple_SET_ITEM(res, i, val) + + return res diff --git a/setup.py b/setup.py index 7083d7aa..7fe2631a 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ if __name__ == '__main__' and sys.argv[1] == "install": except ImportError: pass +PROFILING = False class DocCommand(Command): @@ -261,13 +262,18 @@ if "--no-libev" not in sys.argv and not is_windows: if "--no-cython" not in sys.argv: try: from Cython.Build import cythonize - cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', 'pool', 'protocol', 'query', 'util'] + # cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', 'pool', 'protocol', 'query', 'util'] + cython_candidates = [] compile_args = [] if is_windows else ['-Wno-unused-function'] + directives = {'profile': PROFILING} # this seems to have no effect... extensions.extend(cythonize( - [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], extra_compile_args=compile_args) for m in cython_candidates], + [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], + extra_compile_args=compile_args, + compiler_directives=directives) + for m in cython_candidates], exclude_failures=True)) - - extensions.extend(cythonize("cassandra/*.pyx")) + extensions.extend(cythonize("cassandra/*.pyx", + compiler_directives=directives)) except ImportError: sys.stderr.write("Cython is not installed. 
Not compiling core driver files as extensions (optional).") From 1a6534b575b3b8f19a81f3f88a9583807b5cacc3 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 30 Jul 2015 17:02:27 +0100 Subject: [PATCH 09/70] (Optional) NumPy row parser --- cassandra/datatypes.pxd | 17 ++--- cassandra/datatypes.pyx | 19 ----- cassandra/marshal.pyx | 4 +- cassandra/numpyparser.pyx | 144 ++++++++++++++++++++++++++++++++++++++ cassandra/util.py | 3 + 5 files changed, 158 insertions(+), 29 deletions(-) create mode 100644 cassandra/numpyparser.pyx diff --git a/cassandra/datatypes.pxd b/cassandra/datatypes.pxd index 40f8d742..d4db2b02 100644 --- a/cassandra/datatypes.pxd +++ b/cassandra/datatypes.pxd @@ -1,11 +1,12 @@ -cdef class LLDataType: - """ - Low-level Cassandra datatype - """ - - cdef Py_ssize_t size - - cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, void *out, protocol_version) +# cdef class LLDataType: +# """ +# Low-level Cassandra datatype +# """ +# +# cdef Py_ssize_t size +# +# cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, +# Py_ssize_t index, void *out, protocol_version) cdef class DataType: cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version) diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx index 5fc61cfa..a1c50fcb 100644 --- a/cassandra/datatypes.pyx +++ b/cassandra/datatypes.pyx @@ -1,29 +1,10 @@ include 'marshal.pyx' -from cassandra import cqltypes - - -cdef class LLDataType: - cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, - void *out, protocol_version): - pass - - cdef class DataType: cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): pass -cdef class LLInt64(LLDataType): - """ - Low-level Cassandra datatype - """ - - cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, void *out, protocol_version): - cdef int64_t x = int64_unpack(buf) - ( out)[0] = x - - cdef class Int64(DataType): cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 85c3504c..92fb1293 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -22,8 +22,8 @@ import math from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t) -assert sys.byteorder in ('little', 'big') -cdef bint is_little_endian = sys.byteorder == 'little' +cdef bint is_little_endian +from cassandra.util import is_little_endian # cdef extern from "marshal.h": # cdef str c_string_to_python(char *p, Py_ssize_t len) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx new file mode 100644 index 00000000..73eeea95 --- /dev/null +++ b/cassandra/numpyparser.pyx @@ -0,0 +1,144 @@ +""" +This module provider an optional protocol parser that returns +NumPy arrays. + +============================================================================= +This module should not be imported by any of the main python-driver modules, +as numpy is an optional dependency. 
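+Code that wants the NumPy-based row parser must import this module
+explicitly and opt in.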
+============================================================================= +""" + +include "ioutils.pyx" + +from libc.stdint cimport uint64_t + +from cassandra.rowparser cimport RowParser +from cassandra.bytesio cimport BytesIOReader +from cassandra.datatypes cimport DataType +from cassandra import cqltypes + +import numpy as np +cimport numpy as np + +from cassandra.util import is_little_endian + +from cpython.ref cimport Py_INCREF, PyObject + +cdef extern from "Python.h": + # An integer type large enough to hold a pointer + ctypedef uint64_t Py_uintptr_t + +# ctypedef struct TypeRepr: +# Py_ssize_t size +# int is_object + +ctypedef struct ArrRepr: + # TypeRepr typ + Py_uintptr_t buf_ptr + Py_ssize_t stride + int is_object + +_cqltype_to_numpy = { + cqltypes.LongType: np.dtype('>i8'), + cqltypes.CounterColumnType: np.dtype('>i8'), + cqltypes.Int32Type: np.dtype('>i4'), + cqltypes.ShortType: np.dtype('>i2'), + cqltypes.FloatType: np.dtype('>f4'), + cqltypes.DoubleType: np.dtype('>f8'), +} + + +# cdef type_repr(coltype): +# """ +# Get a low-level type representation for the cqltype +# """ +# cdef TypeRepr res +# if coltype in _cqltype_to_numpy: +# dtype = _cqltype_to_numpy[coltype] +# res.size = dtype.itemsize +# res.is_object = False +# else: +# res.size = sizeof(PyObject *) +# res.is_object = True +# return res + + +cdef ArrRepr array_repr(np.ndarray arr, coltype): + """ + Construct a low-level array representation + """ + assert arr.ndim == 1, "Expected a one-dimensional array" + + cdef ArrRepr res + # Get the data pointer to the underlying memory of the numpy array + res.buf_ptr = arr.ctypes.data + res.stride = arr.strides[0] + res.is_object = coltype in _cqltype_to_numpy + return res + + +cdef class NativeRowParser(RowParser): + """ + This is a row parser that copies bytes into arrays (e.g. NumPy arrays) + for types it recognizes, such as int64. Values of other types are + converted to objects. + + NOTE: This class is stateful, in that every time unpack_row is called it + advanced the pointer into the array by updates the buf_ptr field + of self.arrays + """ + + # ArrRepr contains a 'buf_ptr' field, which is not supported as a memoryview dtype + cdef ArrRepr[::1] arrays + cdef DataType[::1] datatypes + cdef Py_ssize_t size + + def __init__(self, ArrRepr[::1] arrays, DataType[::1] datatypes): + self.arrays = arrays + self.datatypes = datatypes + self.size = len(datatypes) + + cpdef unpack_row(self, BytesIOReader reader, protocol_version): + cdef char *buf + cdef Py_ssize_t i, bufsize, rowsize = self.size + cdef ArrRepr arr + + for i in range(rowsize): + buf = get_buf(reader, &bufsize) + if buf == NULL: + raise ValueError("Unexpected end of stream") + + arr = self.arrays[i] + + if arr.is_object: + dt = self.datatypes[i] + val = dt.deserialize(buf, bufsize, protocol_version) + Py_INCREF(val) + ( arr.buf_ptr)[0] = val + else: + memcopy(buf, arr.buf_ptr, bufsize) + + # Update the pointer into the array for the next time + self.arrays[i].buf_ptr += arr.stride + + +cdef inline memcopy(char *src, char *dst, Py_ssize_t size): + """ + Our own simple memcopy which can be inlined. This is useful because our data types + are only a few bytes. + """ + cdef Py_ssize_t i + for i in range(size): + dst[i] = src[i] + + +def make_native_byteorder(arr): + """ + Make sure all values have a native endian in the NumPy arrays. + """ + if is_little_endian: + # We have arrays in big-endian order. First swap the bytes + # into little endian order, and then update the numpy dtype + # accordingly (e.g. 
from '>i8' to ' Date: Thu, 30 Jul 2015 17:51:49 +0100 Subject: [PATCH 10/70] Some more work on NumPy row parsing and array allocation --- cassandra/numpyparser.pyx | 107 ++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 39 deletions(-) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 73eeea95..763c3ab2 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -11,33 +11,46 @@ as numpy is an optional dependency. include "ioutils.pyx" from libc.stdint cimport uint64_t +from cpython.ref cimport Py_INCREF, PyObject from cassandra.rowparser cimport RowParser from cassandra.bytesio cimport BytesIOReader from cassandra.datatypes cimport DataType from cassandra import cqltypes - -import numpy as np -cimport numpy as np - from cassandra.util import is_little_endian -from cpython.ref cimport Py_INCREF, PyObject +import numpy as np + + +cdef extern from "numpyFlags.h": + + pass cdef extern from "Python.h": # An integer type large enough to hold a pointer ctypedef uint64_t Py_uintptr_t -# ctypedef struct TypeRepr: -# Py_ssize_t size -# int is_object +cdef extern from "numpy/arrayobject.h": + # Avoid using 'numpy' from Cython, as it access the 'data' attribute + # of PyArrayObject, which is deprecated: + # + # warning: #warning "Using deprecated NumPy API, disable it by + # #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp] + # + ctypedef class np.ndarray [object PyArrayObject]: + pass -ctypedef struct ArrRepr: - # TypeRepr typ + +# Simple array descriptor, useful to parse rows into a NumPy array +ctypedef struct ArrDesc: Py_uintptr_t buf_ptr Py_ssize_t stride int is_object +cdef ArrDesc[:] _dummyArray = NULL +arrDescDtype = np.array(_dummyArray).dtype + + _cqltype_to_numpy = { cqltypes.LongType: np.dtype('>i8'), cqltypes.CounterColumnType: np.dtype('>i8'), @@ -47,36 +60,53 @@ _cqltype_to_numpy = { cqltypes.DoubleType: np.dtype('>f8'), } +obj_dtype = np.dtype('O') -# cdef type_repr(coltype): +def make_array(coltype, array_size): + """ + Allocate a new NumPy array of the given column type and size. + """ + dtype = _cqltype_to_numpy.get(coltype, obj_dtype) + return np.empty((array_size,), dtype=dtype) + + +def make_arrays(colnames, coltypes, array_size): + """ + Allocate arrays for each result column. + + returns a tuple of (array_descs, arrays), where + 'array_descs' describe the arrays for NativeRowParser and + 'arrays' is a dict mapping column names to arrays + (e.g. 
this can be fed into pandas.DataFrame) + """ + row_size = len(colnames) + array_descs = np.empty((row_size,), arrDescDtype) + arrays = {} + + for i, colname, coltype in zip(range(row_size), colnames, coltypes): + arr = make_array(coltype, array_size) + array_descs[i].buf_ptr = arr.ctypes.data + array_descs[i].stride = arr.strides[0] + array_descs[i].is_object = coltype in _cqltype_to_numpy + arrays[colname] = arr + + return array_descs, arrays + + +# cdef ArrDesc array_repr(np.ndarray arr, coltype): # """ -# Get a low-level type representation for the cqltype +# Construct a low-level array representation # """ -# cdef TypeRepr res -# if coltype in _cqltype_to_numpy: -# dtype = _cqltype_to_numpy[coltype] -# res.size = dtype.itemsize -# res.is_object = False -# else: -# res.size = sizeof(PyObject *) -# res.is_object = True +# assert arr.ndim == 1, "Expected a one-dimensional array" +# +# cdef ArrDesc res +# # Get the data pointer to the underlying memory of the numpy array +# res.buf_ptr = arr.ctypes.data +# res.stride = arr.strides[0] +# res.is_object = coltype in _cqltype_to_numpy # return res -cdef ArrRepr array_repr(np.ndarray arr, coltype): - """ - Construct a low-level array representation - """ - assert arr.ndim == 1, "Expected a one-dimensional array" - - cdef ArrRepr res - # Get the data pointer to the underlying memory of the numpy array - res.buf_ptr = arr.ctypes.data - res.stride = arr.strides[0] - res.is_object = coltype in _cqltype_to_numpy - return res - - cdef class NativeRowParser(RowParser): """ This is a row parser that copies bytes into arrays (e.g. NumPy arrays) @@ -88,12 +118,11 @@ cdef class NativeRowParser(RowParser): of self.arrays """ - # ArrRepr contains a 'buf_ptr' field, which is not supported as a memoryview dtype - cdef ArrRepr[::1] arrays + cdef ArrDesc[::1] arrays cdef DataType[::1] datatypes cdef Py_ssize_t size - def __init__(self, ArrRepr[::1] arrays, DataType[::1] datatypes): + def __init__(self, ArrDesc[::1] arrays, DataType[::1] datatypes): self.arrays = arrays self.datatypes = datatypes self.size = len(datatypes) @@ -101,7 +130,7 @@ cdef class NativeRowParser(RowParser): cpdef unpack_row(self, BytesIOReader reader, protocol_version): cdef char *buf cdef Py_ssize_t i, bufsize, rowsize = self.size - cdef ArrRepr arr + cdef ArrDesc arr for i in range(rowsize): buf = get_buf(reader, &bufsize) @@ -110,7 +139,7 @@ cdef class NativeRowParser(RowParser): arr = self.arrays[i] - if arr.is_object: + if self.is_object[i]: dt = self.datatypes[i] val = dt.deserialize(buf, bufsize, protocol_version) Py_INCREF(val) From c7c50c973de4f6924fd99060fe40d8118c3b995a Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 30 Jul 2015 17:58:21 +0100 Subject: [PATCH 11/70] Fix wrong attribute access --- cassandra/numpyparser.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 763c3ab2..ff0b9f33 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -139,7 +139,7 @@ cdef class NativeRowParser(RowParser): arr = self.arrays[i] - if self.is_object[i]: + if arr.is_object: dt = self.datatypes[i] val = dt.deserialize(buf, bufsize, protocol_version) Py_INCREF(val) From ed4efd2a66b5631dfa814d502f255db3a3364848 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 30 Jul 2015 17:59:01 +0100 Subject: [PATCH 12/70] Disable use of deprecated NumPy API --- cassandra/numpyFlags.h | 1 + 1 file changed, 1 insertion(+) create mode 100644 cassandra/numpyFlags.h diff --git a/cassandra/numpyFlags.h 
b/cassandra/numpyFlags.h new file mode 100644 index 00000000..6793b7a8 --- /dev/null +++ b/cassandra/numpyFlags.h @@ -0,0 +1 @@ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION \ No newline at end of file From 43779022eceb7921973c01976a642a14a223f8cb Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 31 Jul 2015 10:32:53 +0100 Subject: [PATCH 13/70] Clean up some Cython deserialization code --- cassandra/cython_protocol_handler.pyx | 11 ++++------- cassandra/numpyparser.pyx | 23 ++++++---------------- cassandra/{rowparser.pxd => objparser.pxd} | 0 cassandra/{rowparser.pyx => objparser.pyx} | 0 4 files changed, 10 insertions(+), 24 deletions(-) rename cassandra/{rowparser.pxd => objparser.pxd} (100%) rename cassandra/{rowparser.pyx => objparser.pyx} (100%) diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index 644985dc..c9eb24e6 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -14,14 +14,14 @@ from cassandra.protocol import ResultMessage, ProtocolHandler from cassandra.bytesio cimport BytesIOReader from cassandra cimport typecodes from cassandra.datatypes cimport DataType -from cassandra.rowparser cimport RowParser +from cassandra.objparser cimport RowParser -from cassandra.rowparser import TupleRowParser +from cassandra.objparser import TupleRowParser from cassandra.datatypes import Int64, GenericDataType from cython.view cimport array as cython_array -include "marshal.pyx" +include "ioutils.pyx" class FastResultMessage(ResultMessage): @@ -50,6 +50,7 @@ class FastResultMessage(ResultMessage): def obj_array(list objs): + """Create a (Cython) array of objects given a list of objects""" cdef object[:] arr arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") # arr[:] = objs # This does not work (segmentation faults) @@ -74,10 +75,6 @@ cdef parse_rows(BytesIOReader reader, DataType[::1] datatypes, protocol_version) return [parser.unpack_row(reader, protocol_version) for i in range(rowcount)] -cdef inline int32_t read_int(BytesIOReader reader): - return int32_unpack(reader.read(4)) - - # cdef parse_rows2(BytesIOReader reader, list colnames, list coltypes, protocol_version): # cdef Py_ssize_t i, rowcount # cdef char *raw_val diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index ff0b9f33..4c279c3a 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -13,7 +13,7 @@ include "ioutils.pyx" from libc.stdint cimport uint64_t from cpython.ref cimport Py_INCREF, PyObject -from cassandra.rowparser cimport RowParser +from cassandra.objparser cimport RowParser from cassandra.bytesio cimport BytesIOReader from cassandra.datatypes cimport DataType from cassandra import cqltypes @@ -23,7 +23,8 @@ import numpy as np cdef extern from "numpyFlags.h": - + # Include 'numpyFlags.h' into the generated C code to disable the + # deprecated NumPy API pass cdef extern from "Python.h": @@ -93,20 +94,6 @@ def make_arrays(colnames, coltypes, array_size): return array_descs, arrays -# cdef ArrDesc array_repr(np.ndarray arr, coltype): -# """ -# Construct a low-level array representation -# """ -# assert arr.ndim == 1, "Expected a one-dimensional array" -# -# cdef ArrDesc res -# # Get the data pointer to the underlying memory of the numpy array -# res.buf_ptr = arr.ctypes.data -# res.stride = arr.strides[0] -# res.is_object = coltype in _cqltype_to_numpy -# return res - - cdef class NativeRowParser(RowParser): """ This is a row parser that copies bytes into arrays 
(e.g. NumPy arrays) @@ -165,9 +152,11 @@ def make_native_byteorder(arr): """ Make sure all values have a native endian in the NumPy arrays. """ - if is_little_endian: + if is_little_endian and not arr.dtype.kind == 'O': # We have arrays in big-endian order. First swap the bytes # into little endian order, and then update the numpy dtype # accordingly (e.g. from '>i8' to ' Date: Fri, 31 Jul 2015 13:37:16 +0100 Subject: [PATCH 14/70] Add lazy and list-based column deserializers --- cassandra/cython_protocol_handler.pyx | 69 ++++++++++++++++++--------- cassandra/objparser.pxd | 6 +++ cassandra/objparser.pyx | 35 ++++++++++++++ 3 files changed, 88 insertions(+), 22 deletions(-) diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index c9eb24e6..e8fabad0 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -14,9 +14,9 @@ from cassandra.protocol import ResultMessage, ProtocolHandler from cassandra.bytesio cimport BytesIOReader from cassandra cimport typecodes from cassandra.datatypes cimport DataType -from cassandra.objparser cimport RowParser +from cassandra.objparser cimport ColumnParser, RowParser -from cassandra.objparser import TupleRowParser +from cassandra.objparser import ListParser from cassandra.datatypes import Int64, GenericDataType from cython.view cimport array as cython_array @@ -24,16 +24,12 @@ from cython.view cimport array as cython_array include "ioutils.pyx" -class FastResultMessage(ResultMessage): - """ - Cython version of Result Message that has a faster implementation of - recv_results_row. - """ - # type_codes = ResultMessage.type_codes.copy() - code_to_type = dict((v, k) for k, v in ResultMessage.type_codes.items()) - - @classmethod +def make_recv_results_rows(ColumnParser colparser): def recv_results_rows(cls, f, protocol_version, user_type_map): + """ + Parse protocol data given as a BytesIO f into a set of columns (e.g. list of tuples) + This is used as the recv_results_rows method of (Fast)ResultMessage + """ paging_state, column_metadata = cls.recv_results_metadata(f, user_type_map) colnames = [c[2] for c in column_metadata] @@ -44,9 +40,12 @@ class FastResultMessage(ResultMessage): [Int64() if coltype == LongType else GenericDataType(coltype) for coltype in coltypes]) # [GenericDataType(coltype) for coltype in coltypes]) + parsed_rows = colparser.parse_rows( + BytesIOReader(f.read()), datatypes, protocol_version) # parsed_rows = parse_rows2(BytesIOReader(f.read()), colnames, coltypes, protocol_version) - parsed_rows = parse_rows(BytesIOReader(f.read()), datatypes, protocol_version) + # parsed_rows = parse_rows(BytesIOReader(f.read()), datatypes, protocol_version) return (paging_state, (colnames, parsed_rows)) + return recv_results_rows def obj_array(list objs): @@ -59,20 +58,46 @@ def obj_array(list objs): return arr -class CythonProtocolHandler(ProtocolHandler): +def make_protocol_handler(colparser=ListParser()): """ - Use FastResultMessage to decode query result message messages. + Given a column parser to deserialize ResultMessages, return a suitable + Cython-based protocol handler. + + There are three Cython-based protocol handlers (least to most performant): + + 1. objparser.ListParser + this parser decodes result messages into a list of tuples + + 2. objparser.LazyParser + this parser decodes result messages lazily by returning an iterator + + 3. 
numpyparser.NumPyParser + this parser decodes result messages into NumPy arrays + + The default is to use objparser.ListParser """ - my_opcodes = ProtocolHandler.message_types_by_opcode.copy() - my_opcodes[FastResultMessage.opcode] = FastResultMessage - message_types_by_opcode = my_opcodes + # TODO: It may be cleaner to turn ProtocolHandler and ResultMessage into + # TODO: instances and use methods instead of class methods + class FastResultMessage(ResultMessage): + """ + Cython version of Result Message that has a faster implementation of + recv_results_row. + """ + # type_codes = ResultMessage.type_codes.copy() + code_to_type = dict((v, k) for k, v in ResultMessage.type_codes.items()) + recv_results_rows = classmethod(make_recv_results_rows(colparser)) -cdef parse_rows(BytesIOReader reader, DataType[::1] datatypes, protocol_version): - cdef Py_ssize_t i, rowcount - cdef RowParser parser = TupleRowParser(len(datatypes), datatypes) - rowcount = read_int(reader) - return [parser.unpack_row(reader, protocol_version) for i in range(rowcount)] + class CythonProtocolHandler(ProtocolHandler): + """ + Use FastResultMessage to decode query result message messages. + """ + + my_opcodes = ProtocolHandler.message_types_by_opcode.copy() + my_opcodes[FastResultMessage.opcode] = FastResultMessage + message_types_by_opcode = my_opcodes + + return CythonProtocolHandler # cdef parse_rows2(BytesIOReader reader, list colnames, list coltypes, protocol_version): diff --git a/cassandra/objparser.pxd b/cassandra/objparser.pxd index 7597cca9..edfa2a60 100644 --- a/cassandra/objparser.pxd +++ b/cassandra/objparser.pxd @@ -1,4 +1,10 @@ from cassandra.bytesio cimport BytesIOReader +from cassandra.datatypes cimport DataType + +cdef class ColumnParser: + cpdef parse_rows(self, BytesIOReader reader, DataType[::1] datatypes, + protocol_version) + cdef class RowParser: cpdef unpack_row(self, BytesIOReader reader, protocol_version) diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index d09bdf94..1fa9d283 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -22,7 +22,42 @@ from cassandra.bytesio cimport BytesIOReader from cassandra.datatypes cimport DataType +cdef class ColumnParser: + """Decode a ResultMessage into a set of columns""" + cpdef parse_rows(self, BytesIOReader reader, DataType[::1] datatypes, + protocol_version): + raise NotImplementedError + + +cdef class ListParser(ColumnParser): + """Decode a ResultMessage into a list of tuples (or other objects)""" + + cpdef parse_rows(self, BytesIOReader r, DataType[::1] datatypes, ver): + cdef Py_ssize_t i, rowcount + rowcount = read_int(r) + cdef RowParser rowparser = TupleRowParser(len(datatypes), datatypes) + return [rowparser.unpack_row(r, ver) for i in range(rowcount)] + + +cdef class LazyParser(ColumnParser): + """Decode a ResultMessage lazily using a generator""" + + cpdef parse_rows(self, BytesIOReader r, DataType[::1] datatypes, ver): + # Use a little helper function as closures (generators) are not + # supported in cpdef methods + return parse_rows_lazy(r, self.rowparser, datatypes, ver) + + +def parse_rows_lazy(BytesIOReader r, DataType[::1] datatypes, ver): + cdef Py_ssize_t i, rowcount + rowcount = read_int(r) + cdef RowParser rowparser = TupleRowParser(len(datatypes), datatypes) + return (rowparser.unpack_row(r, ver) for i in range(rowcount)) + + cdef class RowParser: + """Parser for a single row""" + cpdef unpack_row(self, BytesIOReader reader, protocol_version): """ Unpack a single row of data in a ResultMessage. 
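For reference, the handler returned by make_protocol_handler above is meant to be installed per session via the client_protocol_handler attribute, which is exactly how the integration tests added later in this series use it. A minimal usage sketch in plain Python (keyspace and table names follow those test fixtures; protocol version selection and error handling are omitted):

    from cassandra.cluster import Cluster
    from cassandra.cython_protocol_handler import make_protocol_handler
    from cassandra.objparser import LazyParser

    cluster = Cluster()
    session = cluster.connect("testspace")

    # Install a Cython-based handler; with LazyParser the rows are produced lazily.
    session.client_protocol_handler = make_protocol_handler(LazyParser())

    for row in session.execute("SELECT * FROM test_table"):
        print(row)
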
From 7fbc6aa731eca509e19d739e6277de6a09f2bcc9 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 31 Jul 2015 15:51:20 +0100 Subject: [PATCH 15/70] Some fixes to numpy and object deserializers --- cassandra/cython_protocol_handler.pyx | 38 +++-------- cassandra/datatypes.pxd | 16 +---- cassandra/datatypes.pyx | 39 ++++++++++-- cassandra/numpyparser.pyx | 91 +++++++++++++++------------ cassandra/objparser.pxd | 11 ---- cassandra/objparser.pyx | 55 +++++----------- cassandra/parsing.pxd | 16 +++++ cassandra/parsing.pyx | 30 +++++++++ 8 files changed, 157 insertions(+), 139 deletions(-) delete mode 100644 cassandra/objparser.pxd create mode 100644 cassandra/parsing.pxd create mode 100644 cassandra/parsing.pyx diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index e8fabad0..98c7f1d6 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -6,20 +6,16 @@ from libc.stdint cimport int64_t, int32_t # uint16_pack, uint16_unpack, uint32_pack, uint32_unpack, # int32_pack, int32_unpack, int64_pack, int64_unpack, float_pack, float_unpack, double_pack, double_unpack) -from cassandra.marshal import varint_pack, varint_unpack -from cassandra import util -from cassandra.cqltypes import EMPTY, LongType +# from cassandra.marshal import varint_pack, varint_unpack +# from cassandra import util +# from cassandra.cqltypes import EMPTY, LongType from cassandra.protocol import ResultMessage, ProtocolHandler -from cassandra.bytesio cimport BytesIOReader -from cassandra cimport typecodes -from cassandra.datatypes cimport DataType -from cassandra.objparser cimport ColumnParser, RowParser - +# from cassandra.bytesio cimport BytesIOReader +from cassandra.parsing cimport ParseDesc, ColumnParser +from cassandra.datatypes import make_datatypes from cassandra.objparser import ListParser -from cassandra.datatypes import Int64, GenericDataType -from cython.view cimport array as cython_array include "ioutils.pyx" @@ -35,29 +31,15 @@ def make_recv_results_rows(ColumnParser colparser): colnames = [c[2] for c in column_metadata] coltypes = [c[3] for c in column_metadata] - cdef DataType[::1] datatypes - datatypes = obj_array( - [Int64() if coltype == LongType else GenericDataType(coltype) for coltype in coltypes]) - # [GenericDataType(coltype) for coltype in coltypes]) + desc = ParseDesc(colnames, coltypes, make_datatypes(coltypes), protocol_version) + reader = BytesIOReader(f.read()) + parsed_rows = colparser.parse_rows(reader, desc) - parsed_rows = colparser.parse_rows( - BytesIOReader(f.read()), datatypes, protocol_version) - # parsed_rows = parse_rows2(BytesIOReader(f.read()), colnames, coltypes, protocol_version) - # parsed_rows = parse_rows(BytesIOReader(f.read()), datatypes, protocol_version) return (paging_state, (colnames, parsed_rows)) + return recv_results_rows -def obj_array(list objs): - """Create a (Cython) array of objects given a list of objects""" - cdef object[:] arr - arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") - # arr[:] = objs # This does not work (segmentation faults) - for i, obj in enumerate(objs): - arr[i] = obj - return arr - - def make_protocol_handler(colparser=ListParser()): """ Given a column parser to deserialize ResultMessages, return a suitable diff --git a/cassandra/datatypes.pxd b/cassandra/datatypes.pxd index d4db2b02..cd58b6b3 100644 --- a/cassandra/datatypes.pxd +++ b/cassandra/datatypes.pxd @@ -1,17 +1,3 @@ -# cdef class LLDataType: -# """ -# Low-level Cassandra datatype 
-# """ -# -# cdef Py_ssize_t size -# -# cdef void deserialize_ptr(self, char *buf, Py_ssize_t size, -# Py_ssize_t index, void *out, protocol_version) - cdef class DataType: + cdef object cqltype cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version) - - -cdef class Int64(DataType): - pass - diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx index a1c50fcb..24dd18e6 100644 --- a/cassandra/datatypes.pyx +++ b/cassandra/datatypes.pyx @@ -1,8 +1,21 @@ include 'marshal.pyx' +from cython.view cimport array as cython_array +from cassandra.datatypes import Int64, GenericDataType +from cassandra.cqltypes import LongType + +# TODO: Port cqltypes to this module + cdef class DataType: + """ + Cython-based datatype + """ + + def __init__(self, cqltype): + self.cqltype = cqltype + cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): - pass + raise NotImplementedError cdef class Int64(DataType): @@ -20,14 +33,28 @@ cdef class GenericDataType(DataType): Wrap a generic datatype for deserialization """ - cdef object cqltype - - def __init__(self, cqltype): - self.cqltype = cqltype - cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): return self.cqltype.deserialize(buf[:size], protocol_version) def __str__(self): return "GenericDataType(%s)" % (self.cqltype,) + +def make_datatypes(coltypes): + cdef DataType[::1] datatypes + return obj_array([make_datatype(ct) for ct in coltypes]) + + +def make_datatype(coltype): + return Int64(coltype) if coltype == LongType else GenericDataType(coltype) + + +def obj_array(list objs): + """Create a (Cython) array of objects given a list of objects""" + cdef object[:] arr + arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") + # arr[:] = objs # This does not work (segmentation faults) + for i, obj in enumerate(objs): + arr[i] = obj + return arr + diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 4c279c3a..3dd28286 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -13,13 +13,14 @@ include "ioutils.pyx" from libc.stdint cimport uint64_t from cpython.ref cimport Py_INCREF, PyObject -from cassandra.objparser cimport RowParser from cassandra.bytesio cimport BytesIOReader from cassandra.datatypes cimport DataType +from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser from cassandra import cqltypes from cassandra.util import is_little_endian import numpy as np +import pandas as pd cdef extern from "numpyFlags.h": @@ -31,26 +32,18 @@ cdef extern from "Python.h": # An integer type large enough to hold a pointer ctypedef uint64_t Py_uintptr_t -cdef extern from "numpy/arrayobject.h": - # Avoid using 'numpy' from Cython, as it access the 'data' attribute - # of PyArrayObject, which is deprecated: - # - # warning: #warning "Using deprecated NumPy API, disable it by - # #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp] - # - ctypedef class np.ndarray [object PyArrayObject]: - pass - # Simple array descriptor, useful to parse rows into a NumPy array ctypedef struct ArrDesc: Py_uintptr_t buf_ptr - Py_ssize_t stride + int stride # should be large enough as we allocate contiguous arrays int is_object -cdef ArrDesc[:] _dummyArray = NULL -arrDescDtype = np.array(_dummyArray).dtype - +arrDescDtype = np.dtype( + [ ('buf_ptr', np.uintp) + , ('stride', np.dtype('i')) + , ('is_object', np.dtype('i')) + ]) _cqltype_to_numpy = { cqltypes.LongType: np.dtype('>i8'), @@ -63,15 +56,27 @@ _cqltype_to_numpy = { obj_dtype = 
np.dtype('O') -def make_array(coltype, array_size): - """ - Allocate a new NumPy array of the given column type and size. - """ - dtype = _cqltype_to_numpy.get(coltype, obj_dtype) - return np.empty((array_size,), dtype=dtype) + +cdef class NumpyParser(ColumnParser): + """Decode a ResultMessage into a bunch of NumPy arrays""" + + cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): + cdef Py_ssize_t i, rowcount + + rowcount = read_int(reader) + array_descs, arrays = make_arrays(desc, rowcount) + cdef RowParser rowparser = NumPyRowParser(array_descs) + for i in range(rowcount): + rowparser.unpack_row(reader, desc) + + # arrays = map(make_native_byteorder, arrays) + return arrays + # return pd.DataFrame(dict(zip(desc.colnames, arrays))) -def make_arrays(colnames, coltypes, array_size): +### Helper functions to create NumPy arrays and array descriptors + +def make_arrays(ParseDesc desc, array_size): """ Allocate arrays for each result column. @@ -80,21 +85,30 @@ def make_arrays(colnames, coltypes, array_size): 'arrays' is a dict mapping column names to arrays (e.g. this can be fed into pandas.DataFrame) """ - row_size = len(colnames) - array_descs = np.empty((row_size,), arrDescDtype) - arrays = {} + array_descs = np.empty((desc.rowsize,), arrDescDtype) + arrays = [] - for i, colname, coltype in zip(range(row_size), colnames, coltypes): + for i, coltype in enumerate(desc.coltypes): arr = make_array(coltype, array_size) - array_descs[i].buf_ptr = arr.ctypes.data - array_descs[i].stride = arr.strides[0] - array_descs[i].is_object = coltype in _cqltype_to_numpy - arrays[colname] = arr + array_descs[i]['buf_ptr'] = arr.ctypes.data + array_descs[i]['stride'] = arr.strides[0] + array_descs[i]['is_object'] = coltype in _cqltype_to_numpy + arrays.append(arr) return array_descs, arrays -cdef class NativeRowParser(RowParser): +def make_array(coltype, array_size): + """ + Allocate a new NumPy array of the given column type and size. + """ + dtype = _cqltype_to_numpy.get(coltype, obj_dtype) + return np.empty((array_size,), dtype=dtype) + + +#### Parse rows into NumPy arrays + +cdef class NumPyRowParser(RowParser): """ This is a row parser that copies bytes into arrays (e.g. NumPy arrays) for types it recognizes, such as int64. 
Values of other types are @@ -106,18 +120,15 @@ cdef class NativeRowParser(RowParser): """ cdef ArrDesc[::1] arrays - cdef DataType[::1] datatypes - cdef Py_ssize_t size - def __init__(self, ArrDesc[::1] arrays, DataType[::1] datatypes): + def __init__(self, ArrDesc[::1] arrays): self.arrays = arrays - self.datatypes = datatypes - self.size = len(datatypes) - cpdef unpack_row(self, BytesIOReader reader, protocol_version): + cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): cdef char *buf - cdef Py_ssize_t i, bufsize, rowsize = self.size + cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize cdef ArrDesc arr + cdef DataType dt for i in range(rowsize): buf = get_buf(reader, &bufsize) @@ -127,8 +138,8 @@ cdef class NativeRowParser(RowParser): arr = self.arrays[i] if arr.is_object: - dt = self.datatypes[i] - val = dt.deserialize(buf, bufsize, protocol_version) + dt = desc.datatypes[i] + val = dt.deserialize(buf, bufsize, desc.protocol_version) Py_INCREF(val) ( arr.buf_ptr)[0] = val else: diff --git a/cassandra/objparser.pxd b/cassandra/objparser.pxd deleted file mode 100644 index edfa2a60..00000000 --- a/cassandra/objparser.pxd +++ /dev/null @@ -1,11 +0,0 @@ -from cassandra.bytesio cimport BytesIOReader -from cassandra.datatypes cimport DataType - -cdef class ColumnParser: - cpdef parse_rows(self, BytesIOReader reader, DataType[::1] datatypes, - protocol_version) - - -cdef class RowParser: - cpdef unpack_row(self, BytesIOReader reader, protocol_version) - diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index 1fa9d283..da6e6c01 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -20,49 +20,33 @@ from cpython.ref cimport ( from cassandra.bytesio cimport BytesIOReader from cassandra.datatypes cimport DataType - - -cdef class ColumnParser: - """Decode a ResultMessage into a set of columns""" - cpdef parse_rows(self, BytesIOReader reader, DataType[::1] datatypes, - protocol_version): - raise NotImplementedError +from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser cdef class ListParser(ColumnParser): """Decode a ResultMessage into a list of tuples (or other objects)""" - cpdef parse_rows(self, BytesIOReader r, DataType[::1] datatypes, ver): + cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): cdef Py_ssize_t i, rowcount - rowcount = read_int(r) - cdef RowParser rowparser = TupleRowParser(len(datatypes), datatypes) - return [rowparser.unpack_row(r, ver) for i in range(rowcount)] + rowcount = read_int(reader) + cdef RowParser rowparser = TupleRowParser() + return [rowparser.unpack_row(reader, desc) for i in range(rowcount)] cdef class LazyParser(ColumnParser): """Decode a ResultMessage lazily using a generator""" - cpdef parse_rows(self, BytesIOReader r, DataType[::1] datatypes, ver): + cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): # Use a little helper function as closures (generators) are not # supported in cpdef methods - return parse_rows_lazy(r, self.rowparser, datatypes, ver) + return parse_rows_lazy(reader, desc) -def parse_rows_lazy(BytesIOReader r, DataType[::1] datatypes, ver): +def parse_rows_lazy(BytesIOReader reader, ParseDesc desc): cdef Py_ssize_t i, rowcount - rowcount = read_int(r) - cdef RowParser rowparser = TupleRowParser(len(datatypes), datatypes) - return (rowparser.unpack_row(r, ver) for i in range(rowcount)) - - -cdef class RowParser: - """Parser for a single row""" - - cpdef unpack_row(self, BytesIOReader reader, protocol_version): - """ - Unpack a single row of data in a ResultMessage. 
- """ - raise NotImplementedError + rowcount = read_int(reader) + cdef RowParser rowparser = TupleRowParser() + return (rowparser.unpack_row(reader, desc) for i in range(rowcount)) cdef class TupleRowParser(RowParser): @@ -78,26 +62,19 @@ cdef class TupleRowParser(RowParser): into objects """ - cdef DataType[::1] datatypes - cdef Py_ssize_t size - - def __init__(self, Py_ssize_t n, DataType[::1] datatypes): - self.datatypes = datatypes - self.size = n - - cpdef unpack_row(self, BytesIOReader reader, protocol_version): + cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): cdef char *buf - cdef Py_ssize_t i, bufsize, rowsize = self.size + cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize cdef DataType dt - cdef tuple res = PyTuple_New(self.size) + cdef tuple res = PyTuple_New(desc.rowsize) for i in range(rowsize): buf = get_buf(reader, &bufsize) if buf == NULL: val = None else: - dt = self.datatypes[i] - val = dt.deserialize(buf, bufsize, protocol_version) + dt = desc.datatypes[i] + val = dt.deserialize(buf, bufsize, desc.protocol_version) Py_INCREF(val) PyTuple_SET_ITEM(res, i, val) diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd new file mode 100644 index 00000000..c4774385 --- /dev/null +++ b/cassandra/parsing.pxd @@ -0,0 +1,16 @@ +from cassandra.bytesio cimport BytesIOReader +from cassandra.datatypes cimport DataType + +cdef class ParseDesc: + cdef public object colnames + cdef public object coltypes + cdef DataType[::1] datatypes + cdef public object protocol_version + cdef Py_ssize_t rowsize + +cdef class ColumnParser: + cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc) + +cdef class RowParser: + cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc) + diff --git a/cassandra/parsing.pyx b/cassandra/parsing.pyx new file mode 100644 index 00000000..71196d14 --- /dev/null +++ b/cassandra/parsing.pyx @@ -0,0 +1,30 @@ +""" +Module containing the definitions and declarations (parsing.pxd) for parsers. +""" + +cdef class ParseDesc: + """Description of what structure to parse""" + + def __init__(self, colnames, coltypes, datatypes, protocol_version): + self.colnames = colnames + self.coltypes = coltypes + self.datatypes = datatypes + self.protocol_version = protocol_version + self.rowsize = len(colnames) + + +cdef class ColumnParser: + """Decode a ResultMessage into a set of columns""" + + cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): + raise NotImplementedError + + +cdef class RowParser: + """Parser for a single row""" + + cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): + """ + Unpack a single row of data in a ResultMessage. 
+ """ + raise NotImplementedError From 51e090cc61a4a2ace3772556a109ac1968602d30 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 31 Jul 2015 17:49:24 +0100 Subject: [PATCH 16/70] Experiment with various optimizations --- cassandra/datatypes.pyx | 2 ++ cassandra/marshal.pyx | 7 ++-- cassandra/numpyparser.pyx | 76 ++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx index 24dd18e6..272435b2 100644 --- a/cassandra/datatypes.pyx +++ b/cassandra/datatypes.pyx @@ -1,3 +1,5 @@ +# -- cython: profile=True + include 'marshal.pyx' from cython.view cimport array as cython_array diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 92fb1293..cc80461b 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -53,12 +53,15 @@ cdef inline void swap_order(char *buf, Py_ssize_t size): cdef char c if is_little_endian: - for i in range(size//2): + for i in range(div2(size)): end = size - i - 1 c = buf[i] buf[i] = buf[end] buf[end] = c +cdef inline Py_ssize_t div2(Py_ssize_t x): + return x >> 1 + ### Packing and unpacking of signed integers cpdef inline bytes int64_pack(int64_t x): @@ -80,9 +83,9 @@ cpdef inline bytes int32_pack(int32_t x): cpdef inline int32_t int32_unpack(const char *buf): cdef int32_t x = ( buf)[0] cdef char *p = &x - swap_order( &x, 4) # if is_little_endian: # p[0], p[1], p[2], p[3] = p[3], p[2], p[1], p[0] + swap_order( &x, 4) return x cpdef inline bytes int16_pack(int16_t x): diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 3dd28286..9360c247 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -1,3 +1,5 @@ +# -- cython: profile=True + """ This module provider an optional protocol parser that returns NumPy arrays. @@ -10,6 +12,7 @@ as numpy is an optional dependency. include "ioutils.pyx" +cimport cython from libc.stdint cimport uint64_t from cpython.ref cimport Py_INCREF, PyObject @@ -62,15 +65,17 @@ cdef class NumpyParser(ColumnParser): cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): cdef Py_ssize_t i, rowcount + cdef ArrDesc[::1] array_descs + cdef ArrDesc *arrs rowcount = read_int(reader) array_descs, arrays = make_arrays(desc, rowcount) - cdef RowParser rowparser = NumPyRowParser(array_descs) - for i in range(rowcount): - rowparser.unpack_row(reader, desc) + arrs = &array_descs[0] - # arrays = map(make_native_byteorder, arrays) - return arrays + for i in range(rowcount): + unpack_row(reader, desc, arrs) + + return [make_native_byteorder(arr) for arr in arrays] # return pd.DataFrame(dict(zip(desc.colnames, arrays))) @@ -92,7 +97,7 @@ def make_arrays(ParseDesc desc, array_size): arr = make_array(coltype, array_size) array_descs[i]['buf_ptr'] = arr.ctypes.data array_descs[i]['stride'] = arr.strides[0] - array_descs[i]['is_object'] = coltype in _cqltype_to_numpy + array_descs[i]['is_object'] = coltype not in _cqltype_to_numpy arrays.append(arr) return array_descs, arrays @@ -108,48 +113,37 @@ def make_array(coltype, array_size): #### Parse rows into NumPy arrays -cdef class NumPyRowParser(RowParser): - """ - This is a row parser that copies bytes into arrays (e.g. NumPy arrays) - for types it recognizes, such as int64. Values of other types are - converted to objects. 
+@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline int unpack_row( + BytesIOReader reader, ParseDesc desc, ArrDesc *arrays) except -1: + cdef char *buf + cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize + cdef ArrDesc arr + cdef DataType dt - NOTE: This class is stateful, in that every time unpack_row is called it - advanced the pointer into the array by updates the buf_ptr field - of self.arrays - """ + for i in range(rowsize): + buf = get_buf(reader, &bufsize) + if buf == NULL: + raise ValueError("Unexpected end of stream") - cdef ArrDesc[::1] arrays + arr = arrays[i] - def __init__(self, ArrDesc[::1] arrays): - self.arrays = arrays + if arr.is_object: + dt = desc.datatypes[i] + val = dt.deserialize(buf, bufsize, desc.protocol_version) + Py_INCREF(val) + ( arr.buf_ptr)[0] = val + else: + memcopy(buf, arr.buf_ptr, bufsize) - cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): - cdef char *buf - cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize - cdef ArrDesc arr - cdef DataType dt + # Update the pointer into the array for the next time + arrays[i].buf_ptr += arr.stride - for i in range(rowsize): - buf = get_buf(reader, &bufsize) - if buf == NULL: - raise ValueError("Unexpected end of stream") - - arr = self.arrays[i] - - if arr.is_object: - dt = desc.datatypes[i] - val = dt.deserialize(buf, bufsize, desc.protocol_version) - Py_INCREF(val) - ( arr.buf_ptr)[0] = val - else: - memcopy(buf, arr.buf_ptr, bufsize) - - # Update the pointer into the array for the next time - self.arrays[i].buf_ptr += arr.stride + return 0 -cdef inline memcopy(char *src, char *dst, Py_ssize_t size): +cdef inline void memcopy(char *src, char *dst, Py_ssize_t size): """ Our own simple memcopy which can be inlined. This is useful because our data types are only a few bytes. 
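The unpack_row loop above treats each ArrDesc record as a per-column write cursor: fixed-width values are memcopied straight into the column's preallocated NumPy buffer and buf_ptr is advanced by stride, while object columns fall back to the Python-level deserializer. A rough pure-NumPy sketch of how make_arrays fills in those records (column names, row count and dtypes here are invented for illustration):

    import numpy as np

    # Structured dtype mirroring the ArrDesc records built by make_arrays.
    arr_desc_dtype = np.dtype([('buf_ptr', np.uintp),
                               ('stride', np.dtype('i')),
                               ('is_object', np.dtype('i'))])

    # One preallocated array per result column.
    columns = {'id': np.empty(3, dtype='>i8'), 'name': np.empty(3, dtype='O')}

    descs = np.empty(len(columns), dtype=arr_desc_dtype)
    for i, arr in enumerate(columns.values()):
        descs[i]['buf_ptr'] = arr.ctypes.data    # raw pointer to the column's buffer
        descs[i]['stride'] = arr.strides[0]      # bytes between consecutive rows
        descs[i]['is_object'] = arr.dtype.kind == 'O'   # needs Python-level deserialization
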
From 24e03f7f1409b7edb7b5f96025159c83608d6710 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 10:56:28 +0100 Subject: [PATCH 17/70] Clean up some old code --- cassandra/bytesio.pyx | 1 - cassandra/cython_protocol_handler.pyx | 118 -------------------------- cassandra/ioutils.pyx | 1 - 3 files changed, 120 deletions(-) diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index d392b23f..b18b1aa5 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -53,4 +53,3 @@ class PyBytesIOReader(BytesIOReader): r = self.buf[self.pos:newpos] self.pos = newpos return r - diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index 98c7f1d6..6ef3ae9f 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -1,17 +1,7 @@ # -- cython: profile=True -from libc.stdint cimport int64_t, int32_t - -# from cassandra.marshal cimport (int8_pack, int8_unpack, int16_pack, int16_unpack, -# uint16_pack, uint16_unpack, uint32_pack, uint32_unpack, -# int32_pack, int32_unpack, int64_pack, int64_unpack, float_pack, float_unpack, double_pack, double_unpack) - -# from cassandra.marshal import varint_pack, varint_unpack -# from cassandra import util -# from cassandra.cqltypes import EMPTY, LongType from cassandra.protocol import ResultMessage, ProtocolHandler -# from cassandra.bytesio cimport BytesIOReader from cassandra.parsing cimport ParseDesc, ColumnParser from cassandra.datatypes import make_datatypes from cassandra.objparser import ListParser @@ -80,111 +70,3 @@ def make_protocol_handler(colparser=ListParser()): message_types_by_opcode = my_opcodes return CythonProtocolHandler - - -# cdef parse_rows2(BytesIOReader reader, list colnames, list coltypes, protocol_version): -# cdef Py_ssize_t i, rowcount -# cdef char *raw_val -# cdef int[::1] colcodes -# -# colcodes = np.array( -# [FastResultMessage.code_to_type.get(coltype, -1) for coltype in coltypes], -# dtype=np.dtype('i')) -# -# rowcount = read_int(reader) -# # return RowIterator(reader, coltypes, colcodes, protocol_version, rowcount) -# return [parse_row(reader, coltypes, colcodes, protocol_version) -# for i in range(rowcount)] -# -# -# cdef class RowIterator: -# """ -# Result iterator for a set of rows -# -# There seems to be an issue with generator expressions + memoryviews, so we -# have a special iterator class instead. 
-# """ -# -# cdef list coltypes -# cdef int[::1] colcodes -# cdef Py_ssize_t rowcount, pos -# cdef BytesIOReader reader -# cdef object protocol_version -# -# def __init__(self, reader, coltypes, colcodes, protocol_version, rowcount): -# self.reader = reader -# self.coltypes = coltypes -# self.colcodes = colcodes -# self.protocol_version = protocol_version -# self.rowcount = rowcount -# self.pos = 0 -# -# def __iter__(self): -# return self -# -# def __next__(self): -# if self.pos >= self.rowcount: -# raise StopIteration -# self.pos += 1 -# return parse_row(self.reader, self.coltypes, self.colcodes, self.protocol_version) -# -# next = __next__ -# -# -# cdef inline parse_row(BytesIOReader reader, list coltypes, int[::1] colcodes, -# protocol_version): -# cdef Py_ssize_t j -# -# row = [] -# for j, ctype in enumerate(coltypes): -# raw_val_size = read_int(reader) -# if raw_val_size < 0: -# val = None -# else: -# raw_val = reader.read(raw_val_size) -# val = from_binary(ctype, colcodes[j], raw_val, -# raw_val_size, protocol_version) -# row.append(val) -# -# return row -# -# -# cdef inline from_binary(cqltype, int typecode, char *byts, int32_t size, protocol_version): -# """ -# Deserialize a bytestring into a value. See the deserialize() method -# for more information. This method differs in that if None or the empty -# string is passed in, None may be returned. -# -# This method provides a fast-path deserialization routine. -# """ -# if size == 0 and cqltype.empty_binary_ok: -# return empty(cqltype) -# return deserialize(cqltype, typecode, byts, size, protocol_version) -# -# -# cdef empty(cqltype): -# return EMPTY if cqltype.support_empty_values else None -# -# -# def to_binary(cqltype, val, protocol_version): -# """ -# Serialize a value into a bytestring. See the serialize() method for -# more information. This method differs in that if None is passed in, -# the result is the empty string. 
-# """ -# return b'' if val is None else cqltype.serialize(val, protocol_version) -# -# cdef DataType obj = Int64() -# -# cdef deserialize(cqltype, int typecode, char *byts, int32_t size, protocol_version): -# # if typecode == typecodes.LongType: -# # # return int64_unpack(byts) -# # return obj.deserialize(byts, size, protocol_version) -# # else: -# # return deserialize_generic(cqltype, typecode, byts, size, protocol_version) -# return cqltype.deserialize(byts[:size], protocol_version) -# -# cdef deserialize_generic(cqltype, int typecode, char *byts, int32_t size, -# protocol_version): -# return cqltype.deserialize(byts[:size], protocol_version) -# \ No newline at end of file diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 8749457b..41d50851 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -18,4 +18,3 @@ cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out): cdef inline int32_t read_int(BytesIOReader reader): return int32_unpack(reader.read(4)) - From e671354ebfa887d9dafcea24819ae529aa0a52c8 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 11:11:56 +0100 Subject: [PATCH 18/70] Improve error handling for deserialization --- cassandra/bytesio.pxd | 2 +- cassandra/bytesio.pyx | 15 +++++++++------ cassandra/ioutils.pyx | 9 ++++----- cassandra/numpyparser.pyx | 4 ---- cassandra/objparser.pyx | 7 ++----- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/cassandra/bytesio.pxd b/cassandra/bytesio.pxd index 349fd600..9754dd23 100644 --- a/cassandra/bytesio.pxd +++ b/cassandra/bytesio.pxd @@ -3,5 +3,5 @@ cdef class BytesIOReader: cdef char *buf_ptr cdef Py_ssize_t pos cdef Py_ssize_t size - cdef char *read(self, Py_ssize_t n = ?) + cdef char *read(self, Py_ssize_t n = ?) except NULL diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index b18b1aa5..68796120 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -11,7 +11,7 @@ cdef class BytesIOReader: self.size = len(buf) self.buf_ptr = self.buf - cdef char *read(self, Py_ssize_t n = -1): + cdef char *read(self, Py_ssize_t n = -1) except NULL: """Read at most size bytes from the file (less if the read hits EOF before obtaining size bytes). @@ -24,13 +24,16 @@ cdef class BytesIOReader: if n < 0: newpos = self.size - elif newpos > self.size: - self.pos = self.size - return b'' + + if newpos > self.size: + # Raise an error here, as we do not want the caller to consume past the + # end of the buffer + raise EOFError("Cannot read past the end of the file") else: res = self.buf_ptr + self.pos - self.pos = newpos - return res + + self.pos = newpos + return res class PyBytesIOReader(BytesIOReader): diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 41d50851..db3ce633 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -3,7 +3,7 @@ from libc.stdint cimport int32_t from cassandra.bytesio cimport BytesIOReader -cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out): +cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out) except NULL: """ Get a pointer into the buffer provided by BytesIOReader for the next data item in the stream of values. 
@@ -11,10 +11,9 @@ cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out): raw_val_size = read_int(reader) size_out[0] = raw_val_size if raw_val_size < 0: - return NULL - else: - return reader.read(raw_val_size) + raise ValueError("Expected positive item size") + return reader.read(raw_val_size) -cdef inline int32_t read_int(BytesIOReader reader): +cdef inline int32_t read_int(BytesIOReader reader) except ?0xDEAD: return int32_unpack(reader.read(4)) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 9360c247..936a3f99 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -23,7 +23,6 @@ from cassandra import cqltypes from cassandra.util import is_little_endian import numpy as np -import pandas as pd cdef extern from "numpyFlags.h": @@ -124,9 +123,6 @@ cdef inline int unpack_row( for i in range(rowsize): buf = get_buf(reader, &bufsize) - if buf == NULL: - raise ValueError("Unexpected end of stream") - arr = arrays[i] if arr.is_object: diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index da6e6c01..6ae614b9 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -70,11 +70,8 @@ cdef class TupleRowParser(RowParser): for i in range(rowsize): buf = get_buf(reader, &bufsize) - if buf == NULL: - val = None - else: - dt = desc.datatypes[i] - val = dt.deserialize(buf, bufsize, desc.protocol_version) + dt = desc.datatypes[i] + val = dt.deserialize(buf, bufsize, desc.protocol_version) Py_INCREF(val) PyTuple_SET_ITEM(res, i, val) From 9a72f8d5cddffd8a5e8e3dc5878683623079535e Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 14:21:39 +0100 Subject: [PATCH 19/70] Start on unit tests for Cython code --- cassandra/bytesio.pyx | 30 ++---------------------- cassandra/datatypes.pyx | 1 + setup.py | 2 ++ tests/unit/cython/__init__.py | 0 tests/unit/cython/bytesio_testhelper.pyx | 30 ++++++++++++++++++++++++ tests/unit/cython/dummy_module.pyx | 2 ++ tests/unit/cython/test_bytesio.py | 21 +++++++++++++++++ tests/unit/cython/utils.py | 27 +++++++++++++++++++++ 8 files changed, 85 insertions(+), 28 deletions(-) create mode 100644 tests/unit/cython/__init__.py create mode 100644 tests/unit/cython/bytesio_testhelper.pyx create mode 100644 tests/unit/cython/dummy_module.pyx create mode 100644 tests/unit/cython/test_bytesio.py create mode 100644 tests/unit/cython/utils.py diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index 68796120..eb81c2fe 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -20,39 +20,13 @@ cdef class BytesIOReader: string is returned when EOF is encountered immediately. """ cdef Py_ssize_t newpos = self.pos + n - cdef char *res - if n < 0: newpos = self.size - - if newpos > self.size: + elif newpos > self.size: # Raise an error here, as we do not want the caller to consume past the # end of the buffer raise EOFError("Cannot read past the end of the file") - else: - res = self.buf_ptr + self.pos + cdef char *res = self.buf_ptr + self.pos self.pos = newpos return res - - -class PyBytesIOReader(BytesIOReader): - """ - Python-compatible BytesIOReader class - """ - - def read(self, n = -1): - """Read at most size bytes from the file - (less if the read hits EOF before obtaining size bytes). - - If the size argument is negative or omitted, read all data until EOF - is reached. The bytes are returned as a string object. An empty - string is returned when EOF is encountered immediately. 
- """ - if n is None or n < 0: - newpos = self.len - else: - newpos = min(self.pos+n, self.len) - r = self.buf[self.pos:newpos] - self.pos = newpos - return r diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx index 272435b2..b0c1adb2 100644 --- a/cassandra/datatypes.pyx +++ b/cassandra/datatypes.pyx @@ -60,3 +60,4 @@ def obj_array(list objs): arr[i] = obj return arr + diff --git a/setup.py b/setup.py index 7fe2631a..ce5e5166 100644 --- a/setup.py +++ b/setup.py @@ -274,6 +274,8 @@ if "--no-cython" not in sys.argv: exclude_failures=True)) extensions.extend(cythonize("cassandra/*.pyx", compiler_directives=directives)) + extensions.extend(cythonize("tests/unit/cython/*.pyx", + compiler_directives=directives)) except ImportError: sys.stderr.write("Cython is not installed. Not compiling core driver files as extensions (optional).") diff --git a/tests/unit/cython/__init__.py b/tests/unit/cython/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/cython/bytesio_testhelper.pyx b/tests/unit/cython/bytesio_testhelper.pyx new file mode 100644 index 00000000..7f898c4c --- /dev/null +++ b/tests/unit/cython/bytesio_testhelper.pyx @@ -0,0 +1,30 @@ +from cassandra.bytesio cimport BytesIOReader + +def test_read1(assert_equal, assert_raises): + cdef BytesIOReader reader = BytesIOReader(b'abcdef') + assert_equal(reader.read(2)[:2], b'ab') + assert_equal(reader.read(2)[:2], b'cd') + assert_equal(reader.read(0)[:0], b'') + assert_equal(reader.read(2)[:2], b'ef') + +def test_read2(assert_equal, assert_raises): + cdef BytesIOReader reader = BytesIOReader(b'abcdef') + reader.read(5) + reader.read(1) + +def test_read3(assert_equal, assert_raises): + cdef BytesIOReader reader = BytesIOReader(b'abcdef') + reader.read(6) + +def test_read_eof(assert_equal, assert_raises): + cdef BytesIOReader reader = BytesIOReader(b'abcdef') + reader.read(5) + # cannot convert reader.read to an object, do it manually + # assert_raises(EOFError, reader.read, 2) + try: + reader.read(2) + except EOFError: + pass + else: + raise Exception("Expected an EOFError") + reader.read(1) # see that we can still read this diff --git a/tests/unit/cython/dummy_module.pyx b/tests/unit/cython/dummy_module.pyx new file mode 100644 index 00000000..8bd1206b --- /dev/null +++ b/tests/unit/cython/dummy_module.pyx @@ -0,0 +1,2 @@ +# This is a dummy module used by utils.py to determine whether +# cassandra was build with Cython \ No newline at end of file diff --git a/tests/unit/cython/test_bytesio.py b/tests/unit/cython/test_bytesio.py new file mode 100644 index 00000000..65cc463a --- /dev/null +++ b/tests/unit/cython/test_bytesio.py @@ -0,0 +1,21 @@ +from tests.unit.cython.utils import cyimport, cythontest +bytesio_testhelper = cyimport('tests.unit.cython.bytesio_testhelper') + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + + +class BytesIOTest(unittest.TestCase): + """Test Cython BytesIO proxy""" + + @cythontest + def test_reading(self): + bytesio_testhelper.test_read1(self.assertEqual, self.assertRaises) + bytesio_testhelper.test_read2(self.assertEqual, self.assertRaises) + bytesio_testhelper.test_read3(self.assertEqual, self.assertRaises) + + @cythontest + def test_reading_error(self): + bytesio_testhelper.test_read_eof(self.assertEqual, self.assertRaises) diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py new file mode 100644 index 00000000..eea4698f --- /dev/null +++ b/tests/unit/cython/utils.py @@ -0,0 +1,27 @@ +try: + import 
tests.unit.cython.dummy_module +except ImportError: + have_cython = False +else: + have_cython = True + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + +def cyimport(import_path): + """ + Import a Cython module if available, otherwise return None + (and skip any relevant tests). + """ + try: + return __import__(import_path, fromlist=True) + except ImportError: + if have_cython: + raise + return None + +# @cythontest +# def test_something(self): ... +cythontest = unittest.skipUnless(have_cython, 'Cython is not available') From 27e3505ececf009dd915b0c299036ed54e45aa6d Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 14:41:02 +0100 Subject: [PATCH 20/70] Fix some issues with integration tests --- tests/integration/__init__.py | 2 +- tests/integration/standard/__init__.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 057da5c8..f609492f 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -160,7 +160,7 @@ def remove_cluster(): CCM_CLUSTER.remove() CCM_CLUSTER = None return - except WindowsError: + except OSError: ex_type, ex, tb = sys.exc_info() log.warn("{0}: {1} Backtrace: {2}".format(ex_type.__name__, ex, traceback.extract_tb(tb))) del tb diff --git a/tests/integration/standard/__init__.py b/tests/integration/standard/__init__.py index 794d75bf..484ed237 100644 --- a/tests/integration/standard/__init__.py +++ b/tests/integration/standard/__init__.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +try: + import unittest2 as unittest +except ImportError: + import unittest # noqa + try: from ccmlib import common except ImportError as e: From 99dea50c735d2ca3296f20ee0bbc208de5a4e881 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 16:50:29 +0100 Subject: [PATCH 21/70] Add cython protocol integration test --- .gitignore | 3 + .../standard/test_custom_protocol_handler.py | 44 +---------- .../standard/test_cython_protocol_handlers.py | 75 +++++++++++++++++++ 3 files changed, 82 insertions(+), 40 deletions(-) create mode 100644 tests/integration/standard/test_cython_protocol_handlers.py diff --git a/.gitignore b/.gitignore index ee93232c..42aa53e4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.so *.egg *.egg-info +*.attr .tox .python-version build @@ -19,6 +20,7 @@ setuptools*.egg cassandra/*.c !cassandra/murmur3.c +cassandra/*.html # OSX .DS_Store @@ -38,3 +40,4 @@ cassandra/*.c #iPython *.ipynb + diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index 61a23831..856e6979 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -21,7 +21,8 @@ from cassandra.protocol import ProtocolHandler, ResultMessage, UUIDType, read_in from cassandra.query import tuple_factory from cassandra.cluster import Cluster from tests.integration import use_singledc, PROTOCOL_VERSION, execute_until_pass -from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES, get_sample +from tests.integration.datatype_utils import update_datatypes, PRIMITIVE_DATATYPES +from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params from six import binary_type 
import uuid @@ -106,11 +107,11 @@ class CustomProtocolHandlerTest(unittest.TestCase): session.client_protocol_handler = CustomProtocolHandlerResultMessageTracked session.row_factory = tuple_factory - columns_string = create_table_with_all_types("test_table", session) + columns_string = create_table_with_all_types("alltypes", session) # verify data params = get_all_primitive_params() - results = session.execute("SELECT {0} FROM alltypes WHERE zz=0".format(columns_string))[0] + results = session.execute("SELECT {0} FROM alltypes WHERE pimkey=0".format(columns_string))[0] for expected, actual in zip(params, results): self.assertEqual(actual, expected) # Ensure we have covered the various primitive types @@ -118,43 +119,6 @@ class CustomProtocolHandlerTest(unittest.TestCase): session.shutdown() -def create_table_with_all_types(table_name, session): - """ - Method that given a table_name and session construct a table that contains all possible primitive types - :param table_name: Name of table to create - :param session: session to use for table creation - :return: a string containing and columns. This can be used to query the table. - """ - # create table - alpha_type_list = ["zz int PRIMARY KEY"] - col_names = ["zz"] - start_index = ord('a') - for i, datatype in enumerate(PRIMITIVE_DATATYPES): - alpha_type_list.append("{0} {1}".format(chr(start_index + i), datatype)) - col_names.append(chr(start_index + i)) - - session.execute("CREATE TABLE alltypes ({0})".format(', '.join(alpha_type_list)), timeout=120) - - # create the input - params = get_all_primitive_params() - - # insert into table as a simple statement - columns_string = ', '.join(col_names) - placeholders = ', '.join(["%s"] * len(col_names)) - session.execute("INSERT INTO alltypes ({0}) VALUES ({1})".format(columns_string, placeholders), params, timeout=120) - return columns_string - - -def get_all_primitive_params(): - """ - Simple utility method used to give back a list of all possible primitive data sample types. 
- """ - params = [0] - for datatype in PRIMITIVE_DATATYPES: - params.append((get_sample(datatype))) - return params - - class CustomResultMessageRaw(ResultMessage): """ This is a custom Result Message that is used to return raw results, rather then diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py new file mode 100644 index 00000000..35b131a9 --- /dev/null +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -0,0 +1,75 @@ +"""Test the various Cython-based message deserializers""" + +# Based on test_custom_protocol_handler.py + +try: + import unittest2 as unittest +except ImportError: + import unittest + +from cassandra.cluster import Cluster +from tests.integration import use_singledc, PROTOCOL_VERSION +from tests.integration.datatype_utils import update_datatypes +from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params +from six import next + +try: + from cassandra.cython_protocol_handler import make_protocol_handler +except ImportError as e: + raise unittest.skip("Skipping test, not compiled with Cython enabled") + +from cassandra.numpyparser import NumpyParser +from cassandra.objparser import ListParser, LazyParser + + +def setup_module(): + use_singledc() + update_datatypes() + + +class CustomProtocolHandlerTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.cluster = Cluster(protocol_version=PROTOCOL_VERSION) + cls.session = cls.cluster.connect() + cls.session.execute("CREATE KEYSPACE testspace WITH replication = " + "{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}") + + @classmethod + def tearDownClass(cls): + cls.session.execute("DROP KEYSPACE testspace") + cls.cluster.shutdown() + + def test_cython_parser(self): + """ + Test Cython-based parser that returns a list of tuples + """ + self.cython_parser(ListParser()) + + def test_cython_lazy_parser(self): + """ + Test Cython-based parser that returns a list of tuples + """ + self.cython_parser(LazyParser()) + + def cython_parser(self, colparser): + session = Cluster().connect() + session.set_keyspace("smallspace") + + # use our custom protocol handler + session.client_protocol_handler = make_protocol_handler(colparser) + # session.row_factory = tuple_factory + create_table_with_all_types("test_table", session) + + # verify data + params = get_all_primitive_params() + [first_result] = session.execute("SELECT * FROM test_table WHERE primkey=0") + self.assertEqual(len(params), len(first_result), + msg="Not the right number of columns?") + print(first_result) + assert False + for expected, actual in zip(params, first_result): + self.assertEqual(actual, expected) + + session.shutdown() From 5996aa622da9dc3404bd1bdefbc0d941595e8a6e Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 4 Aug 2015 17:29:23 +0100 Subject: [PATCH 22/70] Some fixes to cython integration test --- .../standard/test_custom_protocol_handler.py | 6 +++--- .../standard/test_cython_protocol_handlers.py | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index 856e6979..edd066be 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -63,7 +63,7 @@ class CustomProtocolHandlerTest(unittest.TestCase): """ # Ensure that we get normal uuid back first - session = 
Cluster().connect() + session = Cluster(protocol_version=PROTOCOL_VERSION).connect(keyspace="custserdes") session.row_factory = tuple_factory result_set = session.execute("SELECT schema_version FROM system.local") result = result_set.pop() @@ -103,7 +103,7 @@ class CustomProtocolHandlerTest(unittest.TestCase): @test_category data_types:serialization """ # Connect using a custom protocol handler that tracks the various types the result message is used with. - session = Cluster().connect(keyspace="custserdes") + session = Cluster(protocol_version=PROTOCOL_VERSION).connect(keyspace="custserdes") session.client_protocol_handler = CustomProtocolHandlerResultMessageTracked session.row_factory = tuple_factory @@ -111,7 +111,7 @@ class CustomProtocolHandlerTest(unittest.TestCase): # verify data params = get_all_primitive_params() - results = session.execute("SELECT {0} FROM alltypes WHERE pimkey=0".format(columns_string))[0] + results = session.execute("SELECT {0} FROM alltypes WHERE primkey=0".format(columns_string))[0] for expected, actual in zip(params, results): self.assertEqual(actual, expected) # Ensure we have covered the various primitive types diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py index 35b131a9..059c9317 100644 --- a/tests/integration/standard/test_cython_protocol_handlers.py +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -16,7 +16,7 @@ from six import next try: from cassandra.cython_protocol_handler import make_protocol_handler except ImportError as e: - raise unittest.skip("Skipping test, not compiled with Cython enabled") + raise unittest.SkipTest("Skipping test, not compiled with Cython enabled") from cassandra.numpyparser import NumpyParser from cassandra.objparser import ListParser, LazyParser @@ -35,6 +35,8 @@ class CustomProtocolHandlerTest(unittest.TestCase): cls.session = cls.cluster.connect() cls.session.execute("CREATE KEYSPACE testspace WITH replication = " "{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}") + cls.session.set_keyspace("testspace") + create_table_with_all_types("test_table", cls.session) @classmethod def tearDownClass(cls): @@ -54,21 +56,18 @@ class CustomProtocolHandlerTest(unittest.TestCase): self.cython_parser(LazyParser()) def cython_parser(self, colparser): - session = Cluster().connect() - session.set_keyspace("smallspace") + cluster = Cluster(protocol_version=PROTOCOL_VERSION) + session = cluster.connect(keyspace="testspace") # use our custom protocol handler session.client_protocol_handler = make_protocol_handler(colparser) # session.row_factory = tuple_factory - create_table_with_all_types("test_table", session) # verify data params = get_all_primitive_params() [first_result] = session.execute("SELECT * FROM test_table WHERE primkey=0") self.assertEqual(len(params), len(first_result), msg="Not the right number of columns?") - print(first_result) - assert False for expected, actual in zip(params, first_result): self.assertEqual(actual, expected) From 1879d9be31a0faa32e90780f26c016b328b7ea6c Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Wed, 5 Aug 2015 18:42:40 +0100 Subject: [PATCH 23/70] Start on deserializers for cqltypes --- cassandra/buffer.pxd | 8 ++ cassandra/buffer.pyx | 38 ++++++++++ cassandra/bytesio.pxd | 1 - cassandra/cython_protocol_handler.pyx | 5 +- cassandra/datatypes.pxd | 3 - cassandra/datatypes.pyx | 63 ---------------- cassandra/deserializers.pxd | 7 ++ cassandra/deserializers.pyx | 101 
++++++++++++++++++++++++++ cassandra/ioutils.pyx | 11 ++- cassandra/marshal.pyx | 5 -- cassandra/numpyparser.pyx | 16 ++-- cassandra/objparser.pyx | 18 +++-- cassandra/parsing.pxd | 4 +- 13 files changed, 185 insertions(+), 95 deletions(-) create mode 100644 cassandra/buffer.pxd create mode 100644 cassandra/buffer.pyx delete mode 100644 cassandra/datatypes.pxd delete mode 100644 cassandra/datatypes.pyx create mode 100644 cassandra/deserializers.pxd create mode 100644 cassandra/deserializers.pyx diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd new file mode 100644 index 00000000..f431d311 --- /dev/null +++ b/cassandra/buffer.pxd @@ -0,0 +1,8 @@ +cdef struct Buffer: + char *ptr + Py_ssize_t size + +cdef inline Buffer from_bytes(bytes byts) +cdef inline bytes to_bytes(Buffer *buf) +cdef inline char *buf_ptr(Buffer *buf) +cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size) \ No newline at end of file diff --git a/cassandra/buffer.pyx b/cassandra/buffer.pyx new file mode 100644 index 00000000..570a7496 --- /dev/null +++ b/cassandra/buffer.pyx @@ -0,0 +1,38 @@ +""" +Simple buffer data structure. This buffer can be included: + + include "buffer.pyx" + +or imported: + + from cassanda cimport buffer + +but this prevents inlining of the functions below. +""" + +from cpython.bytes cimport PyBytes_AS_STRING + # char* PyBytes_AS_STRING(object string) + # Macro form of PyBytes_AsString() but without error + # checking. Only string objects are supported; no Unicode objects + # should be passed. + +from cassandra.buffer cimport Buffer + +cdef struct Buffer: + char *ptr + Py_ssize_t size + +cdef inline Buffer from_bytes(bytes byts): + return from_ptr_and_size(PyBytes_AS_STRING(byts), len(byts)) + +cdef inline bytes to_bytes(Buffer *buf): + return buf.ptr[:buf.size] + +cdef inline char *buf_ptr(Buffer *buf): + return buf.ptr + +cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size): + cdef Buffer res + res.ptr = ptr + res.size = size + return res \ No newline at end of file diff --git a/cassandra/bytesio.pxd b/cassandra/bytesio.pxd index 9754dd23..64bbdcca 100644 --- a/cassandra/bytesio.pxd +++ b/cassandra/bytesio.pxd @@ -4,4 +4,3 @@ cdef class BytesIOReader: cdef Py_ssize_t pos cdef Py_ssize_t size cdef char *read(self, Py_ssize_t n = ?) 
except NULL - diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index 6ef3ae9f..af91c4d7 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -3,7 +3,7 @@ from cassandra.protocol import ResultMessage, ProtocolHandler from cassandra.parsing cimport ParseDesc, ColumnParser -from cassandra.datatypes import make_datatypes +from cassandra.deserializers import make_deserializers from cassandra.objparser import ListParser @@ -21,7 +21,8 @@ def make_recv_results_rows(ColumnParser colparser): colnames = [c[2] for c in column_metadata] coltypes = [c[3] for c in column_metadata] - desc = ParseDesc(colnames, coltypes, make_datatypes(coltypes), protocol_version) + desc = ParseDesc(colnames, coltypes, make_deserializers(coltypes), + protocol_version) reader = BytesIOReader(f.read()) parsed_rows = colparser.parse_rows(reader, desc) diff --git a/cassandra/datatypes.pxd b/cassandra/datatypes.pxd deleted file mode 100644 index cd58b6b3..00000000 --- a/cassandra/datatypes.pxd +++ /dev/null @@ -1,3 +0,0 @@ -cdef class DataType: - cdef object cqltype - cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version) diff --git a/cassandra/datatypes.pyx b/cassandra/datatypes.pyx deleted file mode 100644 index b0c1adb2..00000000 --- a/cassandra/datatypes.pyx +++ /dev/null @@ -1,63 +0,0 @@ -# -- cython: profile=True - -include 'marshal.pyx' - -from cython.view cimport array as cython_array -from cassandra.datatypes import Int64, GenericDataType -from cassandra.cqltypes import LongType - -# TODO: Port cqltypes to this module - -cdef class DataType: - """ - Cython-based datatype - """ - - def __init__(self, cqltype): - self.cqltype = cqltype - - cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): - raise NotImplementedError - - -cdef class Int64(DataType): - - cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): - cdef int64_t x = int64_unpack(buf) - return x - - def __str__(self): - return "int64" - - -cdef class GenericDataType(DataType): - """ - Wrap a generic datatype for deserialization - """ - - cdef object deserialize(self, char *buf, Py_ssize_t size, protocol_version): - return self.cqltype.deserialize(buf[:size], protocol_version) - - def __str__(self): - return "GenericDataType(%s)" % (self.cqltype,) - - -def make_datatypes(coltypes): - cdef DataType[::1] datatypes - return obj_array([make_datatype(ct) for ct in coltypes]) - - -def make_datatype(coltype): - return Int64(coltype) if coltype == LongType else GenericDataType(coltype) - - -def obj_array(list objs): - """Create a (Cython) array of objects given a list of objects""" - cdef object[:] arr - arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") - # arr[:] = objs # This does not work (segmentation faults) - for i, obj in enumerate(objs): - arr[i] = obj - return arr - - diff --git a/cassandra/deserializers.pxd b/cassandra/deserializers.pxd new file mode 100644 index 00000000..333479f3 --- /dev/null +++ b/cassandra/deserializers.pxd @@ -0,0 +1,7 @@ +# -- cython: profile=True + +from cassandra.buffer cimport Buffer + +cdef class Deserializer: + cdef deserialize(self, Buffer *buf, protocol_version) + # cdef deserialize(self, CString byts, protocol_version) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx new file mode 100644 index 00000000..21245364 --- /dev/null +++ b/cassandra/deserializers.pyx @@ -0,0 +1,101 @@ +# -- cython: profile=True + +include 'marshal.pyx' 
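# A rough sketch of how this module is meant to be consumed (pieced together
# from the protocol-handler and test diffs above, so treat the exact wiring
# as an assumption): make_deserializers() builds one Deserializer per column
# type, the result is handed to a row parser through ParseDesc, and an
# application opts in by swapping the protocol handler on a session:
#
#     from cassandra.cython_protocol_handler import make_protocol_handler
#     from cassandra.objparser import ListParser
#
#     session.client_protocol_handler = make_protocol_handler(ListParser())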
+include 'buffer.pyx' + +from cython.view cimport array as cython_array +from decimal import Decimal +from uuid import UUID + +import inspect + +cdef class Deserializer: + cdef deserialize(self, Buffer *buf, protocol_version): + raise NotImplementedError + + +cdef class DesLongType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return int64_unpack(buf.ptr) + + +# TODO: Use libmpdec: http://www.bytereef.org/mpdecimal/index.html +cdef class DesDecimalType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + scale = int32_unpack(buf.ptr) + unscaled = varint_unpack(buf.ptr + 4) + return Decimal('%de%d' % (unscaled, -scale)) + + +cdef class DesUUIDType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return UUID(bytes=to_bytes(buf)) + + +cdef class DesBooleanType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return bool(int8_unpack(buf.ptr)) + + +cdef class DesByteType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return int8_unpack(buf.ptr) + + +cdef class DesAsciiType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + if six.PY2: + return to_bytes(buf) + return to_bytes(buf).decode('ascii') + + +cdef class DesFloatType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return float_unpack(buf.ptr) + + +cdef class DesDoubleType(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return double_unpack(buf.ptr) + + +cdef class DesInt32Type(Deserializer): + cdef deserialize(self, Buffer *buf, protocol_version): + return int32_unpack(buf.ptr) + + +cdef class GenericDeserializer(Deserializer): + """ + Wrap a generic datatype for deserialization + """ + + def __init__(self, cqltype): + self.cqltype = cqltype + + cdef deserialize(self, Buffer *buf, protocol_version): + return self.cqltype.deserialize(to_bytes(buf), protocol_version) + +#-------------------------------------------------------------------------- + +def make_deserializers(cqltypes): + """Create an array of Deserializers for each given cqltype in cqltypes""" + cdef Deserializer[::1] deserializers + return obj_array([find_deserializer(ct) for ct in cqltypes]) + + +cpdef Deserializer find_deserializer(cqltype): + """Find a deserializer for a cqltype""" + deserializer = None + if inspect.isclass(cqltype): + deserializer = globals().get('Des' + cqltype.__name__)() + return deserializer or GenericDeserializer(cqltype) + + +def obj_array(list objs): + """Create a (Cython) array of objects given a list of objects""" + cdef object[:] arr + arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O") + # arr[:] = objs # This does not work (segmentation faults) + for i, obj in enumerate(objs): + arr[i] = obj + return arr diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index db3ce633..0f8c3e3e 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -1,19 +1,22 @@ include 'marshal.pyx' +include 'buffer.pyx' + from libc.stdint cimport int32_t from cassandra.bytesio cimport BytesIOReader -cdef inline char *get_buf(BytesIOReader reader, Py_ssize_t *size_out) except NULL: +cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1: """ Get a pointer into the buffer provided by BytesIOReader for the next data item in the stream of values. 
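    Roughly, each value is laid out on the wire as

        [int32 length][length bytes]

    with the length a signed, big-endian 32-bit integer. A negative length
    denotes a NULL value, which this reader currently rejects with
    ValueError rather than returning an empty buffer.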
""" - raw_val_size = read_int(reader) - size_out[0] = raw_val_size + cdef Py_ssize_t raw_val_size = read_int(reader) if raw_val_size < 0: raise ValueError("Expected positive item size") - return reader.read(raw_val_size) + buf_out.ptr = reader.read(raw_val_size) + buf_out.size = raw_val_size + return 0 cdef inline int32_t read_int(BytesIOReader reader) except ?0xDEAD: return int32_unpack(reader.read(4)) diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index cc80461b..9e1c8ca5 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -71,9 +71,6 @@ cpdef inline int64_t int64_unpack(const char *buf): # The 'const' makes sure the buffer is not mutated in-place! cdef int64_t x = ( buf)[0] cdef char *p = &x - # if is_little_endian: - # p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7] = ( - # p[7], p[6], p[5], p[4], p[3], p[2], p[1], p[0]) swap_order( &x, 8) return x @@ -83,8 +80,6 @@ cpdef inline bytes int32_pack(int32_t x): cpdef inline int32_t int32_unpack(const char *buf): cdef int32_t x = ( buf)[0] cdef char *p = &x - # if is_little_endian: - # p[0], p[1], p[2], p[3] = p[3], p[2], p[1], p[0] swap_order( &x, 4) return x diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 936a3f99..149843a6 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -17,7 +17,7 @@ from libc.stdint cimport uint64_t from cpython.ref cimport Py_INCREF, PyObject from cassandra.bytesio cimport BytesIOReader -from cassandra.datatypes cimport DataType +from cassandra.deserializers cimport Deserializer from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser from cassandra import cqltypes from cassandra.util import is_little_endian @@ -116,22 +116,22 @@ def make_array(coltype, array_size): @cython.wraparound(False) cdef inline int unpack_row( BytesIOReader reader, ParseDesc desc, ArrDesc *arrays) except -1: - cdef char *buf - cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize + cdef Buffer buf + cdef Py_ssize_t i, rowsize = desc.rowsize cdef ArrDesc arr - cdef DataType dt + cdef Deserializer deserializer for i in range(rowsize): - buf = get_buf(reader, &bufsize) + get_buf(reader, &buf) arr = arrays[i] if arr.is_object: - dt = desc.datatypes[i] - val = dt.deserialize(buf, bufsize, desc.protocol_version) + deserializer = desc.datatypes[i] + val = deserializer.deserialize(&buf, desc.protocol_version) Py_INCREF(val) ( arr.buf_ptr)[0] = val else: - memcopy(buf, arr.buf_ptr, bufsize) + memcopy(buf.ptr, arr.buf_ptr, buf.size) # Update the pointer into the array for the next time arrays[i].buf_ptr += arr.stride diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index 6ae614b9..62723ceb 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -19,7 +19,7 @@ from cpython.ref cimport ( ) from cassandra.bytesio cimport BytesIOReader -from cassandra.datatypes cimport DataType +from cassandra.deserializers cimport Deserializer from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser @@ -63,16 +63,20 @@ cdef class TupleRowParser(RowParser): """ cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): - cdef char *buf - cdef Py_ssize_t i, bufsize, rowsize = desc.rowsize - cdef DataType dt + cdef Buffer buf + cdef Py_ssize_t i, rowsize = desc.rowsize + cdef Deserializer deserializer cdef tuple res = PyTuple_New(desc.rowsize) for i in range(rowsize): - buf = get_buf(reader, &bufsize) - dt = desc.datatypes[i] - val = dt.deserialize(buf, bufsize, desc.protocol_version) + # Read the next few bytes + get_buf(reader, &buf) + # Deserialize 
bytes to python object + deserializer = desc.datatypes[i] + val = deserializer.deserialize(&buf, desc.protocol_version) + + # Insert new object into tuple Py_INCREF(val) PyTuple_SET_ITEM(res, i, val) diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd index c4774385..40043f29 100644 --- a/cassandra/parsing.pxd +++ b/cassandra/parsing.pxd @@ -1,10 +1,10 @@ from cassandra.bytesio cimport BytesIOReader -from cassandra.datatypes cimport DataType +from cassandra.deserializers cimport Deserializer cdef class ParseDesc: cdef public object colnames cdef public object coltypes - cdef DataType[::1] datatypes + cdef Deserializer[::1] datatypes cdef public object protocol_version cdef Py_ssize_t rowsize From ae7c4b2e81ba35f455f80b67d5fa109b88b541b7 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Wed, 5 Aug 2015 18:49:19 +0100 Subject: [PATCH 24/70] Forgot to add test utility module --- tests/integration/standard/utils.py | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/integration/standard/utils.py diff --git a/tests/integration/standard/utils.py b/tests/integration/standard/utils.py new file mode 100644 index 00000000..bd0c80b5 --- /dev/null +++ b/tests/integration/standard/utils.py @@ -0,0 +1,46 @@ +""" +Helper module to populate a dummy Cassandra tables with data. +""" + +from tests.integration.datatype_utils import PRIMITIVE_DATATYPES, get_sample + +def create_table_with_all_types(table_name, session): + """ + Method that given a table_name and session construct a table that contains + all possible primitive types. + + :param table_name: Name of table to create + :param session: session to use for table creation + :return: a string containing the names of all the columns. + This can be used to query the table. + """ + # create table + alpha_type_list = ["primkey int PRIMARY KEY"] + col_names = ["primkey"] + start_index = ord('a') + for i, datatype in enumerate(PRIMITIVE_DATATYPES): + alpha_type_list.append("{0} {1}".format(chr(start_index + i), datatype)) + col_names.append(chr(start_index + i)) + + session.execute("CREATE TABLE {0} ({1})".format( + table_name, ', '.join(alpha_type_list)), timeout=120) + + # create the input + params = get_all_primitive_params() + + # insert into table as a simple statement + columns_string = ', '.join(col_names) + placeholders = ', '.join(["%s"] * len(col_names)) + session.execute("INSERT INTO {0} ({1}) VALUES ({2})".format( + table_name, columns_string, placeholders), params, timeout=120) + return columns_string + + +def get_all_primitive_params(): + """ + Simple utility method used to give back a list of all possible primitive data sample types. 
+ """ + params = [0] + for datatype in PRIMITIVE_DATATYPES: + params.append(get_sample(datatype)) + return params From 26ef8682244fcafb573f841ba7b43551725bb078 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Wed, 5 Aug 2015 20:09:06 +0100 Subject: [PATCH 25/70] Some small fixes to deserializers --- cassandra/deserializers.pyx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 21245364..2bf91553 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -69,6 +69,8 @@ cdef class GenericDeserializer(Deserializer): Wrap a generic datatype for deserialization """ + cdef object cqltype + def __init__(self, cqltype): self.cqltype = cqltype @@ -85,10 +87,11 @@ def make_deserializers(cqltypes): cpdef Deserializer find_deserializer(cqltype): """Find a deserializer for a cqltype""" - deserializer = None - if inspect.isclass(cqltype): - deserializer = globals().get('Des' + cqltype.__name__)() - return deserializer or GenericDeserializer(cqltype) + name = inspect.isclass(cqltype) and 'Des' + cqltype.__name__ + if name in globals(): + deserializer_cls = globals()[name] + deserializer_cls() + return GenericDeserializer(cqltype) def obj_array(list objs): From ddeb7536623ebbd916c0d5af4130e99742578d99 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Wed, 5 Aug 2015 20:21:31 +0100 Subject: [PATCH 26/70] Some more small fixes --- cassandra/numpyparser.pyx | 2 +- cassandra/objparser.pyx | 8 +------- cassandra/parsing.pxd | 2 +- cassandra/parsing.pyx | 4 ++-- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 149843a6..7be86400 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -126,7 +126,7 @@ cdef inline int unpack_row( arr = arrays[i] if arr.is_object: - deserializer = desc.datatypes[i] + deserializer = desc.deserializers[i] val = deserializer.deserialize(&buf, desc.protocol_version) Py_INCREF(val) ( arr.buf_ptr)[0] = val diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index 62723ceb..e98a991e 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -54,12 +54,6 @@ cdef class TupleRowParser(RowParser): Parse a single returned row into a tuple of objects: (obj1, ..., objN) - - Attributes - =========== - datatypes: - this is a memoryview of N DataType objects that can deserialize bytes - into objects """ cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): @@ -73,7 +67,7 @@ cdef class TupleRowParser(RowParser): get_buf(reader, &buf) # Deserialize bytes to python object - deserializer = desc.datatypes[i] + deserializer = desc.deserializers[i] val = deserializer.deserialize(&buf, desc.protocol_version) # Insert new object into tuple diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd index 40043f29..13bc8411 100644 --- a/cassandra/parsing.pxd +++ b/cassandra/parsing.pxd @@ -4,7 +4,7 @@ from cassandra.deserializers cimport Deserializer cdef class ParseDesc: cdef public object colnames cdef public object coltypes - cdef Deserializer[::1] datatypes + cdef Deserializer[::1] deserializers cdef public object protocol_version cdef Py_ssize_t rowsize diff --git a/cassandra/parsing.pyx b/cassandra/parsing.pyx index 71196d14..c9afd4b5 100644 --- a/cassandra/parsing.pyx +++ b/cassandra/parsing.pyx @@ -5,10 +5,10 @@ Module containing the definitions and declarations (parsing.pxd) for parsers. 
cdef class ParseDesc: """Description of what structure to parse""" - def __init__(self, colnames, coltypes, datatypes, protocol_version): + def __init__(self, colnames, coltypes, deserializers, protocol_version): self.colnames = colnames self.coltypes = coltypes - self.datatypes = datatypes + self.deserializers = deserializers self.protocol_version = protocol_version self.rowsize = len(colnames) From 2d8ad6ad3a53fd22e16302e03334a4dd8f5fe6b7 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 13:45:23 +0100 Subject: [PATCH 27/70] More Cython-based object deserializers --- cassandra/buffer.pxd | 32 +++- cassandra/buffer.pyx | 38 ----- cassandra/cython_protocol_handler.pyx | 2 +- cassandra/cython_utils.pyx | 27 ++++ cassandra/deserializers.pxd | 2 +- cassandra/deserializers.pyx | 203 ++++++++++++++++++++++++-- cassandra/ioutils.pyx | 2 +- cassandra/marshal.pyx | 41 ++++-- cassandra/parsing.pxd | 2 +- 9 files changed, 275 insertions(+), 74 deletions(-) delete mode 100644 cassandra/buffer.pyx create mode 100644 cassandra/cython_utils.pyx diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd index f431d311..cfe93e01 100644 --- a/cassandra/buffer.pxd +++ b/cassandra/buffer.pxd @@ -1,8 +1,32 @@ +""" +Simple buffer data structure that provides a view on existing memory +(e.g. from a bytes object). This memory must stay alive while the +buffer is in use. +""" + +from cpython.bytes cimport PyBytes_AS_STRING + # char* PyBytes_AS_STRING(object string) + # Macro form of PyBytes_AsString() but without error + # checking. Only string objects are supported; no Unicode objects + # should be passed. + +from cassandra.buffer cimport Buffer + cdef struct Buffer: char *ptr Py_ssize_t size -cdef inline Buffer from_bytes(bytes byts) -cdef inline bytes to_bytes(Buffer *buf) -cdef inline char *buf_ptr(Buffer *buf) -cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size) \ No newline at end of file +cdef inline Buffer from_bytes(bytes byts): + return from_ptr_and_size(PyBytes_AS_STRING(byts), len(byts)) + +cdef inline bytes to_bytes(Buffer *buf): + return buf.ptr[:buf.size] + +cdef inline char *buf_ptr(Buffer *buf): + return buf.ptr + +cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size): + cdef Buffer res + res.ptr = ptr + res.size = size + return res diff --git a/cassandra/buffer.pyx b/cassandra/buffer.pyx deleted file mode 100644 index 570a7496..00000000 --- a/cassandra/buffer.pyx +++ /dev/null @@ -1,38 +0,0 @@ -""" -Simple buffer data structure. This buffer can be included: - - include "buffer.pyx" - -or imported: - - from cassanda cimport buffer - -but this prevents inlining of the functions below. -""" - -from cpython.bytes cimport PyBytes_AS_STRING - # char* PyBytes_AS_STRING(object string) - # Macro form of PyBytes_AsString() but without error - # checking. Only string objects are supported; no Unicode objects - # should be passed. 
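# A minimal usage sketch of the inline helpers defined below (an
# illustration, not code from this patch): from_bytes() merely wraps the
# pointer inside an existing bytes object, so that object must stay
# referenced for as long as the Buffer is in use, while to_bytes() copies
# the viewed range back out into a fresh bytes object:
#
#     cdef Buffer buf
#     byts = b"abc"            # keep a reference while buf is alive
#     buf = from_bytes(byts)
#     assert to_bytes(&buf) == b"abc"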
- -from cassandra.buffer cimport Buffer - -cdef struct Buffer: - char *ptr - Py_ssize_t size - -cdef inline Buffer from_bytes(bytes byts): - return from_ptr_and_size(PyBytes_AS_STRING(byts), len(byts)) - -cdef inline bytes to_bytes(Buffer *buf): - return buf.ptr[:buf.size] - -cdef inline char *buf_ptr(Buffer *buf): - return buf.ptr - -cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size): - cdef Buffer res - res.ptr = ptr - res.size = size - return res \ No newline at end of file diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx index af91c4d7..629ce887 100644 --- a/cassandra/cython_protocol_handler.pyx +++ b/cassandra/cython_protocol_handler.pyx @@ -11,7 +11,7 @@ include "ioutils.pyx" def make_recv_results_rows(ColumnParser colparser): - def recv_results_rows(cls, f, protocol_version, user_type_map): + def recv_results_rows(cls, f, int protocol_version, user_type_map): """ Parse protocol data given as a BytesIO f into a set of columns (e.g. list of tuples) This is used as the recv_results_rows method of (Fast)ResultMessage diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx new file mode 100644 index 00000000..fe4fbab9 --- /dev/null +++ b/cassandra/cython_utils.pyx @@ -0,0 +1,27 @@ +""" +Duplicate module of util.py, with some accelerated functions +used for deserialization. +""" + +# from __future__ import with_statement + +from cpython.datetime cimport timedelta_new + # cdef inline object timedelta_new(int days, int seconds, int useconds) + # Create timedelta object using DateTime CAPI factory function. + # Note, there are no range checks for any of the arguments. + +import calendar +import datetime +import random +import six +import uuid +import sys + +DATETIME_EPOC = datetime.datetime(1970, 1, 1) + +assert sys.byteorder in ('little', 'big') +is_little_endian = sys.byteorder == 'little' + +cdef datetime_from_timestamp(timestamp): + return DATETIME_EPOC + timedelta_new(0, timestamp, 0) + diff --git a/cassandra/deserializers.pxd b/cassandra/deserializers.pxd index 333479f3..882d19d1 100644 --- a/cassandra/deserializers.pxd +++ b/cassandra/deserializers.pxd @@ -3,5 +3,5 @@ from cassandra.buffer cimport Buffer cdef class Deserializer: - cdef deserialize(self, Buffer *buf, protocol_version) + cdef deserialize(self, Buffer *buf, int protocol_version) # cdef deserialize(self, CString byts, protocol_version) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 2bf91553..680e14a2 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -1,69 +1,246 @@ # -- cython: profile=True +from libc.stdint cimport int32_t, uint16_t + include 'marshal.pyx' -include 'buffer.pyx' +include 'cython_utils.pyx' +from cassandra.buffer cimport Buffer, to_bytes from cython.view cimport array as cython_array + +import socket +import inspect from decimal import Decimal from uuid import UUID -import inspect +from cassandra import util + cdef class Deserializer: - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): raise NotImplementedError cdef class DesLongType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return int64_unpack(buf.ptr) # TODO: Use libmpdec: http://www.bytereef.org/mpdecimal/index.html cdef class DesDecimalType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): 
scale = int32_unpack(buf.ptr) unscaled = varint_unpack(buf.ptr + 4) return Decimal('%de%d' % (unscaled, -scale)) cdef class DesUUIDType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return UUID(bytes=to_bytes(buf)) cdef class DesBooleanType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return bool(int8_unpack(buf.ptr)) cdef class DesByteType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return int8_unpack(buf.ptr) cdef class DesAsciiType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): if six.PY2: return to_bytes(buf) return to_bytes(buf).decode('ascii') cdef class DesFloatType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return float_unpack(buf.ptr) cdef class DesDoubleType(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return double_unpack(buf.ptr) cdef class DesInt32Type(Deserializer): - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return int32_unpack(buf.ptr) +cdef class DesIntegerType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + return varint_unpack(to_bytes(buf)) + + +cdef class DesInetAddressType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef bytes byts = to_bytes(buf) + + # TODO: optimize inet_ntop, inet_ntoa + if len(buf.size) == 16: + return util.inet_ntop(socket.AF_INET6, byts) + else: + # util.inet_pton could also handle, but this is faster + # since we've already determined the AF + return socket.inet_ntoa(byts) + + +cdef class DesCounterColumnType(DesLongType): + pass + + +cdef class DesDateType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + timestamp = int64_unpack(buf.ptr) / 1000.0 + return datetime_from_timestamp(timestamp) + + +cdef class TimestampType(DesDateType): + pass + + +cdef class TimeUUIDType(DesDateType): + cdef deserialize(self, Buffer *buf, int protocol_version): + return UUID(bytes=to_bytes(buf)) + + +# Values of the 'date'` type are encoded as 32-bit unsigned integers +# representing a number of days with epoch (January 1st, 1970) at the center of the +# range (2^31). 
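# A worked example of the comment above: the raw unsigned value 2**31
# decodes to day 0 (1970-01-01), 2**31 + 1 to day 1 (1970-01-02), and
# 2**31 - 1 to day -1 (1969-12-31); subtracting EPOCH_OFFSET_DAYS below
# recovers that signed day offset, which is what gets passed to util.Date.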
+EPOCH_OFFSET_DAYS = 2 ** 31 + +cdef class DesSimpleDateType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + days = uint32_unpack(buf.ptr) - EPOCH_OFFSET_DAYS + return util.Date(days) + + +cdef class DesShortType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + return int16_unpack(buf.ptr) + + +cdef class DesTimeType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + return util.Time(int64_unpack(to_bytes(buf))) + + +cdef class DesUTF8Type(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + return to_bytes(buf).decode('utf8') + + +cdef class DesVarcharType(DesUTF8Type): + pass + + +cdef class _DesParameterizedType(Deserializer): + + cdef object cqltype + cdef object adapter + cdef object subtypes + cdef Deserializer[::1] deserializers + + def __init__(self, cqltype): + assert cqltype.subtypes and len(cqltype.subtypes) == 1 + self.cqltype = cqltype + self.adapter = cqltype.adapter + self.subtypes = cqltype.subtypes + self.deserializers = make_deserializers(cqltype.subtypes) + + +cdef class _DesSimpleParameterizedType(_DesParameterizedType): + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef uint16_t v2_and_below = 0 + cdef int32_t v3_and_above = 0 + + if protocol_version >= 3: + result = _deserialize_parameterized[int32_t]( + v3_and_above, self.deserializers[0], buf, protocol_version) + else: + result = _deserialize_parameterized[uint16_t]( + v2_and_below, self.deserializers[0], buf, protocol_version) + return self.adapter(result) + + +ctypedef fused itemlen_t: + uint16_t # protocol <= v2 + int32_t # protocol >= v3 + + +cdef itemlen_t _unpack(itemlen_t dummy, const char *buf): + cdef itemlen_t result + if itemlen_t is uint16_t: + result = uint16_unpack(buf) + else: + result = int32_unpack(buf) + return result + +cdef list _deserialize_parameterized( + itemlen_t dummy, Deserializer deserializer, + Buffer *buf, int protocol_version): + cdef itemlen_t itemlen + cdef Buffer sub_buf + + cdef itemlen_t numelements = _unpack[itemlen_t](dummy, buf.ptr) + cdef itemlen_t p = sizeof(itemlen_t) + cdef list result = [] + + for _ in range(numelements): + itemlen = _unpack[itemlen_t](dummy, buf.ptr + p) + p += sizeof(itemlen_t) + sub_buf.ptr = buf.ptr + p + sub_buf.size = itemlen + p += itemlen + result.append(deserializer.deserialize(&sub_buf, protocol_version)) + + return result + +# cdef deserialize_v3_and_above( +# Deserializer deserializer, Buffer *buf, int protocol_version): +# cdef Py_ssize_t itemlen +# cdef Buffer sub_buf +# +# cdef Py_ssize_t numelements = int32_unpack(buf.ptr) +# cdef Py_ssize_t p = 4 +# cdef list result = [] +# +# for _ in range(numelements): +# itemlen = int32_unpack(buf.ptr + p) +# p += 4 +# sub_buf.ptr = buf.ptr + p +# sub_buf.size = itemlen +# p += itemlen +# result.append(deserializer.deserialize(&sub_buf, protocol_version)) +# +# return result +# +# +# cdef deserialize_v2_and_below( +# Deserializer deserializer, Buffer *buf, int protocol_version): +# cdef Py_ssize_t itemlen +# cdef Buffer sub_buf +# +# cdef Py_ssize_t numelements = uint16_unpack(buf.ptr) +# cdef Py_ssize_t p = 2 +# cdef list result = [] +# +# for _ in range(numelements): +# itemlen = uint16_unpack(buf.ptr + p) +# p += 2 +# sub_buf.ptr = buf.ptr + p +# sub_buf.size = itemlen +# p += itemlen +# result.append(deserializer.deserialize(&sub_buf, protocol_version)) +# +# return result + + + cdef class GenericDeserializer(Deserializer): """ Wrap a generic datatype for deserialization 
@@ -74,7 +251,7 @@ cdef class GenericDeserializer(Deserializer): def __init__(self, cqltype): self.cqltype = cqltype - cdef deserialize(self, Buffer *buf, protocol_version): + cdef deserialize(self, Buffer *buf, int protocol_version): return self.cqltype.deserialize(to_bytes(buf), protocol_version) #-------------------------------------------------------------------------- diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 0f8c3e3e..0d6da6e4 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -1,5 +1,5 @@ include 'marshal.pyx' -include 'buffer.pyx' +from cassandra.buffer cimport Buffer from libc.stdint cimport int32_t from cassandra.bytesio cimport BytesIOReader diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 9e1c8ca5..336ee1c7 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -25,6 +25,8 @@ from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, cdef bint is_little_endian from cassandra.util import is_little_endian +cdef bint PY3 = six.PY3 + # cdef extern from "marshal.h": # cdef str c_string_to_python(char *p, Py_ssize_t len) @@ -165,21 +167,30 @@ v3_header_pack = v3_header_struct.pack v3_header_unpack = v3_header_struct.unpack -if six.PY3: - def varint_unpack(term): - val = int(''.join("%02x" % i for i in term), 16) - if (term[0] & 128) != 0: - # There is a bug in Cython (0.20 - 0.22), where if we do - # '1 << (len(term) * 8)' Cython generates '1' directly into the - # C code, causing integer overflows. Treat it as an object for now - val -= ( 1L) << (len(term) * 8) - return val -else: - def varint_unpack(term): # noqa - val = int(term.encode('hex'), 16) - if (ord(term[0]) & 128) != 0: - val = val - (1 << (len(term) * 8)) - return val +cpdef varint_unpack(term): + """Unpack a variable-sized integer""" + if PY3: + return varint_unpack_py3(term) + else: + return varint_unpack_py2(term) + +# TODO: Optimize these two functions +def varint_unpack_py3(term): + cdef int64_t one = 1L + val = int(''.join("%02x" % i for i in term), 16) + if (term[0] & 128) != 0: + # There is a bug in Cython (0.20 - 0.22), where if we do + # '1 << (len(term) * 8)' Cython generates '1' directly into the + # C code, causing integer overflows + val -= one << (len(term) * 8) + return val + +def varint_unpack_py2(term): # noqa + cdef int64_t one = 1L + val = int(term.encode('hex'), 16) + if (ord(term[0]) & 128) != 0: + val = val - (one << (len(term) * 8)) + return val def bitlength(n): diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd index 13bc8411..9daecad9 100644 --- a/cassandra/parsing.pxd +++ b/cassandra/parsing.pxd @@ -5,7 +5,7 @@ cdef class ParseDesc: cdef public object colnames cdef public object coltypes cdef Deserializer[::1] deserializers - cdef public object protocol_version + cdef public int protocol_version cdef Py_ssize_t rowsize cdef class ColumnParser: From 758ab324db1a2fc6848a2aef232caec03a9f5c89 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 15:51:51 +0100 Subject: [PATCH 28/70] Add Cython-based MapType deserializer --- cassandra/cqltypes.py | 2 +- cassandra/deserializers.pyx | 170 +++++++++++++++++++++++------------- 2 files changed, 112 insertions(+), 60 deletions(-) diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index 77fc2b91..ee6b1101 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -300,7 +300,7 @@ class _CassandraType(object): Given a set of other CassandraTypes, create a new subtype of this type using them as parameters. This is how composite types are constructed. 
- >>> MapType.apply_parameters(DateType, BooleanType) + >>> MapType.apply_parameters([DateType, BooleanType]) `subtypes` will be a sequence of CassandraTypes. If provided, `names` diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 680e14a2..e9b06154 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -13,6 +13,7 @@ import inspect from decimal import Decimal from uuid import UUID +from cassandra import cqltypes from cassandra import util @@ -152,46 +153,56 @@ cdef class _DesParameterizedType(Deserializer): self.subtypes = cqltype.subtypes self.deserializers = make_deserializers(cqltype.subtypes) +#-------------------------------------------------------------------------- +# List and set deserialization + +cdef class DesListType(_DesParameterizedType): + + cdef Deserializer deserializer + + def __init__(self, cqltype): + super().__init__(cqltype) + self.deserializer = self.deserializers[0] -cdef class _DesSimpleParameterizedType(_DesParameterizedType): cdef deserialize(self, Buffer *buf, int protocol_version): - cdef uint16_t v2_and_below = 0 - cdef int32_t v3_and_above = 0 + cdef uint16_t v2_and_below = 2 + cdef int32_t v3_and_above = 3 if protocol_version >= 3: - result = _deserialize_parameterized[int32_t]( - v3_and_above, self.deserializers[0], buf, protocol_version) + result = _deserialize_list_or_set[int32_t]( + v3_and_above, buf, protocol_version, self.deserializer) else: - result = _deserialize_parameterized[uint16_t]( - v2_and_below, self.deserializers[0], buf, protocol_version) + result = _deserialize_list_or_set[uint16_t]( + v2_and_below, buf, protocol_version, self.deserializer) + return self.adapter(result) +DesSetType = DesListType + + ctypedef fused itemlen_t: uint16_t # protocol <= v2 int32_t # protocol >= v3 +cdef list _deserialize_list_or_set(itemlen_t dummy_version, + Buffer *buf, int protocol_version, + Deserializer deserializer): + """ + Deserialize a list or set. -cdef itemlen_t _unpack(itemlen_t dummy, const char *buf): - cdef itemlen_t result - if itemlen_t is uint16_t: - result = uint16_unpack(buf) - else: - result = int32_unpack(buf) - return result - -cdef list _deserialize_parameterized( - itemlen_t dummy, Deserializer deserializer, - Buffer *buf, int protocol_version): + The 'dummy' parameter is needed to make fused types work, so that + we can specialize on the protocol version. 
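    Roughly, the encoding handled here is

        [n][len_1][element_1 bytes] ... [len_n][element_n bytes]

    where n and each len_i are uint16 for protocol v2 and below and int32
    for v3 and above, so the two specializations differ only in the width
    of the length fields they read.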
+ """ cdef itemlen_t itemlen cdef Buffer sub_buf - cdef itemlen_t numelements = _unpack[itemlen_t](dummy, buf.ptr) + cdef itemlen_t numelements = _unpack[itemlen_t](dummy_version, buf.ptr) cdef itemlen_t p = sizeof(itemlen_t) cdef list result = [] for _ in range(numelements): - itemlen = _unpack[itemlen_t](dummy, buf.ptr + p) + itemlen = _unpack[itemlen_t](dummy_version, buf.ptr + p) p += sizeof(itemlen_t) sub_buf.ptr = buf.ptr + p sub_buf.size = itemlen @@ -200,46 +211,80 @@ cdef list _deserialize_parameterized( return result -# cdef deserialize_v3_and_above( -# Deserializer deserializer, Buffer *buf, int protocol_version): -# cdef Py_ssize_t itemlen -# cdef Buffer sub_buf -# -# cdef Py_ssize_t numelements = int32_unpack(buf.ptr) -# cdef Py_ssize_t p = 4 -# cdef list result = [] -# -# for _ in range(numelements): -# itemlen = int32_unpack(buf.ptr + p) -# p += 4 -# sub_buf.ptr = buf.ptr + p -# sub_buf.size = itemlen -# p += itemlen -# result.append(deserializer.deserialize(&sub_buf, protocol_version)) -# -# return result -# -# -# cdef deserialize_v2_and_below( -# Deserializer deserializer, Buffer *buf, int protocol_version): -# cdef Py_ssize_t itemlen -# cdef Buffer sub_buf -# -# cdef Py_ssize_t numelements = uint16_unpack(buf.ptr) -# cdef Py_ssize_t p = 2 -# cdef list result = [] -# -# for _ in range(numelements): -# itemlen = uint16_unpack(buf.ptr + p) -# p += 2 -# sub_buf.ptr = buf.ptr + p -# sub_buf.size = itemlen -# p += itemlen -# result.append(deserializer.deserialize(&sub_buf, protocol_version)) -# -# return result +cdef itemlen_t _unpack(itemlen_t dummy_version, const char *buf): + cdef itemlen_t result + if itemlen_t is uint16_t: + result = uint16_unpack(buf) + else: + result = int32_unpack(buf) + return result + +#-------------------------------------------------------------------------- +# Map deserialization + +cdef class DesMapType(_DesParameterizedType): + + cdef Deserializer key_deserializer, val_deserializer + + def __init__(self, cqltype): + super().__init__(cqltype) + self.key_deserializer = self.deserializers[0] + self.val_deserializer = self.deserializers[1] + + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef uint16_t v2_and_below = 0 + cdef int32_t v3_and_above = 0 + key_type, val_type = self.cqltype.subtypes + + if protocol_version >= 3: + result = _deserialize_map[int32_t]( + v3_and_above, buf, protocol_version, + self.key_deserializer, self.val_deserializer, + key_type, val_type) + else: + result = _deserialize_map[uint16_t]( + v2_and_below, buf, protocol_version, + self.key_deserializer, self.val_deserializer, + key_type, val_type) + + return self.adapter(result) +cdef _deserialize_map(itemlen_t dummy_version, + Buffer *buf, int protocol_version, + Deserializer key_deserializer, Deserializer val_deserializer, + key_type, val_type): + cdef itemlen_t itemlen, val_len, key_len + cdef Buffer key_buf, val_buf + + cdef itemlen_t numelements = _unpack[itemlen_t](dummy_version, buf.ptr) + cdef itemlen_t p = sizeof(itemlen_t) + cdef list result = [] + + numelements = _unpack[itemlen_t](dummy_version, buf.ptr) + p = sizeof(itemlen_t) + themap = util.OrderedMapSerializedKey(key_type, protocol_version) + for _ in range(numelements): + key_len = _unpack[itemlen_t](dummy_version, buf.ptr + p) + p += sizeof(itemlen_t) + # keybytes = byts[p:p + key_len] + key_buf.ptr = buf.ptr + p + key_buf.size = key_len + p += key_len + val_len = _unpack(dummy_version, buf.ptr + p) + p += sizeof(itemlen_t) + # valbytes = byts[p:p + val_len] + val_buf.ptr = buf.ptr + p + 
val_buf.size = val_len + p += val_len + key = key_deserializer.deserialize(&key_buf, protocol_version) + val = val_deserializer.deserialize(&val_buf, protocol_version) + themap._insert_unchecked(key, to_bytes(&key_buf), val) + + return themap + +#-------------------------------------------------------------------------- +# Generic deserialization cdef class GenericDeserializer(Deserializer): """ @@ -255,6 +300,7 @@ cdef class GenericDeserializer(Deserializer): return self.cqltype.deserialize(to_bytes(buf), protocol_version) #-------------------------------------------------------------------------- +# Helper utilities def make_deserializers(cqltypes): """Create an array of Deserializers for each given cqltype in cqltypes""" @@ -264,10 +310,16 @@ def make_deserializers(cqltypes): cpdef Deserializer find_deserializer(cqltype): """Find a deserializer for a cqltype""" - name = inspect.isclass(cqltype) and 'Des' + cqltype.__name__ + name = 'Des' + cqltype.__name__ if name in globals(): deserializer_cls = globals()[name] deserializer_cls() + elif issubclass(cqltype, cqltypes.ListType): + return DesListType + elif issubclass(cqltype, cqltypes.SetType): + return DesSetType + elif issubclass(cqltype, cqltypes.MapType): + return DesMapType return GenericDeserializer(cqltype) From e2820de2ba8e6760eb86ab22344b40bc9b4fc20f Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 16:19:57 +0100 Subject: [PATCH 29/70] Add Cython-based tuple deserializer --- cassandra/deserializers.pyx | 49 +++++++++++++++++++++++++++++++++++++ cassandra/objparser.pyx | 9 ++++--- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index e9b06154..3501428d 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -5,14 +5,19 @@ from libc.stdint cimport int32_t, uint16_t include 'marshal.pyx' include 'cython_utils.pyx' from cassandra.buffer cimport Buffer, to_bytes +from cassandra.parsing cimport ParseDesc, RowParser from cython.view cimport array as cython_array +from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM +from cpython.ref cimport Py_INCREF + import socket import inspect from decimal import Decimal from uuid import UUID +from cassandra.objparser import TupleRowParser from cassandra import cqltypes from cassandra import util @@ -283,6 +288,48 @@ cdef _deserialize_map(itemlen_t dummy_version, return themap +#-------------------------------------------------------------------------- +# Tuple deserialization + +cdef class DesTupleType(_DesParameterizedType): + + # TODO: Use TupleRowParser to parse these tuples + + cdef Py_ssize_t tuple_len + + def __init__(self, cqltype): + super().__init__(cqltype) + self.tuple_len = len(cqltype.subtypes) + + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef Py_ssize_t i, p + cdef int32_t itemlen + cdef tuple res = PyTuple_New(self.tuple_len) + cdef Buffer item_buf + cdef Deserializer deserializer + + protocol_version = max(3, protocol_version) + + p = 0 + values = [] + for i in range(self.tuple_len): + item = None + if p != buf.size: + itemlen = int32_unpack(buf.ptr + p) + p += 4 + if itemlen >= 0: + item_buf.ptr = buf.ptr + p + item_buf.size = itemlen + deserializer = self.deserializers[i] + item = deserializer.deserialize(&item_buf, protocol_version) + p += itemlen + + # Insert new object into tuple (PyTuple_SET_ITEM steals a reference) + Py_INCREF(item) + PyTuple_SET_ITEM(res, i, item) + + return res + 
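# A compact restatement of the layout DesTupleType handles above (derived
# from the loop itself, not from the protocol spec): every field is encoded
# as
#
#     [int32 length][length bytes]
#
# where a negative length decodes to None, and fields missing from the end
# of the buffer also come back as None. Collections nested inside tuples and
# UDTs are always encoded with at least the v3 format, hence the
# max(3, protocol_version) above.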
#-------------------------------------------------------------------------- # Generic deserialization @@ -320,6 +367,8 @@ cpdef Deserializer find_deserializer(cqltype): return DesSetType elif issubclass(cqltype, cqltypes.MapType): return DesMapType + elif issubclass(cqltype, cqltypes.TupleType): + return DesTupleType return GenericDeserializer(cqltype) diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index e98a991e..bf251942 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -66,9 +66,12 @@ cdef class TupleRowParser(RowParser): # Read the next few bytes get_buf(reader, &buf) - # Deserialize bytes to python object - deserializer = desc.deserializers[i] - val = deserializer.deserialize(&buf, desc.protocol_version) + if buf.size == 0: + val = None + else: + # Deserialize bytes to python object + deserializer = desc.deserializers[i] + val = deserializer.deserialize(&buf, desc.protocol_version) # Insert new object into tuple Py_INCREF(val) From 80c5a1931d664e8fa01784d93d48fefbce9a6114 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:02:50 +0100 Subject: [PATCH 30/70] Simply UserType deserialization in cqltypes.py --- cassandra/cqltypes.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py index ee6b1101..9383c53e 100644 --- a/cassandra/cqltypes.py +++ b/cassandra/cqltypes.py @@ -943,27 +943,7 @@ class UserType(TupleType): @classmethod def deserialize_safe(cls, byts, protocol_version): - proto_version = max(3, protocol_version) - p = 0 - values = [] - for col_type in cls.subtypes: - if p == len(byts): - break - itemlen = int32_unpack(byts[p:p + 4]) - p += 4 - if itemlen >= 0: - item = byts[p:p + itemlen] - p += itemlen - else: - item = None - # collections inside UDTs are always encoded with at least the - # version 3 format - values.append(col_type.from_binary(item, proto_version)) - - if len(values) < len(cls.subtypes): - nones = [None] * (len(cls.subtypes) - len(values)) - values = values + nones - + values = super(UserType, cls).deserialize_safe(byts, protocol_version) if cls.mapped_class: return cls.mapped_class(**dict(zip(cls.fieldnames, values))) else: From e160ec11d7a20b1757404e66c3e20c74676c7f56 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:18:19 +0100 Subject: [PATCH 31/70] Abstract over CPython tuple API --- cassandra/tuple.pxd | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 cassandra/tuple.pxd diff --git a/cassandra/tuple.pxd b/cassandra/tuple.pxd new file mode 100644 index 00000000..185e8364 --- /dev/null +++ b/cassandra/tuple.pxd @@ -0,0 +1,27 @@ +from cpython.tuple cimport ( + PyTuple_New, + # Return value: New reference. + # Return a new tuple object of size len, or NULL on failure. + PyTuple_SET_ITEM, + # Like PyTuple_SetItem(), but does no error checking, and should + # only be used to fill in brand new tuples. Note: This function + # ``steals'' a reference to o. + ) + +from cpython.ref cimport ( + Py_INCREF + # void Py_INCREF(object o) + # Increment the reference count for object o. The object must not + # be NULL; if you aren't sure that it isn't NULL, use + # Py_XINCREF(). + ) + +cdef inline tuple tuple_new(Py_ssize_t n): + """Allocate a new tuple object""" + return PyTuple_New(n) + +cdef inline void tuple_set(tuple tup, Py_ssize_t idx, object item): + """Insert new object into tuple. 
No item must have been set yet.""" + # PyTuple_SET_ITEM steals a reference, so we need to INCREF + Py_INCREF(item) + PyTuple_SET_ITEM(tup, idx, item) From 9e3dbcb034282c22d9c836eab19eec5bc54eeb35 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:21:50 +0100 Subject: [PATCH 32/70] Use cleaner tuple API --- cassandra/deserializers.pyx | 10 +++------- cassandra/objparser.pyx | 24 +++--------------------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 3501428d..17f28c20 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -8,9 +8,7 @@ from cassandra.buffer cimport Buffer, to_bytes from cassandra.parsing cimport ParseDesc, RowParser from cython.view cimport array as cython_array -from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM -from cpython.ref cimport Py_INCREF - +from cassandra.tuple cimport tuple_new, tuple_set import socket import inspect @@ -304,7 +302,7 @@ cdef class DesTupleType(_DesParameterizedType): cdef deserialize(self, Buffer *buf, int protocol_version): cdef Py_ssize_t i, p cdef int32_t itemlen - cdef tuple res = PyTuple_New(self.tuple_len) + cdef tuple res = tuple_new(self.tuple_len) cdef Buffer item_buf cdef Deserializer deserializer @@ -324,9 +322,7 @@ cdef class DesTupleType(_DesParameterizedType): item = deserializer.deserialize(&item_buf, protocol_version) p += itemlen - # Insert new object into tuple (PyTuple_SET_ITEM steals a reference) - Py_INCREF(item) - PyTuple_SET_ITEM(res, i, item) + tuple_set(res, i, item) return res diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index bf251942..d4642cbd 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -1,26 +1,9 @@ include "ioutils.pyx" -from cpython.tuple cimport ( - PyTuple_New, - # Return value: New reference. - # Return a new tuple object of size len, or NULL on failure. - PyTuple_SET_ITEM, - # Like PyTuple_SetItem(), but does no error checking, and should - # only be used to fill in brand new tuples. Note: This function - # ``steals'' a reference to o. - ) - -from cpython.ref cimport ( - Py_INCREF - # void Py_INCREF(object o) - # Increment the reference count for object o. The object must not - # be NULL; if you aren't sure that it isn't NULL, use - # Py_XINCREF(). 
- ) - from cassandra.bytesio cimport BytesIOReader from cassandra.deserializers cimport Deserializer from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser +from cassandra.tuple cimport tuple_new, tuple_set cdef class ListParser(ColumnParser): @@ -60,7 +43,7 @@ cdef class TupleRowParser(RowParser): cdef Buffer buf cdef Py_ssize_t i, rowsize = desc.rowsize cdef Deserializer deserializer - cdef tuple res = PyTuple_New(desc.rowsize) + cdef tuple res = tuple_new(desc.rowsize) for i in range(rowsize): # Read the next few bytes @@ -74,7 +57,6 @@ cdef class TupleRowParser(RowParser): val = deserializer.deserialize(&buf, desc.protocol_version) # Insert new object into tuple - Py_INCREF(val) - PyTuple_SET_ITEM(res, i, val) + tuple_set(res, i, val) return res From a7887a17ebebcc69b713768f1d828f0143f0d8bf Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:31:44 +0100 Subject: [PATCH 33/70] Composite type deserialization --- cassandra/deserializers.pyx | 64 +++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 17f28c20..9dc37fe2 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -148,6 +148,7 @@ cdef class _DesParameterizedType(Deserializer): cdef object adapter cdef object subtypes cdef Deserializer[::1] deserializers + cdef Py_ssize_t subtypes_len def __init__(self, cqltype): assert cqltype.subtypes and len(cqltype.subtypes) == 1 @@ -287,22 +288,16 @@ cdef _deserialize_map(itemlen_t dummy_version, return themap #-------------------------------------------------------------------------- -# Tuple deserialization +# Tuple and UserType deserialization cdef class DesTupleType(_DesParameterizedType): # TODO: Use TupleRowParser to parse these tuples - cdef Py_ssize_t tuple_len - - def __init__(self, cqltype): - super().__init__(cqltype) - self.tuple_len = len(cqltype.subtypes) - cdef deserialize(self, Buffer *buf, int protocol_version): cdef Py_ssize_t i, p cdef int32_t itemlen - cdef tuple res = tuple_new(self.tuple_len) + cdef tuple res = tuple_new(self.subtypes_len) cdef Buffer item_buf cdef Deserializer deserializer @@ -310,7 +305,7 @@ cdef class DesTupleType(_DesParameterizedType): p = 0 values = [] - for i in range(self.tuple_len): + for i in range(self.subtypes_len): item = None if p != buf.size: itemlen = int32_unpack(buf.ptr + p) @@ -326,6 +321,48 @@ cdef class DesTupleType(_DesParameterizedType): return res + +cdef class DesUserType(DesTupleType): + cdef deserialize(self, Buffer *buf, int protocol_version): + typ = self.cqltype + values = DesTupleType.deserialize(self, buf, protocol_version) + if typ.mapped_class: + return typ.mapped_class(**dict(zip(typ.fieldnames, values))) + else: + return typ.tuple_type(*values) + +#-------------------------------------------------------------------------- +# CompositeType + +cdef class DesCompositeType(_DesParameterizedType): + cdef deserialize(self, Buffer *buf, int protocol_version): + cdef Py_ssize_t i + cdef Buffer elem_buf + cdef int16_t element_length + cdef Deserializer deserializer + cdef tuple res = tuple_new(self.subtypes_len) + + for i in range(self.subtypes_len): + if not buf.size: + # CompositeType can have missing elements at the end + break + + element_length = uint16_unpack(buf.ptr) + elem_buf.ptr = buf.ptr + 2 + elem_buf.size = element_length + + # skip element length, element, and the EOC (one byte) + buf.ptr = buf.ptr + 2 + element_length + 1 + buf.size = buf.size - (2 + 
element_length + 1) + deserializer = self.deserializers[i] + item = deserializer.deserialize(&elem_buf, protocol_version) + tuple_set(res, i, item) + + return res + + +DesDynamicCompositeType = DesCompositeType + #-------------------------------------------------------------------------- # Generic deserialization @@ -363,8 +400,17 @@ cpdef Deserializer find_deserializer(cqltype): return DesSetType elif issubclass(cqltype, cqltypes.MapType): return DesMapType + elif issubclass(cqltype, cqltypes.UserType): + # UserType is a subclass of TupleType, so should precede it + return DesUserType elif issubclass(cqltype, cqltypes.TupleType): return DesTupleType + elif issubclass(cqltype, cqltypes.DynamicCompositeType): + # DynamicCompositeType is a subclass of CompositeType, so should precede it + return DesDynamicCompositeType + elif issubclass(cqltype, cqltypes.CompositeType): + return DesCompositeType + return GenericDeserializer(cqltype) From 74fa1ad4736c44d2ff81c7651929ac26d87dad68 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:37:12 +0100 Subject: [PATCH 34/70] Deserialization for ReveredType and FrozenType --- cassandra/deserializers.pyx | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 9dc37fe2..42890580 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -157,10 +157,8 @@ cdef class _DesParameterizedType(Deserializer): self.subtypes = cqltype.subtypes self.deserializers = make_deserializers(cqltype.subtypes) -#-------------------------------------------------------------------------- -# List and set deserialization -cdef class DesListType(_DesParameterizedType): +cdef class _DesSingleParamType(_DesParameterizedType): cdef Deserializer deserializer @@ -168,6 +166,11 @@ cdef class DesListType(_DesParameterizedType): super().__init__(cqltype) self.deserializer = self.deserializers[0] + +#-------------------------------------------------------------------------- +# List and set deserialization + +cdef class DesListType(_DesSingleParamType): cdef deserialize(self, Buffer *buf, int protocol_version): cdef uint16_t v2_and_below = 2 cdef int32_t v3_and_above = 3 @@ -288,7 +291,6 @@ cdef _deserialize_map(itemlen_t dummy_version, return themap #-------------------------------------------------------------------------- -# Tuple and UserType deserialization cdef class DesTupleType(_DesParameterizedType): @@ -331,8 +333,6 @@ cdef class DesUserType(DesTupleType): else: return typ.tuple_type(*values) -#-------------------------------------------------------------------------- -# CompositeType cdef class DesCompositeType(_DesParameterizedType): cdef deserialize(self, Buffer *buf, int protocol_version): @@ -363,6 +363,16 @@ cdef class DesCompositeType(_DesParameterizedType): DesDynamicCompositeType = DesCompositeType + +cdef class DesReversedType(_DesSingleParamType): + cdef deserialize(self, Buffer *buf, int protocol_version): + return self.deserializer.deserialize(buf, protocol_version) + + +cdef class DesFrozenType(_DesSingleParamType): + cdef deserialize(self, Buffer *buf, int protocol_version): + return self.deserializer.deserialize(buf, protocol_version) + #-------------------------------------------------------------------------- # Generic deserialization @@ -410,6 +420,10 @@ cpdef Deserializer find_deserializer(cqltype): return DesDynamicCompositeType elif issubclass(cqltype, cqltypes.CompositeType): return DesCompositeType + elif 
issubclass(cqltype, cqltypes.ReversedType): + return DesReversedType + elif issubclass(cqltype, cqltypes.FrozenType): + return DesFrozenType return GenericDeserializer(cqltype) From ddebc448529372a0b65a0d99b0cc50844a1d250c Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 18:57:32 +0100 Subject: [PATCH 35/70] Minor code cleanup --- cassandra/cython_utils.pyx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx index fe4fbab9..677b8009 100644 --- a/cassandra/cython_utils.pyx +++ b/cassandra/cython_utils.pyx @@ -3,18 +3,12 @@ Duplicate module of util.py, with some accelerated functions used for deserialization. """ -# from __future__ import with_statement - from cpython.datetime cimport timedelta_new # cdef inline object timedelta_new(int days, int seconds, int useconds) # Create timedelta object using DateTime CAPI factory function. # Note, there are no range checks for any of the arguments. -import calendar import datetime -import random -import six -import uuid import sys DATETIME_EPOC = datetime.datetime(1970, 1, 1) @@ -24,4 +18,3 @@ is_little_endian = sys.byteorder == 'little' cdef datetime_from_timestamp(timestamp): return DATETIME_EPOC + timedelta_new(0, timestamp, 0) - From 2f4a2d480fd9e97982a5b0dfc8ebfcc7b3472d19 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 21:45:07 +0100 Subject: [PATCH 36/70] Take more care with empty and negative sizes of binary data --- cassandra/cython_protocol_handler.pyx | 73 -------------------------- cassandra/deserializers.pxd | 21 ++++++++ cassandra/deserializers.pyx | 75 ++++++++++++++++----------- cassandra/ioutils.pyx | 2 +- cassandra/numpyparser.pyx | 6 ++- cassandra/objparser.pyx | 11 ++-- 6 files changed, 74 insertions(+), 114 deletions(-) delete mode 100644 cassandra/cython_protocol_handler.pyx diff --git a/cassandra/cython_protocol_handler.pyx b/cassandra/cython_protocol_handler.pyx deleted file mode 100644 index 629ce887..00000000 --- a/cassandra/cython_protocol_handler.pyx +++ /dev/null @@ -1,73 +0,0 @@ -# -- cython: profile=True - -from cassandra.protocol import ResultMessage, ProtocolHandler - -from cassandra.parsing cimport ParseDesc, ColumnParser -from cassandra.deserializers import make_deserializers -from cassandra.objparser import ListParser - - -include "ioutils.pyx" - - -def make_recv_results_rows(ColumnParser colparser): - def recv_results_rows(cls, f, int protocol_version, user_type_map): - """ - Parse protocol data given as a BytesIO f into a set of columns (e.g. list of tuples) - This is used as the recv_results_rows method of (Fast)ResultMessage - """ - paging_state, column_metadata = cls.recv_results_metadata(f, user_type_map) - - colnames = [c[2] for c in column_metadata] - coltypes = [c[3] for c in column_metadata] - - desc = ParseDesc(colnames, coltypes, make_deserializers(coltypes), - protocol_version) - reader = BytesIOReader(f.read()) - parsed_rows = colparser.parse_rows(reader, desc) - - return (paging_state, (colnames, parsed_rows)) - - return recv_results_rows - - -def make_protocol_handler(colparser=ListParser()): - """ - Given a column parser to deserialize ResultMessages, return a suitable - Cython-based protocol handler. - - There are three Cython-based protocol handlers (least to most performant): - - 1. objparser.ListParser - this parser decodes result messages into a list of tuples - - 2. objparser.LazyParser - this parser decodes result messages lazily by returning an iterator - - 3. 
numpyparser.NumPyParser - this parser decodes result messages into NumPy arrays - - The default is to use objparser.ListParser - """ - # TODO: It may be cleaner to turn ProtocolHandler and ResultMessage into - # TODO: instances and use methods instead of class methods - - class FastResultMessage(ResultMessage): - """ - Cython version of Result Message that has a faster implementation of - recv_results_row. - """ - # type_codes = ResultMessage.type_codes.copy() - code_to_type = dict((v, k) for k, v in ResultMessage.type_codes.items()) - recv_results_rows = classmethod(make_recv_results_rows(colparser)) - - class CythonProtocolHandler(ProtocolHandler): - """ - Use FastResultMessage to decode query result message messages. - """ - - my_opcodes = ProtocolHandler.message_types_by_opcode.copy() - my_opcodes[FastResultMessage.opcode] = FastResultMessage - message_types_by_opcode = my_opcodes - - return CythonProtocolHandler diff --git a/cassandra/deserializers.pxd b/cassandra/deserializers.pxd index 882d19d1..5b820061 100644 --- a/cassandra/deserializers.pxd +++ b/cassandra/deserializers.pxd @@ -3,5 +3,26 @@ from cassandra.buffer cimport Buffer cdef class Deserializer: + # The cqltypes._CassandraType corresponding to this deserializer + cdef object cqltype + + # String may be empty, whereas other values may not be. + # Other values may be NULL, in which case the integer length + # of the binary data is negative. However, non-string types + # may also return a zero length for legacy reasons + # (see http://code.metager.de/source/xref/apache/cassandra/doc/native_protocol_v3.spec + # paragraph 6) + cdef bint empty_binary_ok + cdef deserialize(self, Buffer *buf, int protocol_version) # cdef deserialize(self, CString byts, protocol_version) + + +cdef inline object from_binary(Deserializer deserializer, + Buffer *buf, + int protocol_version): + if buf.size <= 0 and not deserializer.empty_binary_ok: + return _ret_empty(deserializer, buf.size) + return deserializer.deserialize(buf, protocol_version) + +cdef _ret_empty(Deserializer deserializer, Py_ssize_t buf_size) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 42890580..8924e13c 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -21,6 +21,12 @@ from cassandra import util cdef class Deserializer: + """Cython-based deserializer class for a cqltype""" + + def __init__(self, cqltype): + self.cqltype = cqltype + self.empty_binary_ok = False + cdef deserialize(self, Buffer *buf, int protocol_version): raise NotImplementedError @@ -144,25 +150,21 @@ cdef class DesVarcharType(DesUTF8Type): cdef class _DesParameterizedType(Deserializer): - cdef object cqltype - cdef object adapter cdef object subtypes cdef Deserializer[::1] deserializers cdef Py_ssize_t subtypes_len def __init__(self, cqltype): - assert cqltype.subtypes and len(cqltype.subtypes) == 1 - self.cqltype = cqltype - self.adapter = cqltype.adapter + super().__init__(cqltype) self.subtypes = cqltype.subtypes self.deserializers = make_deserializers(cqltype.subtypes) cdef class _DesSingleParamType(_DesParameterizedType): - cdef Deserializer deserializer def __init__(self, cqltype): + assert cqltype.subtypes and len(cqltype.subtypes) == 1, cqltype.subtypes super().__init__(cqltype) self.deserializer = self.deserializers[0] @@ -182,7 +184,7 @@ cdef class DesListType(_DesSingleParamType): result = _deserialize_list_or_set[uint16_t]( v2_and_below, buf, protocol_version, self.deserializer) - return self.adapter(result) + return 
self.cqltype.adapter(result) DesSetType = DesListType @@ -214,7 +216,7 @@ cdef list _deserialize_list_or_set(itemlen_t dummy_version, sub_buf.ptr = buf.ptr + p sub_buf.size = itemlen p += itemlen - result.append(deserializer.deserialize(&sub_buf, protocol_version)) + result.append(from_binary(deserializer, &sub_buf, protocol_version)) return result @@ -284,8 +286,8 @@ cdef _deserialize_map(itemlen_t dummy_version, val_buf.ptr = buf.ptr + p val_buf.size = val_len p += val_len - key = key_deserializer.deserialize(&key_buf, protocol_version) - val = val_deserializer.deserialize(&val_buf, protocol_version) + key = from_binary(key_deserializer, &key_buf, protocol_version) + val = from_binary(val_deserializer, &val_buf, protocol_version) themap._insert_unchecked(key, to_bytes(&key_buf), val) return themap @@ -316,7 +318,7 @@ cdef class DesTupleType(_DesParameterizedType): item_buf.ptr = buf.ptr + p item_buf.size = itemlen deserializer = self.deserializers[i] - item = deserializer.deserialize(&item_buf, protocol_version) + item = from_binary(deserializer, &item_buf, protocol_version) p += itemlen tuple_set(res, i, item) @@ -355,7 +357,7 @@ cdef class DesCompositeType(_DesParameterizedType): buf.ptr = buf.ptr + 2 + element_length + 1 buf.size = buf.size - (2 + element_length + 1) deserializer = self.deserializers[i] - item = deserializer.deserialize(&elem_buf, protocol_version) + item = from_binary(deserializer, &elem_buf, protocol_version) tuple_set(res, i, item) return res @@ -366,12 +368,26 @@ DesDynamicCompositeType = DesCompositeType cdef class DesReversedType(_DesSingleParamType): cdef deserialize(self, Buffer *buf, int protocol_version): - return self.deserializer.deserialize(buf, protocol_version) + return from_binary(self.deserializer, buf, protocol_version) cdef class DesFrozenType(_DesSingleParamType): cdef deserialize(self, Buffer *buf, int protocol_version): - return self.deserializer.deserialize(buf, protocol_version) + return from_binary(self.deserializer, buf, protocol_version) + +#-------------------------------------------------------------------------- + +cdef _ret_empty(Deserializer deserializer, Py_ssize_t buf_size): + """ + Decide whether to return None or EMPTY when a buffer size is + zero or negative. This is used by from_binary in deserializers.pxd. 
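+
+    A negative size means the value was NULL in the database and always maps
+    to None; a zero size maps to cqltypes.EMPTY only when the column's
+    cqltype reports support_empty_values, and to None otherwise.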
+ """ + if buf_size < 0: + return None + elif deserializer.cqltype.support_empty_values: + return cqltypes.EMPTY + else: + return None #-------------------------------------------------------------------------- # Generic deserialization @@ -381,11 +397,6 @@ cdef class GenericDeserializer(Deserializer): Wrap a generic datatype for deserialization """ - cdef object cqltype - - def __init__(self, cqltype): - self.cqltype = cqltype - cdef deserialize(self, Buffer *buf, int protocol_version): return self.cqltype.deserialize(to_bytes(buf), protocol_version) @@ -401,31 +412,33 @@ def make_deserializers(cqltypes): cpdef Deserializer find_deserializer(cqltype): """Find a deserializer for a cqltype""" name = 'Des' + cqltype.__name__ + if name in globals(): - deserializer_cls = globals()[name] - deserializer_cls() + cls = globals()[name] elif issubclass(cqltype, cqltypes.ListType): - return DesListType + cls = DesListType elif issubclass(cqltype, cqltypes.SetType): - return DesSetType + cls = DesSetType elif issubclass(cqltype, cqltypes.MapType): - return DesMapType + cls = DesMapType elif issubclass(cqltype, cqltypes.UserType): # UserType is a subclass of TupleType, so should precede it - return DesUserType + cls = DesUserType elif issubclass(cqltype, cqltypes.TupleType): - return DesTupleType + cls = DesTupleType elif issubclass(cqltype, cqltypes.DynamicCompositeType): # DynamicCompositeType is a subclass of CompositeType, so should precede it - return DesDynamicCompositeType + cls = DesDynamicCompositeType elif issubclass(cqltype, cqltypes.CompositeType): - return DesCompositeType + cls = DesCompositeType elif issubclass(cqltype, cqltypes.ReversedType): - return DesReversedType + cls = DesReversedType elif issubclass(cqltype, cqltypes.FrozenType): - return DesFrozenType + cls = DesFrozenType + else: + cls = GenericDeserializer - return GenericDeserializer(cqltype) + return cls(cqltype) def obj_array(list objs): diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 0d6da6e4..d5aeff6c 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -12,7 +12,7 @@ cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1: """ cdef Py_ssize_t raw_val_size = read_int(reader) if raw_val_size < 0: - raise ValueError("Expected positive item size") + raw_val_size = 0 buf_out.ptr = reader.read(raw_val_size) buf_out.size = raw_val_size diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 7be86400..8499d938 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -17,7 +17,7 @@ from libc.stdint cimport uint64_t from cpython.ref cimport Py_INCREF, PyObject from cassandra.bytesio cimport BytesIOReader -from cassandra.deserializers cimport Deserializer +from cassandra.deserializers cimport Deserializer, from_binary from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser from cassandra import cqltypes from cassandra.util import is_little_endian @@ -125,9 +125,11 @@ cdef inline int unpack_row( get_buf(reader, &buf) arr = arrays[i] + if buf.size == 0: + raise ValueError("Cannot handle NULL value") if arr.is_object: deserializer = desc.deserializers[i] - val = deserializer.deserialize(&buf, desc.protocol_version) + val = from_binary(deserializer, &buf, desc.protocol_version) Py_INCREF(val) ( arr.buf_ptr)[0] = val else: diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index d4642cbd..8aca1427 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -1,7 +1,7 @@ include "ioutils.pyx" from cassandra.bytesio cimport 
BytesIOReader -from cassandra.deserializers cimport Deserializer +from cassandra.deserializers cimport Deserializer, from_binary from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser from cassandra.tuple cimport tuple_new, tuple_set @@ -49,12 +49,9 @@ cdef class TupleRowParser(RowParser): # Read the next few bytes get_buf(reader, &buf) - if buf.size == 0: - val = None - else: - # Deserialize bytes to python object - deserializer = desc.deserializers[i] - val = deserializer.deserialize(&buf, desc.protocol_version) + # Deserialize bytes to python object + deserializer = desc.deserializers[i] + val = from_binary(deserializer, &buf, desc.protocol_version) # Insert new object into tuple tuple_set(res, i, val) From 7ce8e3a3c17450589dc253b3387053cd8c6efa5e Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 21:45:49 +0100 Subject: [PATCH 37/70] Use Cython-based deserializers whenever available --- cassandra/cython_deps.py | 5 ++ cassandra/protocol.py | 65 ++++++++++++++++++- cassandra/rowparser.pyx | 26 ++++++++ .../standard/test_cython_protocol_handlers.py | 18 ++--- 4 files changed, 102 insertions(+), 12 deletions(-) create mode 100644 cassandra/cython_deps.py create mode 100644 cassandra/rowparser.pyx diff --git a/cassandra/cython_deps.py b/cassandra/cython_deps.py new file mode 100644 index 00000000..41516426 --- /dev/null +++ b/cassandra/cython_deps.py @@ -0,0 +1,5 @@ +try: + from cassandra.rowparser import make_recv_results_rows + HAVE_CYTHON = True +except ImportError: + HAVE_CYTHON = False \ No newline at end of file diff --git a/cassandra/protocol.py b/cassandra/protocol.py index a6ce22ec..de8a464d 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -40,6 +40,7 @@ from cassandra.cqltypes import (AsciiType, BytesType, BooleanType, TupleType, lookup_casstype, SimpleDateType, TimeType, ByteType, ShortType) from cassandra.policies import WriteType +from cassandra.cython_deps import HAVE_CYTHON from cassandra import util log = logging.getLogger(__name__) @@ -69,10 +70,16 @@ _message_types_by_opcode = {} _UNSET_VALUE = object() +def register_class(cls): + _message_types_by_opcode[cls.opcode] = cls + +def get_registered_classes(): + return _message_types_by_opcode.copy() + class _RegisterMessageType(type): def __init__(cls, name, bases, dct): if not name.startswith('_'): - _message_types_by_opcode[cls.opcode] = cls + register_class(cls) @six.add_metaclass(_RegisterMessageType) @@ -987,6 +994,62 @@ class ProtocolHandler(object): return msg +def cython_protocol_handler(colparser): + """ + Given a column parser to deserialize ResultMessages, return a suitable + Cython-based protocol handler. + + There are three Cython-based protocol handlers (least to most performant): + + 1. objparser.ListParser + this parser decodes result messages into a list of tuples + + 2. objparser.LazyParser + this parser decodes result messages lazily by returning an iterator + + 3. numpyparser.NumPyParser + this parser decodes result messages into NumPy arrays + + The default is to use objparser.ListParser + """ + # TODO: It may be cleaner to turn ProtocolHandler and ResultMessage into + # TODO: instances and use methods instead of class methods + from cassandra.rowparser import make_recv_results_rows + + class FastResultMessage(ResultMessage): + """ + Cython version of Result Message that has a faster implementation of + recv_results_row. 
+ """ + # type_codes = ResultMessage.type_codes.copy() + code_to_type = dict((v, k) for k, v in ResultMessage.type_codes.items()) + recv_results_rows = classmethod(make_recv_results_rows(colparser)) + + class CythonProtocolHandler(ProtocolHandler): + """ + Use FastResultMessage to decode query result message messages. + """ + + my_opcodes = ProtocolHandler.message_types_by_opcode.copy() + my_opcodes[FastResultMessage.opcode] = FastResultMessage + message_types_by_opcode = my_opcodes + + return CythonProtocolHandler + + +if HAVE_CYTHON: + from cassandra.objparser import ListParser, LazyParser + from cassandra.numpyparser import NumpyParser + + ProtocolHandler = cython_protocol_handler(ListParser()) + LazyProtocolHandler = cython_protocol_handler(LazyParser()) + NumpyProtocolHandler = cython_protocol_handler(NumpyParser()) +else: + # Use Python-based ProtocolHandler + LazyProtocolHandler = None + NumpyProtocolHandler = None + + def read_byte(f): return int8_unpack(f.read(1)) diff --git a/cassandra/rowparser.pyx b/cassandra/rowparser.pyx new file mode 100644 index 00000000..1c855769 --- /dev/null +++ b/cassandra/rowparser.pyx @@ -0,0 +1,26 @@ +# -- cython: profile=True + +from cassandra.parsing cimport ParseDesc, ColumnParser +from cassandra.deserializers import make_deserializers + +include "ioutils.pyx" + +def make_recv_results_rows(ColumnParser colparser): + def recv_results_rows(cls, f, int protocol_version, user_type_map): + """ + Parse protocol data given as a BytesIO f into a set of columns (e.g. list of tuples) + This is used as the recv_results_rows method of (Fast)ResultMessage + """ + paging_state, column_metadata = cls.recv_results_metadata(f, user_type_map) + + colnames = [c[2] for c in column_metadata] + coltypes = [c[3] for c in column_metadata] + + desc = ParseDesc(colnames, coltypes, make_deserializers(coltypes), + protocol_version) + reader = BytesIOReader(f.read()) + parsed_rows = colparser.parse_rows(reader, desc) + + return (paging_state, (colnames, parsed_rows)) + + return recv_results_rows diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py index 059c9317..ba75cf72 100644 --- a/tests/integration/standard/test_cython_protocol_handlers.py +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -8,19 +8,15 @@ except ImportError: import unittest from cassandra.cluster import Cluster +from cassandra.protocol import ProtocolHandler, LazyProtocolHandler, NumpyProtocolHandler from tests.integration import use_singledc, PROTOCOL_VERSION from tests.integration.datatype_utils import update_datatypes from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params -from six import next -try: - from cassandra.cython_protocol_handler import make_protocol_handler -except ImportError as e: +from cassandra.cython_deps import HAVE_CYTHON +if not HAVE_CYTHON: raise unittest.SkipTest("Skipping test, not compiled with Cython enabled") -from cassandra.numpyparser import NumpyParser -from cassandra.objparser import ListParser, LazyParser - def setup_module(): use_singledc() @@ -47,20 +43,20 @@ class CustomProtocolHandlerTest(unittest.TestCase): """ Test Cython-based parser that returns a list of tuples """ - self.cython_parser(ListParser()) + self.cython_parser(ProtocolHandler) def test_cython_lazy_parser(self): """ Test Cython-based parser that returns a list of tuples """ - self.cython_parser(LazyParser()) + self.cython_parser(LazyProtocolHandler) - def 
cython_parser(self, colparser): + def cython_parser(self, protocol_handler): cluster = Cluster(protocol_version=PROTOCOL_VERSION) session = cluster.connect(keyspace="testspace") # use our custom protocol handler - session.client_protocol_handler = make_protocol_handler(colparser) + session.client_protocol_handler = protocol_handler # session.row_factory = tuple_factory # verify data From e4e98e7e9fb30dac5815c05c0d008ab37dec2986 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 6 Aug 2015 21:59:07 +0100 Subject: [PATCH 38/70] Some small optimizations to deserializers --- cassandra/deserializers.pyx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 8924e13c..008e49cd 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -51,7 +51,9 @@ cdef class DesUUIDType(Deserializer): cdef class DesBooleanType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return bool(int8_unpack(buf.ptr)) + if int8_unpack(buf.ptr): + return True + return False cdef class DesByteType(Deserializer): @@ -184,10 +186,11 @@ cdef class DesListType(_DesSingleParamType): result = _deserialize_list_or_set[uint16_t]( v2_and_below, buf, protocol_version, self.deserializer) - return self.cqltype.adapter(result) + return result - -DesSetType = DesListType +cdef class DesSetType(DesListType): + cdef deserialize(self, Buffer *buf, int protocol_version): + return util.sortedset(DesListType.deserialize(self, buf, protocol_version)) ctypedef fused itemlen_t: From a3c73f6670e34ec96423b213b2a14e78fb96ec24 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 09:22:33 +0100 Subject: [PATCH 39/70] Forgot to initialize datetime C API --- cassandra/cython_utils.pyx | 15 +++++++++++---- cassandra/deserializers.pyx | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx index 677b8009..7ee385ec 100644 --- a/cassandra/cython_utils.pyx +++ b/cassandra/cython_utils.pyx @@ -3,10 +3,15 @@ Duplicate module of util.py, with some accelerated functions used for deserialization. """ -from cpython.datetime cimport timedelta_new - # cdef inline object timedelta_new(int days, int seconds, int useconds) - # Create timedelta object using DateTime CAPI factory function. - # Note, there are no range checks for any of the arguments. +from cpython.datetime cimport ( + timedelta_new, + # cdef inline object timedelta_new(int days, int seconds, int useconds) + # Create timedelta object using DateTime CAPI factory function. + # Note, there are no range checks for any of the arguments. + import_datetime, + # Datetime C API initialization function. + # You have to call it before any usage of DateTime CAPI functions. 
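+    # (import_datetime() is called once at module scope below, so the
+    # C API is initialized before datetime_from_timestamp is ever used.)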
+ ) import datetime import sys @@ -16,5 +21,7 @@ DATETIME_EPOC = datetime.datetime(1970, 1, 1) assert sys.byteorder in ('little', 'big') is_little_endian = sys.byteorder == 'little' +import_datetime() + cdef datetime_from_timestamp(timestamp): return DATETIME_EPOC + timedelta_new(0, timestamp, 0) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 008e49cd..aefd6bac 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -259,7 +259,7 @@ cdef class DesMapType(_DesParameterizedType): self.key_deserializer, self.val_deserializer, key_type, val_type) - return self.adapter(result) + return result cdef _deserialize_map(itemlen_t dummy_version, From 56a25df0fce5d438a06042e7072c1ee3ddf85d43 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 10:16:47 +0100 Subject: [PATCH 40/70] Don't lose out on microseconds when creating datetime --- cassandra/cython_utils.pyx | 6 ++++-- cassandra/deserializers.pyx | 7 ++----- .../cqlengine/columns/test_container_columns.py | 3 ++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx index 7ee385ec..de87c1e0 100644 --- a/cassandra/cython_utils.pyx +++ b/cassandra/cython_utils.pyx @@ -23,5 +23,7 @@ is_little_endian = sys.byteorder == 'little' import_datetime() -cdef datetime_from_timestamp(timestamp): - return DATETIME_EPOC + timedelta_new(0, timestamp, 0) +cdef datetime_from_timestamp(double timestamp): + cdef int seconds = timestamp + cdef int microseconds = ( (timestamp * 1000000)) % 1000000 + return DATETIME_EPOC + timedelta_new(0, seconds, microseconds) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index aefd6bac..35667694 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -3,19 +3,16 @@ from libc.stdint cimport int32_t, uint16_t include 'marshal.pyx' -include 'cython_utils.pyx' from cassandra.buffer cimport Buffer, to_bytes -from cassandra.parsing cimport ParseDesc, RowParser +from cassandra.cython_utils cimport datetime_from_timestamp from cython.view cimport array as cython_array from cassandra.tuple cimport tuple_new, tuple_set import socket -import inspect from decimal import Decimal from uuid import UUID -from cassandra.objparser import TupleRowParser from cassandra import cqltypes from cassandra import util @@ -107,7 +104,7 @@ cdef class DesCounterColumnType(DesLongType): cdef class DesDateType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - timestamp = int64_unpack(buf.ptr) / 1000.0 + cdef double timestamp = int64_unpack(buf.ptr) / 1000.0 return datetime_from_timestamp(timestamp) diff --git a/tests/integration/cqlengine/columns/test_container_columns.py b/tests/integration/cqlengine/columns/test_container_columns.py index 213c625c..ad67419c 100644 --- a/tests/integration/cqlengine/columns/test_container_columns.py +++ b/tests/integration/cqlengine/columns/test_container_columns.py @@ -386,7 +386,8 @@ class TestMapColumn(BaseCassEngTestCase): k2 = uuid4() now = datetime.now() then = now + timedelta(days=1) - m1 = TestMapModel.create(int_map={1: k1, 2: k2}, text_map={'now': now, 'then': then}) + m1 = TestMapModel.create(int_map={1: k1, 2: k2}, + text_map={'now': now, 'then': then}) m2 = TestMapModel.get(partition=m1.partition) self.assertTrue(isinstance(m2.int_map, dict)) From 8462e865f7c2d089884aa061b39275323605f43a Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 10:38:16 +0100 Subject: [PATCH 41/70] Squash little bug in 
cython decimal deserializer --- cassandra/deserializers.pyx | 7 ++++++- cassandra/marshal.pyx | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 35667694..ad1e5d1a 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -36,8 +36,13 @@ cdef class DesLongType(Deserializer): # TODO: Use libmpdec: http://www.bytereef.org/mpdecimal/index.html cdef class DesDecimalType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): + cdef Buffer varint_buf + varint_buf.ptr = buf.ptr + 4 + varint_buf.size = buf.size - 4 + scale = int32_unpack(buf.ptr) - unscaled = varint_unpack(buf.ptr + 4) + unscaled = varint_unpack(to_bytes(&varint_buf)) + return Decimal('%de%d' % (unscaled, -scale)) diff --git a/cassandra/marshal.pyx b/cassandra/marshal.pyx index 336ee1c7..0ab65c46 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/marshal.pyx @@ -177,6 +177,7 @@ cpdef varint_unpack(term): # TODO: Optimize these two functions def varint_unpack_py3(term): cdef int64_t one = 1L + val = int(''.join("%02x" % i for i in term), 16) if (term[0] & 128) != 0: # There is a bug in Cython (0.20 - 0.22), where if we do From 0cec7fef76287f710bab39b3bb2888b4621a5ad3 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 12:03:54 +0100 Subject: [PATCH 42/70] Some more small bug fixes to Cython-based deserializers --- cassandra/deserializers.pyx | 11 ++++++++++- tests/integration/cqlengine/query/test_queryset.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index ad1e5d1a..151d7747 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -95,7 +95,7 @@ cdef class DesInetAddressType(Deserializer): cdef bytes byts = to_bytes(buf) # TODO: optimize inet_ntop, inet_ntoa - if len(buf.size) == 16: + if buf.size == 16: return util.inet_ntop(socket.AF_INET6, byts) else: # util.inet_pton could also handle, but this is faster @@ -162,6 +162,7 @@ cdef class _DesParameterizedType(Deserializer): super().__init__(cqltype) self.subtypes = cqltype.subtypes self.deserializers = make_deserializers(cqltype.subtypes) + self.subtypes_len = len(self.subtypes) cdef class _DesSingleParamType(_DesParameterizedType): @@ -352,6 +353,14 @@ cdef class DesCompositeType(_DesParameterizedType): for i in range(self.subtypes_len): if not buf.size: # CompositeType can have missing elements at the end + + # Fill the tuple with None values and slice it + # + # (I'm not sure a tuple needs to be fully initialized before + # it can be destroyed, so play it safe) + for j in range(i, self.subtypes_len): + tuple_set(res, j, None) + res = res[:i] break element_length = uint16_unpack(buf.ptr) diff --git a/tests/integration/cqlengine/query/test_queryset.py b/tests/integration/cqlengine/query/test_queryset.py index 7bb101b9..45277520 100644 --- a/tests/integration/cqlengine/query/test_queryset.py +++ b/tests/integration/cqlengine/query/test_queryset.py @@ -629,7 +629,7 @@ class TestMinMaxTimeUUIDFunctions(BaseCassEngTestCase): # test kwarg filtering q = TimeUUIDQueryModel.filter(partition=pk, time__lte=functions.MaxTimeUUID(midpoint)) q = [d for d in q] - assert len(q) == 2 + self.assertEqual(len(q), 2, msg="Got: %s" % q) datas = [d.data for d in q] assert '1' in datas assert '2' in datas From e4e0e219ee8123e42dc032483bb3bd036535a064 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 14:22:58 +0100 Subject: [PATCH 43/70] Forgot pxd 
file for cython_utils --- cassandra/cython_utils.pxd | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 cassandra/cython_utils.pxd diff --git a/cassandra/cython_utils.pxd b/cassandra/cython_utils.pxd new file mode 100644 index 00000000..d2bf7d20 --- /dev/null +++ b/cassandra/cython_utils.pxd @@ -0,0 +1,2 @@ +from libc.stdint cimport int64_t +cdef datetime_from_timestamp(double timestamp) \ No newline at end of file From f31772e8778bc23cc3102f01c58f9fa95037284a Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 14:55:49 +0100 Subject: [PATCH 44/70] Fix bug in integration test test_types --- cassandra/util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cassandra/util.py b/cassandra/util.py index 4cf3879e..0e8a818b 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -493,8 +493,7 @@ except ImportError: def __init__(self, iterable=()): self._items = [] - for i in iterable: - self.add(i) + self.update(iterable) def __len__(self): return len(self._items) @@ -567,6 +566,10 @@ except ImportError: else: self._items.append(item) + def update(self, iterable): + for i in iterable: + self.add(i) + def clear(self): del self._items[:] From 302d7ab1d1a517d5a5669bf25e90c371c087634e Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 15:34:06 +0100 Subject: [PATCH 45/70] Forgot to initialize flags in string types to support empty strings --- cassandra/deserializers.pyx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 151d7747..a0d55340 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -64,6 +64,11 @@ cdef class DesByteType(Deserializer): cdef class DesAsciiType(Deserializer): + + def __init__(self, cqltype): + super().__init__(cqltype) + self.empty_binary_ok = True + cdef deserialize(self, Buffer *buf, int protocol_version): if six.PY2: return to_bytes(buf) @@ -144,6 +149,10 @@ cdef class DesTimeType(Deserializer): cdef class DesUTF8Type(Deserializer): + def __init__(self, cqltype): + super().__init__(cqltype) + self.empty_binary_ok = True + cdef deserialize(self, Buffer *buf, int protocol_version): return to_bytes(buf).decode('utf8') From 0baf6965204c15a27d34d4ef9973e1662f9a624e Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 15:46:04 +0100 Subject: [PATCH 46/70] Fix use of next() in test_concurrent --- tests/integration/standard/test_concurrent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/standard/test_concurrent.py b/tests/integration/standard/test_concurrent.py index 45b73613..bf928b8e 100644 --- a/tests/integration/standard/test_concurrent.py +++ b/tests/integration/standard/test_concurrent.py @@ -24,6 +24,8 @@ from cassandra.query import tuple_factory, SimpleStatement from tests.integration import use_singledc, PROTOCOL_VERSION +from six import next + try: import unittest2 as unittest except ImportError: @@ -151,7 +153,7 @@ class ClusterTests(unittest.TestCase): results = self.execute_concurrent_args_helper(self.session, statement, parameters, results_generator=True) for i in range(num_statements): - result = results.next() + result = next(results) self.assertEqual((True, [(i,)]), result) def test_execute_concurrent_paged_result(self): From f3e2295457fd19099d32f897114dc37d2dfb2269 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 16:12:06 +0100 Subject: [PATCH 47/70] Python 3 compatibility for stress tests --- 
tests/stress_tests/test_multi_inserts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/stress_tests/test_multi_inserts.py b/tests/stress_tests/test_multi_inserts.py index b23a29dd..12b5b70e 100644 --- a/tests/stress_tests/test_multi_inserts.py +++ b/tests/stress_tests/test_multi_inserts.py @@ -75,7 +75,7 @@ class StressInsertsTests(unittest.TestCase): break for conn in pool.get_connections(): if conn.in_flight > 1: - print self.session.get_pool_state() + print(self.session.get_pool_state()) leaking_connections = True break i = i + 1 From 3b6b720c4be2a84d815a768c86ac4209603ed8cb Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Fri, 7 Aug 2015 17:06:30 +0100 Subject: [PATCH 48/70] Be more careful how optional values are decoded --- cassandra/deserializers.pxd | 7 +++- cassandra/ioutils.pyx | 13 ++++-- .../cqlengine/query/test_updates.py | 42 +++++++++---------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/cassandra/deserializers.pxd b/cassandra/deserializers.pxd index 5b820061..015fda37 100644 --- a/cassandra/deserializers.pxd +++ b/cassandra/deserializers.pxd @@ -21,8 +21,11 @@ cdef class Deserializer: cdef inline object from_binary(Deserializer deserializer, Buffer *buf, int protocol_version): - if buf.size <= 0 and not deserializer.empty_binary_ok: + if buf.size < 0: + return None + elif buf.size == 0 and not deserializer.empty_binary_ok: return _ret_empty(deserializer, buf.size) - return deserializer.deserialize(buf, protocol_version) + else: + return deserializer.deserialize(buf, protocol_version) cdef _ret_empty(Deserializer deserializer, Py_ssize_t buf_size) diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index d5aeff6c..203997e9 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -9,12 +9,19 @@ cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1: """ Get a pointer into the buffer provided by BytesIOReader for the next data item in the stream of values. + + BEWARE: + If the next item has a zero negative size, the pointer will be set to NULL. + A negative size happens when the value is NULL in the database, whereas a + zero size may happen either for legacy reasons, or for data types such as + strings (which may be empty). 
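+    from_binary() in deserializers.pxd checks buf.size for exactly these
+    cases before a value is deserialized.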
""" cdef Py_ssize_t raw_val_size = read_int(reader) - if raw_val_size < 0: - raw_val_size = 0 + if raw_val_size <= 0: + buf_out.ptr = NULL + else: + buf_out.ptr = reader.read(raw_val_size) - buf_out.ptr = reader.read(raw_val_size) buf_out.size = raw_val_size return 0 diff --git a/tests/integration/cqlengine/query/test_updates.py b/tests/integration/cqlengine/query/test_updates.py index a3b80f15..6c539012 100644 --- a/tests/integration/cqlengine/query/test_updates.py +++ b/tests/integration/cqlengine/query/test_updates.py @@ -52,17 +52,17 @@ class QueryUpdateTests(BaseCassEngTestCase): # sanity check for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == i - assert row.text == str(i) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, i) + self.assertEqual(row.text, str(i)) # perform update TestQueryUpdateModel.objects(partition=partition, cluster=3).update(count=6) for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == (6 if i == 3 else i) - assert row.text == str(i) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, 6 if i == 3 else i) + self.assertEqual(row.text, str(i)) def test_update_values_validation(self): """ tests calling udpate on models with values passed in """ @@ -72,9 +72,9 @@ class QueryUpdateTests(BaseCassEngTestCase): # sanity check for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == i - assert row.text == str(i) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, i) + self.assertEqual(row.text, str(i)) # perform update with self.assertRaises(ValidationError): @@ -98,17 +98,17 @@ class QueryUpdateTests(BaseCassEngTestCase): # sanity check for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == i - assert row.text == str(i) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, i) + self.assertEqual(row.text, str(i)) # perform update TestQueryUpdateModel.objects(partition=partition, cluster=3).update(text=None) for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == i - assert row.text == (None if i == 3 else str(i)) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, i) + self.assertEqual(row.text, None if i == 3 else str(i)) def test_mixed_value_and_null_update(self): """ tests that updating a columns value, and removing another works properly """ @@ -118,17 +118,17 @@ class QueryUpdateTests(BaseCassEngTestCase): # sanity check for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == i - assert row.text == str(i) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, i) + self.assertEqual(row.text, str(i)) # perform update TestQueryUpdateModel.objects(partition=partition, cluster=3).update(count=6, text=None) for i, row in enumerate(TestQueryUpdateModel.objects(partition=partition)): - assert row.cluster == i - assert row.count == (6 if i == 3 else i) - assert row.text == (None if i == 3 else str(i)) + self.assertEqual(row.cluster, i) + self.assertEqual(row.count, 6 if i == 3 else i) + self.assertEqual(row.text, None if i == 3 else str(i)) def test_counter_updates(self): pass From 53b2b48f582b2079b48b5336a06aeb97089fe042 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Sat, 8 Aug 2015 
12:28:53 +0100 Subject: [PATCH 49/70] Be more careful when Cython is available but NumPy is not --- cassandra/cython_deps.py | 8 +++++++- cassandra/protocol.py | 11 +++++++---- tests/unit/cython/utils.py | 12 ++++-------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/cassandra/cython_deps.py b/cassandra/cython_deps.py index 41516426..fdd15464 100644 --- a/cassandra/cython_deps.py +++ b/cassandra/cython_deps.py @@ -2,4 +2,10 @@ try: from cassandra.rowparser import make_recv_results_rows HAVE_CYTHON = True except ImportError: - HAVE_CYTHON = False \ No newline at end of file + HAVE_CYTHON = False + +try: + import numpy + HAVE_NUMPY = True +except ImportError: + HAVE_NUMPY = False diff --git a/cassandra/protocol.py b/cassandra/protocol.py index de8a464d..5ebbfa5c 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -40,7 +40,7 @@ from cassandra.cqltypes import (AsciiType, BytesType, BooleanType, TupleType, lookup_casstype, SimpleDateType, TimeType, ByteType, ShortType) from cassandra.policies import WriteType -from cassandra.cython_deps import HAVE_CYTHON +from cassandra.cython_deps import HAVE_CYTHON, HAVE_NUMPY from cassandra import util log = logging.getLogger(__name__) @@ -1039,14 +1039,17 @@ def cython_protocol_handler(colparser): if HAVE_CYTHON: from cassandra.objparser import ListParser, LazyParser - from cassandra.numpyparser import NumpyParser - ProtocolHandler = cython_protocol_handler(ListParser()) LazyProtocolHandler = cython_protocol_handler(LazyParser()) - NumpyProtocolHandler = cython_protocol_handler(NumpyParser()) else: # Use Python-based ProtocolHandler LazyProtocolHandler = None + + +if HAVE_CYTHON and HAVE_NUMPY: + from cassandra.numpyparser import NumpyParser + NumpyProtocolHandler = cython_protocol_handler(NumpyParser()) +else: NumpyProtocolHandler = None diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py index eea4698f..f2598c0e 100644 --- a/tests/unit/cython/utils.py +++ b/tests/unit/cython/utils.py @@ -1,9 +1,4 @@ -try: - import tests.unit.cython.dummy_module -except ImportError: - have_cython = False -else: - have_cython = True +from cassandra.cython_deps import HAVE_CYTHON, HAVE_NUMPY try: import unittest2 as unittest @@ -18,10 +13,11 @@ def cyimport(import_path): try: return __import__(import_path, fromlist=True) except ImportError: - if have_cython: + if HAVE_CYTHON: raise return None # @cythontest # def test_something(self): ... 
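+# Tests that also need NumPy can be skipped the same way, e.g.
+# (usage sketch only):
+#
+# @numpytest
+# def test_something_numpy(self): ...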
-cythontest = unittest.skipUnless(have_cython, 'Cython is not available') +cythontest = unittest.skipUnless(HAVE_CYTHON, 'Cython is not available') +numpytest = unittest.skipUnless(HAVE_CYTHON and HAVE_NUMPY, 'NumPy is not available') \ No newline at end of file From 0b81f4068bdeb9d09259b0db255660e13e12b1fd Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Sat, 8 Aug 2015 12:31:53 +0100 Subject: [PATCH 50/70] Forgot BytesDeserializer, fix small empty string issue --- cassandra/deserializers.pyx | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index a0d55340..83582cfc 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -16,21 +16,20 @@ from uuid import UUID from cassandra import cqltypes from cassandra import util - cdef class Deserializer: """Cython-based deserializer class for a cqltype""" def __init__(self, cqltype): self.cqltype = cqltype - self.empty_binary_ok = False + self.empty_binary_ok = cqltype.empty_binary_ok cdef deserialize(self, Buffer *buf, int protocol_version): raise NotImplementedError -cdef class DesLongType(Deserializer): +cdef class DesBytesType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return int64_unpack(buf.ptr) + return to_bytes(buf) # TODO: Use libmpdec: http://www.bytereef.org/mpdecimal/index.html @@ -64,11 +63,6 @@ cdef class DesByteType(Deserializer): cdef class DesAsciiType(Deserializer): - - def __init__(self, cqltype): - super().__init__(cqltype) - self.empty_binary_ok = True - cdef deserialize(self, Buffer *buf, int protocol_version): if six.PY2: return to_bytes(buf) @@ -85,6 +79,11 @@ cdef class DesDoubleType(Deserializer): return double_unpack(buf.ptr) +cdef class DesLongType(Deserializer): + cdef deserialize(self, Buffer *buf, int protocol_version): + return int64_unpack(buf.ptr) + + cdef class DesInt32Type(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): return int32_unpack(buf.ptr) @@ -149,10 +148,6 @@ cdef class DesTimeType(Deserializer): cdef class DesUTF8Type(Deserializer): - def __init__(self, cqltype): - super().__init__(cqltype) - self.empty_binary_ok = True - cdef deserialize(self, Buffer *buf, int protocol_version): return to_bytes(buf).decode('utf8') @@ -320,21 +315,24 @@ cdef class DesTupleType(_DesParameterizedType): cdef Buffer item_buf cdef Deserializer deserializer + # collections inside UDTs are always encoded with at least the + # version 3 format protocol_version = max(3, protocol_version) p = 0 values = [] for i in range(self.subtypes_len): item = None - if p != buf.size: + if p < buf.size: itemlen = int32_unpack(buf.ptr + p) p += 4 if itemlen >= 0: item_buf.ptr = buf.ptr + p item_buf.size = itemlen + p += itemlen + deserializer = self.deserializers[i] item = from_binary(deserializer, &item_buf, protocol_version) - p += itemlen tuple_set(res, i, item) @@ -423,6 +421,9 @@ cdef class GenericDeserializer(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): return self.cqltype.deserialize(to_bytes(buf), protocol_version) + def __repr__(self): + return "GenericDeserializer(%s)" % (self.cqltype,) + #-------------------------------------------------------------------------- # Helper utilities From 8d45880acb5834275708b358bfa4304455e04b00 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Sat, 8 Aug 2015 12:39:05 +0100 Subject: [PATCH 51/70] Python 3 compatibility for test_schema --- tests/integration/long/test_schema.py | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/long/test_schema.py b/tests/integration/long/test_schema.py index 7da5203f..6f165cfd 100644 --- a/tests/integration/long/test_schema.py +++ b/tests/integration/long/test_schema.py @@ -85,11 +85,11 @@ class SchemaTests(unittest.TestCase): session = self.session - for i in xrange(30): + for i in range(30): execute_until_pass(session, "CREATE KEYSPACE test_{0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': 1}}".format(i)) execute_until_pass(session, "CREATE TABLE test_{0}.cf (key int PRIMARY KEY, value int)".format(i)) - for j in xrange(100): + for j in range(100): execute_until_pass(session, "INSERT INTO test_{0}.cf (key, value) VALUES ({1}, {1})".format(i, j)) execute_until_pass(session, "DROP KEYSPACE test_{0}".format(i)) @@ -102,7 +102,7 @@ class SchemaTests(unittest.TestCase): cluster = Cluster(protocol_version=PROTOCOL_VERSION) session = cluster.connect() - for i in xrange(30): + for i in range(30): try: execute_until_pass(session, "CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}") except AlreadyExists: @@ -111,7 +111,7 @@ class SchemaTests(unittest.TestCase): execute_until_pass(session, "CREATE TABLE test.cf (key int PRIMARY KEY, value int)") - for j in xrange(100): + for j in range(100): execute_until_pass(session, "INSERT INTO test.cf (key, value) VALUES ({0}, {0})".format(j)) execute_until_pass(session, "DROP KEYSPACE test") From edd5463e7b56a805a1b4eaa86cc6ccdbbfc8fad2 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Sat, 8 Aug 2015 12:55:20 +0100 Subject: [PATCH 52/70] Add test to illustrate non-deterministic query test failure --- tests/integration/standard/test_query.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/standard/test_query.py b/tests/integration/standard/test_query.py index 80a0d8e2..8474e62d 100644 --- a/tests/integration/standard/test_query.py +++ b/tests/integration/standard/test_query.py @@ -298,8 +298,8 @@ class BatchStatementTests(unittest.TestCase): keys.add(result.k) values.add(result.v) - self.assertEqual(set(range(10)), keys) - self.assertEqual(set(range(10)), values) + self.assertEqual(set(range(10)), keys, msg=results) + self.assertEqual(set(range(10)), values, msg=results) def test_string_statements(self): batch = BatchStatement(BatchType.LOGGED) @@ -367,6 +367,11 @@ class BatchStatementTests(unittest.TestCase): self.session.execute(batch) self.confirm_results() + def test_no_parameters_many_times(self): + for i in range(1000): + self.test_no_parameters() + self.session.execute("TRUNCATE test3rf.test") + class SerialConsistencyTests(unittest.TestCase): def setUp(self): From c8dfc48ff231e0b9793965790c623daa9402f482 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Sat, 8 Aug 2015 15:58:53 +0100 Subject: [PATCH 53/70] More comprehensive cython and numpy deserializer tests --- cassandra/numpyparser.pyx | 8 +- .../standard/test_custom_protocol_handler.py | 5 +- .../standard/test_cython_protocol_handlers.py | 107 ++++++++++++++---- tests/integration/standard/utils.py | 31 +++-- tests/unit/cython/utils.py | 1 + 5 files changed, 111 insertions(+), 41 deletions(-) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 8499d938..0a4e7e3e 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -23,7 +23,7 @@ from cassandra import cqltypes from cassandra.util import is_little_endian import numpy as np - +# import pandas as pd 
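+#
+# Usage sketch (mirrors the integration tests; assumes the driver was built
+# with Cython and that NumPy -- and pandas, for the DataFrame step -- are
+# installed):
+#
+#     from cassandra.cluster import Cluster
+#     from cassandra.query import tuple_factory
+#     from cassandra.protocol import NumpyProtocolHandler
+#
+#     session = Cluster().connect('testspace')
+#     session.row_factory = tuple_factory
+#     session.client_protocol_handler = NumpyProtocolHandler
+#     arrays = session.execute("SELECT * FROM test_table")
+#     # 'arrays' maps column names to NumPy arrays (one per column);
+#     # pd.DataFrame(arrays) would turn it into a pandas DataFrame.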
cdef extern from "numpyFlags.h": # Include 'numpyFlags.h' into the generated C code to disable the @@ -74,8 +74,10 @@ cdef class NumpyParser(ColumnParser): for i in range(rowcount): unpack_row(reader, desc, arrs) - return [make_native_byteorder(arr) for arr in arrays] - # return pd.DataFrame(dict(zip(desc.colnames, arrays))) + arrays = [make_native_byteorder(arr) for arr in arrays] + result = dict(zip(desc.colnames, arrays)) + # return pd.DataFrame(result) + return result ### Helper functions to create NumPy arrays and array descriptors diff --git a/tests/integration/standard/test_custom_protocol_handler.py b/tests/integration/standard/test_custom_protocol_handler.py index edd066be..36965a36 100644 --- a/tests/integration/standard/test_custom_protocol_handler.py +++ b/tests/integration/standard/test_custom_protocol_handler.py @@ -107,10 +107,11 @@ class CustomProtocolHandlerTest(unittest.TestCase): session.client_protocol_handler = CustomProtocolHandlerResultMessageTracked session.row_factory = tuple_factory - columns_string = create_table_with_all_types("alltypes", session) + colnames = create_table_with_all_types("alltypes", session, 1) + columns_string = ", ".join(colnames) # verify data - params = get_all_primitive_params() + params = get_all_primitive_params(0) results = session.execute("SELECT {0} FROM alltypes WHERE primkey=0".format(columns_string))[0] for expected, actual in zip(params, results): self.assertEqual(actual, expected) diff --git a/tests/integration/standard/test_cython_protocol_handlers.py b/tests/integration/standard/test_cython_protocol_handlers.py index ba75cf72..985b7953 100644 --- a/tests/integration/standard/test_cython_protocol_handlers.py +++ b/tests/integration/standard/test_cython_protocol_handlers.py @@ -7,23 +7,25 @@ try: except ImportError: import unittest +from cassandra.query import tuple_factory from cassandra.cluster import Cluster from cassandra.protocol import ProtocolHandler, LazyProtocolHandler, NumpyProtocolHandler + from tests.integration import use_singledc, PROTOCOL_VERSION from tests.integration.datatype_utils import update_datatypes -from tests.integration.standard.utils import create_table_with_all_types, get_all_primitive_params - -from cassandra.cython_deps import HAVE_CYTHON -if not HAVE_CYTHON: - raise unittest.SkipTest("Skipping test, not compiled with Cython enabled") +from tests.integration.standard.utils import ( + create_table_with_all_types, get_all_primitive_params, get_primitive_datatypes) +from tests.unit.cython.utils import cythontest, numpytest def setup_module(): use_singledc() update_datatypes() -class CustomProtocolHandlerTest(unittest.TestCase): +class CythonProtocolHandlerTest(unittest.TestCase): + + N_ITEMS = 10 @classmethod def setUpClass(cls): @@ -32,39 +34,96 @@ class CustomProtocolHandlerTest(unittest.TestCase): cls.session.execute("CREATE KEYSPACE testspace WITH replication = " "{ 'class' : 'SimpleStrategy', 'replication_factor': '1'}") cls.session.set_keyspace("testspace") - create_table_with_all_types("test_table", cls.session) + cls.colnames = create_table_with_all_types("test_table", cls.session, cls.N_ITEMS) @classmethod def tearDownClass(cls): cls.session.execute("DROP KEYSPACE testspace") cls.cluster.shutdown() + @cythontest def test_cython_parser(self): """ Test Cython-based parser that returns a list of tuples """ - self.cython_parser(ProtocolHandler) + verify_iterator_data(self.assertEqual, get_data(ProtocolHandler)) + @cythontest def test_cython_lazy_parser(self): """ - Test Cython-based parser that 
returns a list of tuples + Test Cython-based parser that returns an iterator of tuples """ - self.cython_parser(LazyProtocolHandler) + verify_iterator_data(self.assertEqual, get_data(LazyProtocolHandler)) - def cython_parser(self, protocol_handler): - cluster = Cluster(protocol_version=PROTOCOL_VERSION) - session = cluster.connect(keyspace="testspace") + @numpytest + def test_numpy_parser(self): + """ + Test Numpy-based parser that returns a NumPy array + """ + # arrays = { 'a': arr1, 'b': arr2, ... } + arrays = get_data(NumpyProtocolHandler) - # use our custom protocol handler - session.client_protocol_handler = protocol_handler - # session.row_factory = tuple_factory + colnames = self.colnames + datatypes = get_primitive_datatypes() + for colname, datatype in zip(colnames, datatypes): + arr = arrays[colname] + self.match_dtype(datatype, arr.dtype) - # verify data - params = get_all_primitive_params() - [first_result] = session.execute("SELECT * FROM test_table WHERE primkey=0") - self.assertEqual(len(params), len(first_result), - msg="Not the right number of columns?") - for expected, actual in zip(params, first_result): - self.assertEqual(actual, expected) + verify_iterator_data(self.assertEqual, arrays_to_list_of_tuples(arrays, colnames)) - session.shutdown() + def match_dtype(self, datatype, dtype): + """Match a string cqltype (e.g. 'int' or 'blob') with a numpy dtype""" + if datatype == 'smallint': + self.match_dtype_props(dtype, 'i', 2) + elif datatype == 'int': + self.match_dtype_props(dtype, 'i', 4) + elif datatype in ('bigint', 'counter'): + self.match_dtype_props(dtype, 'i', 8) + elif datatype == 'float': + self.match_dtype_props(dtype, 'f', 4) + elif datatype == 'double': + self.match_dtype_props(dtype, 'f', 8) + else: + self.assertEqual(dtype.kind, 'O', msg=(dtype, datatype)) + + def match_dtype_props(self, dtype, kind, size, signed=None): + self.assertEqual(dtype.kind, kind, msg=dtype) + self.assertEqual(dtype.itemsize, size, msg=dtype) + + +def arrays_to_list_of_tuples(arrays, colnames): + """Convert a dict of arrays (as given by the numpy protocol handler) to a list of tuples""" + first_array = arrays[colnames[0]] + return [tuple(arrays[colname][i] for colname in colnames) + for i in range(len(first_array))] + + +def get_data(protocol_handler): + """ + Get some data from the test table. + + :param key: if None, get all results (100.000 results), otherwise get only one result + """ + cluster = Cluster(protocol_version=PROTOCOL_VERSION) + session = cluster.connect(keyspace="testspace") + + # use our custom protocol handler + session.client_protocol_handler = protocol_handler + session.row_factory = tuple_factory + + results = session.execute("SELECT * FROM test_table") + session.shutdown() + return results + + +def verify_iterator_data(assertEqual, results): + """ + Check the result of get_data() when this is a list or + iterator of tuples + """ + for result in results: + params = get_all_primitive_params(result[0]) + assertEqual(len(params), len(result), + msg="Not the right number of columns?") + for expected, actual in zip(params, result): + assertEqual(actual, expected) diff --git a/tests/integration/standard/utils.py b/tests/integration/standard/utils.py index bd0c80b5..fe54f04d 100644 --- a/tests/integration/standard/utils.py +++ b/tests/integration/standard/utils.py @@ -4,15 +4,16 @@ Helper module to populate a dummy Cassandra tables with data. 
from tests.integration.datatype_utils import PRIMITIVE_DATATYPES, get_sample -def create_table_with_all_types(table_name, session): +def create_table_with_all_types(table_name, session, N): """ Method that given a table_name and session construct a table that contains all possible primitive types. :param table_name: Name of table to create :param session: session to use for table creation - :return: a string containing the names of all the columns. - This can be used to query the table. + :param N: the number of items to insert into the table + + :return: a list of column names """ # create table alpha_type_list = ["primkey int PRIMARY KEY"] @@ -26,21 +27,27 @@ def create_table_with_all_types(table_name, session): table_name, ', '.join(alpha_type_list)), timeout=120) # create the input - params = get_all_primitive_params() - # insert into table as a simple statement - columns_string = ', '.join(col_names) - placeholders = ', '.join(["%s"] * len(col_names)) - session.execute("INSERT INTO {0} ({1}) VALUES ({2})".format( - table_name, columns_string, placeholders), params, timeout=120) - return columns_string + for key in range(N): + params = get_all_primitive_params(key) + + # insert into table as a simple statement + columns_string = ', '.join(col_names) + placeholders = ', '.join(["%s"] * len(col_names)) + session.execute("INSERT INTO {0} ({1}) VALUES ({2})".format( + table_name, columns_string, placeholders), params, timeout=120) + return col_names -def get_all_primitive_params(): +def get_all_primitive_params(key): """ Simple utility method used to give back a list of all possible primitive data sample types. """ - params = [0] + params = [key] for datatype in PRIMITIVE_DATATYPES: params.append(get_sample(datatype)) return params + + +def get_primitive_datatypes(): + return ['int'] + list(PRIMITIVE_DATATYPES) \ No newline at end of file diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py index f2598c0e..9f0a5a87 100644 --- a/tests/unit/cython/utils.py +++ b/tests/unit/cython/utils.py @@ -17,6 +17,7 @@ def cyimport(import_path): raise return None + # @cythontest # def test_something(self): ... 
cythontest = unittest.skipUnless(HAVE_CYTHON, 'Cython is not available') From 919ece20f3710289a161bd92841bbbe642e7ce28 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 10:50:30 +0100 Subject: [PATCH 54/70] Small performance optimization --- cassandra/deserializers.pyx | 5 ++++- cassandra/numpyparser.pyx | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 83582cfc..a08415e4 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -16,6 +16,9 @@ from uuid import UUID from cassandra import cqltypes from cassandra import util +cdef bint PY2 = six.PY2 + + cdef class Deserializer: """Cython-based deserializer class for a cqltype""" @@ -64,7 +67,7 @@ cdef class DesByteType(Deserializer): cdef class DesAsciiType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - if six.PY2: + if PY2: return to_bytes(buf) return to_bytes(buf).decode('ascii') diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 0a4e7e3e..89bf18da 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -63,7 +63,7 @@ cdef class NumpyParser(ColumnParser): """Decode a ResultMessage into a bunch of NumPy arrays""" cpdef parse_rows(self, BytesIOReader reader, ParseDesc desc): - cdef Py_ssize_t i, rowcount + cdef Py_ssize_t rowcount cdef ArrDesc[::1] array_descs cdef ArrDesc *arrs @@ -71,8 +71,7 @@ cdef class NumpyParser(ColumnParser): array_descs, arrays = make_arrays(desc, rowcount) arrs = &array_descs[0] - for i in range(rowcount): - unpack_row(reader, desc, arrs) + _parse_rows(reader, desc, arrs, rowcount) arrays = [make_native_byteorder(arr) for arr in arrays] result = dict(zip(desc.colnames, arrays)) @@ -80,6 +79,14 @@ cdef class NumpyParser(ColumnParser): return result +cdef _parse_rows(BytesIOReader reader, ParseDesc desc, + ArrDesc *arrs, Py_ssize_t rowcount): + cdef Py_ssize_t i + + for i in range(rowcount): + unpack_row(reader, desc, arrs) + + ### Helper functions to create NumPy arrays and array descriptors def make_arrays(ParseDesc desc, array_size): From 07327ea91586a04ac4a93cfb9b4387a59d59b194 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 13:34:56 +0100 Subject: [PATCH 55/70] Clean up setup.py --- setup.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index ce5e5166..bfb9176f 100644 --- a/setup.py +++ b/setup.py @@ -262,20 +262,16 @@ if "--no-libev" not in sys.argv and not is_windows: if "--no-cython" not in sys.argv: try: from Cython.Build import cythonize - # cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', 'pool', 'protocol', 'query', 'util'] - cython_candidates = [] + cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', + 'pool', 'protocol', 'query', 'util'] compile_args = [] if is_windows else ['-Wno-unused-function'] - directives = {'profile': PROFILING} # this seems to have no effect... 
extensions.extend(cythonize( [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], - extra_compile_args=compile_args, - compiler_directives=directives) + extra_compile_args=compile_args) for m in cython_candidates], exclude_failures=True)) - extensions.extend(cythonize("cassandra/*.pyx", - compiler_directives=directives)) - extensions.extend(cythonize("tests/unit/cython/*.pyx", - compiler_directives=directives)) + extensions.extend(cythonize("cassandra/*.pyx")) + extensions.extend(cythonize("tests/unit/cython/*.pyx")) except ImportError: sys.stderr.write("Cython is not installed. Not compiling core driver files as extensions (optional).") From 8d28473695e2be14e77e9db190fe73fd18a3baa2 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 13:37:10 +0100 Subject: [PATCH 56/70] Remove leftover dummy module --- tests/unit/cython/dummy_module.pyx | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tests/unit/cython/dummy_module.pyx diff --git a/tests/unit/cython/dummy_module.pyx b/tests/unit/cython/dummy_module.pyx deleted file mode 100644 index 8bd1206b..00000000 --- a/tests/unit/cython/dummy_module.pyx +++ /dev/null @@ -1,2 +0,0 @@ -# This is a dummy module used by utils.py to determine whether -# cassandra was build with Cython \ No newline at end of file From d71b6e769c87025582adc2842cadaee6a434eb91 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 20:19:12 +0100 Subject: [PATCH 57/70] Disable non-cython extension modules --- setup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index bfb9176f..ce09a23e 100644 --- a/setup.py +++ b/setup.py @@ -262,14 +262,14 @@ if "--no-libev" not in sys.argv and not is_windows: if "--no-cython" not in sys.argv: try: from Cython.Build import cythonize - cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', - 'pool', 'protocol', 'query', 'util'] - compile_args = [] if is_windows else ['-Wno-unused-function'] - extensions.extend(cythonize( - [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], - extra_compile_args=compile_args) - for m in cython_candidates], - exclude_failures=True)) + # cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', + # 'pool', 'protocol', 'query', 'util'] + # compile_args = [] if is_windows else ['-Wno-unused-function'] + # extensions.extend(cythonize( + # [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], + # extra_compile_args=compile_args) + # for m in cython_candidates], + # exclude_failures=True)) extensions.extend(cythonize("cassandra/*.pyx")) extensions.extend(cythonize("tests/unit/cython/*.pyx")) except ImportError: From 40422563492b8c94ef13add47d77f96f4dc92ff1 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 20:20:00 +0100 Subject: [PATCH 58/70] Do boundschecking when accessing buffer memory --- cassandra/buffer.pxd | 27 +++-- cassandra/{marshal.pyx => cython_marshal.pyx} | 64 +++++----- cassandra/deserializers.pyx | 110 +++++++++--------- cassandra/ioutils.pyx | 16 ++- cassandra/marshal.pxd | 29 ----- cassandra/objparser.pyx | 2 + 6 files changed, 121 insertions(+), 127 deletions(-) rename cassandra/{marshal.pyx => cython_marshal.pyx} (76%) delete mode 100644 cassandra/marshal.pxd diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd index cfe93e01..f94da139 100644 --- a/cassandra/buffer.pxd +++ b/cassandra/buffer.pxd @@ -16,8 +16,6 @@ cdef struct Buffer: char *ptr Py_ssize_t size -cdef inline Buffer from_bytes(bytes byts): - return 
from_ptr_and_size(PyBytes_AS_STRING(byts), len(byts)) cdef inline bytes to_bytes(Buffer *buf): return buf.ptr[:buf.size] @@ -25,8 +23,23 @@ cdef inline bytes to_bytes(Buffer *buf): cdef inline char *buf_ptr(Buffer *buf): return buf.ptr -cdef inline Buffer from_ptr_and_size(char *ptr, Py_ssize_t size): - cdef Buffer res - res.ptr = ptr - res.size = size - return res +cdef inline char *buf_read(Buffer *buf, Py_ssize_t size) except NULL: + if size > buf.size: + raise IndexError("Requested more than length of buffer") + return buf.ptr + +cdef inline int slice_buffer(Buffer *buf, Buffer *out, + Py_ssize_t start, Py_ssize_t size) except -1: + if size < 0: + raise ValueError("Length must be positive") + + if start + size > buf.size: + raise IndexError("Buffer slice out of bounds") + + out.ptr = buf.ptr + start + out.size = size + return 0 + +cdef inline void from_ptr_and_size(char *ptr, Py_ssize_t size, Buffer *out): + out.ptr = ptr + out.size = size diff --git a/cassandra/marshal.pyx b/cassandra/cython_marshal.pyx similarity index 76% rename from cassandra/marshal.pyx rename to cassandra/cython_marshal.pyx index 0ab65c46..00011018 100644 --- a/cassandra/marshal.pyx +++ b/cassandra/cython_marshal.pyx @@ -21,6 +21,7 @@ import math from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t) +from cassandra.buffer cimport Buffer, buf_read cdef bint is_little_endian from cassandra.util import is_little_endian @@ -66,82 +67,81 @@ cdef inline Py_ssize_t div2(Py_ssize_t x): ### Packing and unpacking of signed integers -cpdef inline bytes int64_pack(int64_t x): +cdef inline bytes int64_pack(int64_t x): return pack( &x, 8) -cpdef inline int64_t int64_unpack(const char *buf): - # The 'const' makes sure the buffer is not mutated in-place! 
- cdef int64_t x = ( buf)[0] +cdef inline int64_t int64_unpack(Buffer *buf): + cdef int64_t x = ( buf_read(buf, 8))[0] cdef char *p = &x swap_order( &x, 8) return x -cpdef inline bytes int32_pack(int32_t x): +cdef inline bytes int32_pack(int32_t x): return pack( &x, 4) -cpdef inline int32_t int32_unpack(const char *buf): - cdef int32_t x = ( buf)[0] +cdef inline int32_t int32_unpack(Buffer *buf): + cdef int32_t x = ( buf_read(buf, 4))[0] cdef char *p = &x swap_order( &x, 4) return x -cpdef inline bytes int16_pack(int16_t x): +cdef inline bytes int16_pack(int16_t x): return pack( &x, 2) -cpdef inline int16_t int16_unpack(const char *buf): - cdef int16_t x = ( buf)[0] +cdef inline int16_t int16_unpack(Buffer *buf): + cdef int16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x -cpdef inline bytes int8_pack(int8_t x): +cdef inline bytes int8_pack(int8_t x): return ( &x)[:1] -cpdef inline int8_t int8_unpack(const char *buf): - return ( buf)[0] +cdef inline int8_t int8_unpack(Buffer *buf): + return ( buf_read(buf, 1))[0] -cpdef inline bytes uint64_pack(uint64_t x): +cdef inline bytes uint64_pack(uint64_t x): return pack( &x, 8) -cpdef inline uint64_t uint64_unpack(const char *buf): - cdef uint64_t x = ( buf)[0] +cdef inline uint64_t uint64_unpack(Buffer *buf): + cdef uint64_t x = ( buf_read(buf, 8))[0] swap_order( &x, 8) return x -cpdef inline bytes uint32_pack(uint32_t x): +cdef inline bytes uint32_pack(uint32_t x): return pack( &x, 4) -cpdef inline uint32_t uint32_unpack(const char *buf): - cdef uint32_t x = ( buf)[0] +cdef inline uint32_t uint32_unpack(Buffer *buf): + cdef uint32_t x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x -cpdef inline bytes uint16_pack(uint16_t x): +cdef inline bytes uint16_pack(uint16_t x): return pack( &x, 2) -cpdef inline uint16_t uint16_unpack(const char *buf): - cdef uint16_t x = ( buf)[0] +cdef inline uint16_t uint16_unpack(Buffer *buf): + cdef uint16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x -cpdef inline bytes uint8_pack(uint8_t x): +cdef inline bytes uint8_pack(uint8_t x): return pack( &x, 1) -cpdef inline uint8_t uint8_unpack(const char *buf): - return ( buf)[0] +cdef inline uint8_t uint8_unpack(Buffer *buf): + return ( buf_read(buf, 1))[0] -cpdef inline bytes double_pack(double x): +cdef inline bytes double_pack(double x): return pack( &x, 8) -cpdef inline double double_unpack(const char *buf): - cdef double x = ( buf)[0] +cdef inline double double_unpack(Buffer *buf): + cdef double x = ( buf_read(buf, 8))[0] swap_order( &x, 8) return x -cpdef inline bytes float_pack(float x): +cdef inline bytes float_pack(float x): return pack( &x, 4) -cpdef inline float float_unpack(const char *buf): - cdef float x = ( buf)[0] +cdef inline float float_unpack(Buffer *buf): + cdef float x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x @@ -167,7 +167,7 @@ v3_header_pack = v3_header_struct.pack v3_header_unpack = v3_header_struct.unpack -cpdef varint_unpack(term): +cdef varint_unpack(term): """Unpack a variable-sized integer""" if PY3: return varint_unpack_py3(term) diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index a08415e4..cf502691 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -2,8 +2,8 @@ from libc.stdint cimport int32_t, uint16_t -include 'marshal.pyx' -from cassandra.buffer cimport Buffer, to_bytes +include 'cython_marshal.pyx' +from cassandra.buffer cimport Buffer, to_bytes, slice_buffer from cassandra.cython_utils cimport datetime_from_timestamp from cython.view cimport array 
as cython_array @@ -39,10 +39,9 @@ cdef class DesBytesType(Deserializer): cdef class DesDecimalType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): cdef Buffer varint_buf - varint_buf.ptr = buf.ptr + 4 - varint_buf.size = buf.size - 4 + slice_buffer(buf, &varint_buf, 4, buf.size - 4) - scale = int32_unpack(buf.ptr) + scale = int32_unpack(buf) unscaled = varint_unpack(to_bytes(&varint_buf)) return Decimal('%de%d' % (unscaled, -scale)) @@ -55,14 +54,14 @@ cdef class DesUUIDType(Deserializer): cdef class DesBooleanType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - if int8_unpack(buf.ptr): + if int8_unpack(buf): return True return False cdef class DesByteType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return int8_unpack(buf.ptr) + return int8_unpack(buf) cdef class DesAsciiType(Deserializer): @@ -74,22 +73,22 @@ cdef class DesAsciiType(Deserializer): cdef class DesFloatType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return float_unpack(buf.ptr) + return float_unpack(buf) cdef class DesDoubleType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return double_unpack(buf.ptr) + return double_unpack(buf) cdef class DesLongType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return int64_unpack(buf.ptr) + return int64_unpack(buf) cdef class DesInt32Type(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return int32_unpack(buf.ptr) + return int32_unpack(buf) cdef class DesIntegerType(Deserializer): @@ -116,7 +115,7 @@ cdef class DesCounterColumnType(DesLongType): cdef class DesDateType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - cdef double timestamp = int64_unpack(buf.ptr) / 1000.0 + cdef double timestamp = int64_unpack(buf) / 1000.0 return datetime_from_timestamp(timestamp) @@ -136,18 +135,18 @@ EPOCH_OFFSET_DAYS = 2 ** 31 cdef class DesSimpleDateType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - days = uint32_unpack(buf.ptr) - EPOCH_OFFSET_DAYS + days = uint32_unpack(buf) - EPOCH_OFFSET_DAYS return util.Date(days) cdef class DesShortType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return int16_unpack(buf.ptr) + return int16_unpack(buf) cdef class DesTimeType(Deserializer): cdef deserialize(self, Buffer *buf, int protocol_version): - return util.Time(int64_unpack(to_bytes(buf))) + return util.Time(int64_unpack(buf)) cdef class DesUTF8Type(Deserializer): @@ -217,28 +216,40 @@ cdef list _deserialize_list_or_set(itemlen_t dummy_version, we can specialize on the protocol version. 
""" cdef itemlen_t itemlen - cdef Buffer sub_buf + cdef Buffer itemlen_buf + cdef Buffer elem_buf - cdef itemlen_t numelements = _unpack[itemlen_t](dummy_version, buf.ptr) - cdef itemlen_t p = sizeof(itemlen_t) + cdef itemlen_t numelements = _unpack_len[itemlen_t](0, buf) + cdef itemlen_t idx = sizeof(itemlen_t) cdef list result = [] for _ in range(numelements): - itemlen = _unpack[itemlen_t](dummy_version, buf.ptr + p) - p += sizeof(itemlen_t) - sub_buf.ptr = buf.ptr + p - sub_buf.size = itemlen - p += itemlen - result.append(from_binary(deserializer, &sub_buf, protocol_version)) + idx = subelem(buf, &elem_buf, idx) + result.append(from_binary(deserializer, &elem_buf, protocol_version)) return result -cdef itemlen_t _unpack(itemlen_t dummy_version, const char *buf): + +cdef inline itemlen_t subelem( + Buffer *buf, Buffer *elem_buf, itemlen_t idx): + cdef itemlen_t elemlen + + elemlen = _unpack_len[itemlen_t](idx, buf) + idx += sizeof(itemlen_t) + slice_buffer(buf, elem_buf, idx, elemlen) + return idx + elemlen + + +cdef itemlen_t _unpack_len(itemlen_t idx, Buffer *buf): cdef itemlen_t result + cdef Buffer itemlen_buf + slice_buffer(buf, &itemlen_buf, idx, sizeof(itemlen_t)) + if itemlen_t is uint16_t: - result = uint16_unpack(buf) + result = uint16_unpack(&itemlen_buf) else: - result = int32_unpack(buf) + result = int32_unpack(&itemlen_buf) + return result #-------------------------------------------------------------------------- @@ -278,27 +289,18 @@ cdef _deserialize_map(itemlen_t dummy_version, key_type, val_type): cdef itemlen_t itemlen, val_len, key_len cdef Buffer key_buf, val_buf + cdef Buffer itemlen_buf - cdef itemlen_t numelements = _unpack[itemlen_t](dummy_version, buf.ptr) - cdef itemlen_t p = sizeof(itemlen_t) + cdef itemlen_t numelements + cdef itemlen_t idx = sizeof(itemlen_t) cdef list result = [] - numelements = _unpack[itemlen_t](dummy_version, buf.ptr) - p = sizeof(itemlen_t) + numelements = _unpack_len[itemlen_t](0, buf) + idx = sizeof(itemlen_t) themap = util.OrderedMapSerializedKey(key_type, protocol_version) for _ in range(numelements): - key_len = _unpack[itemlen_t](dummy_version, buf.ptr + p) - p += sizeof(itemlen_t) - # keybytes = byts[p:p + key_len] - key_buf.ptr = buf.ptr + p - key_buf.size = key_len - p += key_len - val_len = _unpack(dummy_version, buf.ptr + p) - p += sizeof(itemlen_t) - # valbytes = byts[p:p + val_len] - val_buf.ptr = buf.ptr + p - val_buf.size = val_len - p += val_len + idx = subelem(buf, &key_buf, idx) + idx = subelem(buf, &val_buf, idx) key = from_binary(key_deserializer, &key_buf, protocol_version) val = from_binary(val_deserializer, &val_buf, protocol_version) themap._insert_unchecked(key, to_bytes(&key_buf), val) @@ -316,6 +318,7 @@ cdef class DesTupleType(_DesParameterizedType): cdef int32_t itemlen cdef tuple res = tuple_new(self.subtypes_len) cdef Buffer item_buf + cdef Buffer itemlen_buf cdef Deserializer deserializer # collections inside UDTs are always encoded with at least the @@ -327,11 +330,11 @@ cdef class DesTupleType(_DesParameterizedType): for i in range(self.subtypes_len): item = None if p < buf.size: - itemlen = int32_unpack(buf.ptr + p) + slice_buffer(buf, &itemlen_buf, p, 4) + itemlen = int32_unpack(&itemlen_buf) p += 4 if itemlen >= 0: - item_buf.ptr = buf.ptr + p - item_buf.size = itemlen + slice_buffer(buf, &item_buf, p, itemlen) p += itemlen deserializer = self.deserializers[i] @@ -354,12 +357,13 @@ cdef class DesUserType(DesTupleType): cdef class DesCompositeType(_DesParameterizedType): cdef deserialize(self, 
Buffer *buf, int protocol_version): - cdef Py_ssize_t i + cdef Py_ssize_t i, idx, start cdef Buffer elem_buf cdef int16_t element_length cdef Deserializer deserializer cdef tuple res = tuple_new(self.subtypes_len) + idx = 0 for i in range(self.subtypes_len): if not buf.size: # CompositeType can have missing elements at the end @@ -373,17 +377,17 @@ cdef class DesCompositeType(_DesParameterizedType): res = res[:i] break - element_length = uint16_unpack(buf.ptr) - elem_buf.ptr = buf.ptr + 2 - elem_buf.size = element_length + element_length = uint16_unpack(buf) + slice_buffer(buf, &elem_buf, 2, element_length) - # skip element length, element, and the EOC (one byte) - buf.ptr = buf.ptr + 2 + element_length + 1 - buf.size = buf.size - (2 + element_length + 1) deserializer = self.deserializers[i] item = from_binary(deserializer, &elem_buf, protocol_version) tuple_set(res, i, item) + # skip element length, element, and the EOC (one byte) + start = 2 + element_length + 1 + slice_buffer(buf, buf, start, buf.size - start) + return res diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 203997e9..1a11068c 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -1,5 +1,5 @@ -include 'marshal.pyx' -from cassandra.buffer cimport Buffer +include 'cython_marshal.pyx' +from cassandra.buffer cimport Buffer, from_ptr_and_size from libc.stdint cimport int32_t from cassandra.bytesio cimport BytesIOReader @@ -17,13 +17,17 @@ cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1: strings (which may be empty). """ cdef Py_ssize_t raw_val_size = read_int(reader) + cdef char *ptr if raw_val_size <= 0: - buf_out.ptr = NULL + ptr = NULL else: - buf_out.ptr = reader.read(raw_val_size) + ptr = reader.read(raw_val_size) - buf_out.size = raw_val_size + from_ptr_and_size(ptr, raw_val_size, buf_out) return 0 cdef inline int32_t read_int(BytesIOReader reader) except ?0xDEAD: - return int32_unpack(reader.read(4)) + cdef Buffer buf + buf.ptr = reader.read(4) + buf.size = 4 + return int32_unpack(&buf) diff --git a/cassandra/marshal.pxd b/cassandra/marshal.pxd deleted file mode 100644 index ef7d9858..00000000 --- a/cassandra/marshal.pxd +++ /dev/null @@ -1,29 +0,0 @@ -from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, - uint8_t, uint16_t, uint32_t, uint64_t) - -cpdef bytes int64_pack(int64_t x) -cpdef bytes int32_pack(int32_t x) -cpdef bytes int16_pack(int16_t x) -cpdef bytes int8_pack(int8_t x) - -cpdef int64_t int64_unpack(const char *buf) -cpdef int32_t int32_unpack(const char *buf) -cpdef int16_t int16_unpack(const char *buf) -cpdef int8_t int8_unpack(const char *buf) - -cpdef bytes uint64_pack(uint64_t x) -cpdef bytes uint32_pack(uint32_t x) -cpdef bytes uint16_pack(uint16_t x) -cpdef bytes uint8_pack(uint8_t x) - -cpdef uint64_t uint64_unpack(const char *buf) -cpdef uint32_t uint32_unpack(const char *buf) -cpdef uint16_t uint16_unpack(const char *buf) -cpdef uint8_t uint8_unpack(const char *buf) - -cpdef bytes double_pack(double x) -cpdef bytes float_pack(float x) - -cpdef double double_unpack(const char *buf) -cpdef float float_unpack(const char *buf) - diff --git a/cassandra/objparser.pyx b/cassandra/objparser.pyx index 8aca1427..670f1b4a 100644 --- a/cassandra/objparser.pyx +++ b/cassandra/objparser.pyx @@ -40,6 +40,8 @@ cdef class TupleRowParser(RowParser): """ cpdef unpack_row(self, BytesIOReader reader, ParseDesc desc): + assert desc.rowsize >= 0 + cdef Buffer buf cdef Py_ssize_t i, rowsize = desc.rowsize cdef Deserializer deserializer From 
564d2fdd0fa54194f189abb3eef2c5993084d743 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Mon, 10 Aug 2015 21:33:54 +0100 Subject: [PATCH 59/70] Make sure to propagate exceptions from unpacking functions --- cassandra/cython_marshal.pyx | 20 ++++++++-------- cassandra/deserializers.pyx | 46 +++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/cassandra/cython_marshal.pyx b/cassandra/cython_marshal.pyx index 00011018..ae964474 100644 --- a/cassandra/cython_marshal.pyx +++ b/cassandra/cython_marshal.pyx @@ -70,7 +70,7 @@ cdef inline Py_ssize_t div2(Py_ssize_t x): cdef inline bytes int64_pack(int64_t x): return pack( &x, 8) -cdef inline int64_t int64_unpack(Buffer *buf): +cdef inline int64_t int64_unpack(Buffer *buf) except ?0xDEAD: cdef int64_t x = ( buf_read(buf, 8))[0] cdef char *p = &x swap_order( &x, 8) @@ -79,7 +79,7 @@ cdef inline int64_t int64_unpack(Buffer *buf): cdef inline bytes int32_pack(int32_t x): return pack( &x, 4) -cdef inline int32_t int32_unpack(Buffer *buf): +cdef inline int32_t int32_unpack(Buffer *buf) except ?0xDEAD: cdef int32_t x = ( buf_read(buf, 4))[0] cdef char *p = &x swap_order( &x, 4) @@ -88,7 +88,7 @@ cdef inline int32_t int32_unpack(Buffer *buf): cdef inline bytes int16_pack(int16_t x): return pack( &x, 2) -cdef inline int16_t int16_unpack(Buffer *buf): +cdef inline int16_t int16_unpack(Buffer *buf) except ?0xDED: cdef int16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x @@ -96,13 +96,13 @@ cdef inline int16_t int16_unpack(Buffer *buf): cdef inline bytes int8_pack(int8_t x): return ( &x)[:1] -cdef inline int8_t int8_unpack(Buffer *buf): +cdef inline int8_t int8_unpack(Buffer *buf) except ?80: return ( buf_read(buf, 1))[0] cdef inline bytes uint64_pack(uint64_t x): return pack( &x, 8) -cdef inline uint64_t uint64_unpack(Buffer *buf): +cdef inline uint64_t uint64_unpack(Buffer *buf) except ?0xDEAD: cdef uint64_t x = ( buf_read(buf, 8))[0] swap_order( &x, 8) return x @@ -110,7 +110,7 @@ cdef inline uint64_t uint64_unpack(Buffer *buf): cdef inline bytes uint32_pack(uint32_t x): return pack( &x, 4) -cdef inline uint32_t uint32_unpack(Buffer *buf): +cdef inline uint32_t uint32_unpack(Buffer *buf) except ?0xDEAD: cdef uint32_t x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x @@ -118,7 +118,7 @@ cdef inline uint32_t uint32_unpack(Buffer *buf): cdef inline bytes uint16_pack(uint16_t x): return pack( &x, 2) -cdef inline uint16_t uint16_unpack(Buffer *buf): +cdef inline uint16_t uint16_unpack(Buffer *buf) except ?0xDEAD: cdef uint16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x @@ -126,13 +126,13 @@ cdef inline uint16_t uint16_unpack(Buffer *buf): cdef inline bytes uint8_pack(uint8_t x): return pack( &x, 1) -cdef inline uint8_t uint8_unpack(Buffer *buf): +cdef inline uint8_t uint8_unpack(Buffer *buf) except ?0xff: return ( buf_read(buf, 1))[0] cdef inline bytes double_pack(double x): return pack( &x, 8) -cdef inline double double_unpack(Buffer *buf): +cdef inline double double_unpack(Buffer *buf) except ?1.74: cdef double x = ( buf_read(buf, 8))[0] swap_order( &x, 8) return x @@ -140,7 +140,7 @@ cdef inline double double_unpack(Buffer *buf): cdef inline bytes float_pack(float x): return pack( &x, 4) -cdef inline float float_unpack(Buffer *buf): +cdef inline float float_unpack(Buffer *buf) except ?1.74: cdef float x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index cf502691..47028bf1 100644 --- 
a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -219,38 +219,52 @@ cdef list _deserialize_list_or_set(itemlen_t dummy_version, cdef Buffer itemlen_buf cdef Buffer elem_buf - cdef itemlen_t numelements = _unpack_len[itemlen_t](0, buf) - cdef itemlen_t idx = sizeof(itemlen_t) + cdef itemlen_t numelements + cdef itemlen_t idx cdef list result = [] + _unpack_len[itemlen_t](0, &numelements, buf) + idx = sizeof(itemlen_t) + for _ in range(numelements): - idx = subelem(buf, &elem_buf, idx) + subelem(buf, &elem_buf, &idx) result.append(from_binary(deserializer, &elem_buf, protocol_version)) return result -cdef inline itemlen_t subelem( - Buffer *buf, Buffer *elem_buf, itemlen_t idx): +cdef inline int subelem( + Buffer *buf, Buffer *elem_buf, itemlen_t *idx_p) except -1: + """ + Read the next element from the buffer: first read the size (in bytes) of the + element, then fill elem_buf with a newly sliced buffer of this size (and the + right offset). + + NOTE: The handling of 'idx' is somewhat atrocious, as there is a Cython + bug with the combination fused types + 'except' clause. + So instead, we pass in a pointer to 'idx', namely 'idx_p', and write + to this instead. + """ cdef itemlen_t elemlen - elemlen = _unpack_len[itemlen_t](idx, buf) - idx += sizeof(itemlen_t) - slice_buffer(buf, elem_buf, idx, elemlen) - return idx + elemlen + _unpack_len[itemlen_t](idx_p[0], &elemlen, buf) + idx_p[0] += sizeof(itemlen_t) + slice_buffer(buf, elem_buf, idx_p[0], elemlen) + idx_p[0] += elemlen + return 0 -cdef itemlen_t _unpack_len(itemlen_t idx, Buffer *buf): +cdef int _unpack_len(itemlen_t idx, itemlen_t *elemlen, Buffer *buf) except -1: cdef itemlen_t result cdef Buffer itemlen_buf slice_buffer(buf, &itemlen_buf, idx, sizeof(itemlen_t)) if itemlen_t is uint16_t: - result = uint16_unpack(&itemlen_buf) + elemlen[0] = uint16_unpack(&itemlen_buf) else: - result = int32_unpack(&itemlen_buf) + elemlen[0] = int32_unpack(&itemlen_buf) - return result + return 0 #-------------------------------------------------------------------------- # Map deserialization @@ -295,12 +309,12 @@ cdef _deserialize_map(itemlen_t dummy_version, cdef itemlen_t idx = sizeof(itemlen_t) cdef list result = [] - numelements = _unpack_len[itemlen_t](0, buf) + _unpack_len[itemlen_t](0, &numelements, buf) idx = sizeof(itemlen_t) themap = util.OrderedMapSerializedKey(key_type, protocol_version) for _ in range(numelements): - idx = subelem(buf, &key_buf, idx) - idx = subelem(buf, &val_buf, idx) + subelem(buf, &key_buf, &idx) + subelem(buf, &val_buf, &idx) key = from_binary(key_deserializer, &key_buf, protocol_version) val = from_binary(val_deserializer, &val_buf, protocol_version) themap._insert_unchecked(key, to_bytes(&key_buf), val) From 4956b12acace35e0db7718e9e5632fb1a978d896 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 11:19:48 +0100 Subject: [PATCH 60/70] Include Cython sources in sdist --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 1825f7bb..7a686a6b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include setup.py README.rst MANIFEST.in LICENSE ez_setup.py +include cassandra/*.pyx From d91731734c26392b0fd1655912ae3c7adf4fd931 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:04:18 +0100 Subject: [PATCH 61/70] Clean up some leftover code --- cassandra/buffer.pxd | 1 - cassandra/cython_marshal.pyx | 112 +++-------------------------------- cassandra/cython_utils.pyx | 9 +-- cassandra/deserializers.pyx | 4 +- 
cassandra/numpyparser.pyx | 1 - cassandra/typecodes.py | 4 +- tests/unit/cython/utils.py | 2 +- 7 files changed, 17 insertions(+), 116 deletions(-) diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd index f94da139..542cb181 100644 --- a/cassandra/buffer.pxd +++ b/cassandra/buffer.pxd @@ -10,7 +10,6 @@ from cpython.bytes cimport PyBytes_AS_STRING # checking. Only string objects are supported; no Unicode objects # should be passed. -from cassandra.buffer cimport Buffer cdef struct Buffer: char *ptr diff --git a/cassandra/cython_marshal.pyx b/cassandra/cython_marshal.pyx index ae964474..1ba11435 100644 --- a/cassandra/cython_marshal.pyx +++ b/cassandra/cython_marshal.pyx @@ -15,36 +15,16 @@ # limitations under the License. import six -import sys -import struct -import math from libc.stdint cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t) -from cassandra.buffer cimport Buffer, buf_read +from cassandra.buffer cimport Buffer, buf_read, to_bytes cdef bint is_little_endian from cassandra.util import is_little_endian cdef bint PY3 = six.PY3 -# cdef extern from "marshal.h": -# cdef str c_string_to_python(char *p, Py_ssize_t len) - -def _make_packer(format_string): - packer = struct.Struct(format_string) - pack = packer.pack - unpack = lambda s: packer.unpack(s)[0] - return pack, unpack - - -cdef inline bytes pack(char *buf, Py_ssize_t size): - """ - Pack a buffer, given as a char *, into Python bytes in byte order. - """ - swap_order(buf, size) - return buf[:size] - cdef inline void swap_order(char *buf, Py_ssize_t size): """ @@ -65,10 +45,7 @@ cdef inline void swap_order(char *buf, Py_ssize_t size): cdef inline Py_ssize_t div2(Py_ssize_t x): return x >> 1 -### Packing and unpacking of signed integers - -cdef inline bytes int64_pack(int64_t x): - return pack( &x, 8) +### Unpacking of signed integers cdef inline int64_t int64_unpack(Buffer *buf) except ?0xDEAD: cdef int64_t x = ( buf_read(buf, 8))[0] @@ -76,106 +53,58 @@ cdef inline int64_t int64_unpack(Buffer *buf) except ?0xDEAD: swap_order( &x, 8) return x -cdef inline bytes int32_pack(int32_t x): - return pack( &x, 4) - cdef inline int32_t int32_unpack(Buffer *buf) except ?0xDEAD: cdef int32_t x = ( buf_read(buf, 4))[0] cdef char *p = &x swap_order( &x, 4) return x -cdef inline bytes int16_pack(int16_t x): - return pack( &x, 2) - cdef inline int16_t int16_unpack(Buffer *buf) except ?0xDED: cdef int16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x -cdef inline bytes int8_pack(int8_t x): - return ( &x)[:1] - cdef inline int8_t int8_unpack(Buffer *buf) except ?80: return ( buf_read(buf, 1))[0] -cdef inline bytes uint64_pack(uint64_t x): - return pack( &x, 8) - cdef inline uint64_t uint64_unpack(Buffer *buf) except ?0xDEAD: cdef uint64_t x = ( buf_read(buf, 8))[0] swap_order( &x, 8) return x -cdef inline bytes uint32_pack(uint32_t x): - return pack( &x, 4) - cdef inline uint32_t uint32_unpack(Buffer *buf) except ?0xDEAD: cdef uint32_t x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x -cdef inline bytes uint16_pack(uint16_t x): - return pack( &x, 2) - cdef inline uint16_t uint16_unpack(Buffer *buf) except ?0xDEAD: cdef uint16_t x = ( buf_read(buf, 2))[0] swap_order( &x, 2) return x -cdef inline bytes uint8_pack(uint8_t x): - return pack( &x, 1) - cdef inline uint8_t uint8_unpack(Buffer *buf) except ?0xff: return ( buf_read(buf, 1))[0] -cdef inline bytes double_pack(double x): - return pack( &x, 8) - cdef inline double double_unpack(Buffer *buf) except ?1.74: cdef double x = ( buf_read(buf, 8))[0] 
swap_order( &x, 8) return x -cdef inline bytes float_pack(float x): - return pack( &x, 4) - cdef inline float float_unpack(Buffer *buf) except ?1.74: cdef float x = ( buf_read(buf, 4))[0] swap_order( &x, 4) return x -# int64_pack, int64_unpack = _make_packer('>q') -# int32_pack, int32_unpack = _make_packer('>i') -# int16_pack, int16_unpack = _make_packer('>h') -# int8_pack, int8_unpack = _make_packer('>b') -# uint64_pack, uint64_unpack = _make_packer('>Q') -# uint32_pack, uint32_unpack = _make_packer('>I') -# uint16_pack, uint16_unpack = _make_packer('>H') -# uint8_pack, uint8_unpack = _make_packer('>B') -# float_pack, float_unpack = _make_packer('>f') -# double_pack, double_unpack = _make_packer('>d') -# Special case for cassandra header -header_struct = struct.Struct('>BBbB') -header_pack = header_struct.pack -header_unpack = header_struct.unpack - -# in protocol version 3 and higher, the stream ID is two bytes -v3_header_struct = struct.Struct('>BBhB') -v3_header_pack = v3_header_struct.pack -v3_header_unpack = v3_header_struct.unpack - - -cdef varint_unpack(term): +cdef varint_unpack(Buffer *term): """Unpack a variable-sized integer""" if PY3: - return varint_unpack_py3(term) + return varint_unpack_py3(to_bytes(term)) else: - return varint_unpack_py2(term) + return varint_unpack_py2(to_bytes(term)) # TODO: Optimize these two functions -def varint_unpack_py3(term): +cdef varint_unpack_py3(bytes term): cdef int64_t one = 1L val = int(''.join("%02x" % i for i in term), 16) @@ -186,36 +115,9 @@ def varint_unpack_py3(term): val -= one << (len(term) * 8) return val -def varint_unpack_py2(term): # noqa +cdef varint_unpack_py2(bytes term): # noqa cdef int64_t one = 1L val = int(term.encode('hex'), 16) if (ord(term[0]) & 128) != 0: val = val - (one << (len(term) * 8)) return val - - -def bitlength(n): - # return int(math.log2(n)) + 1 - bitlen = 0 - while n > 0: - n >>= 1 - bitlen += 1 - return bitlen - - -def varint_pack(big): - pos = True - if big == 0: - return b'\x00' - if big < 0: - bytelength = bitlength(abs(big) - 1) // 8 + 1 - big = (1 << bytelength * 8) + big - pos = False - revbytes = bytearray() - while big > 0: - revbytes.append(big & 0xff) - big >>= 8 - if pos and revbytes[-1] & 0x80: - revbytes.append(0) - revbytes.reverse() - return six.binary_type(revbytes) diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx index de87c1e0..a660f3ee 100644 --- a/cassandra/cython_utils.pyx +++ b/cassandra/cython_utils.pyx @@ -16,13 +16,14 @@ from cpython.datetime cimport ( import datetime import sys -DATETIME_EPOC = datetime.datetime(1970, 1, 1) - -assert sys.byteorder in ('little', 'big') -is_little_endian = sys.byteorder == 'little' +cdef bint is_little_endian +from cassandra.util import is_little_endian import_datetime() +DATETIME_EPOC = datetime.datetime(1970, 1, 1) + + cdef datetime_from_timestamp(double timestamp): cdef int seconds = timestamp cdef int microseconds = ( (timestamp * 1000000)) % 1000000 diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 47028bf1..6c2afa22 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -42,7 +42,7 @@ cdef class DesDecimalType(Deserializer): slice_buffer(buf, &varint_buf, 4, buf.size - 4) scale = int32_unpack(buf) - unscaled = varint_unpack(to_bytes(&varint_buf)) + unscaled = varint_unpack(&varint_buf) return Decimal('%de%d' % (unscaled, -scale)) @@ -93,7 +93,7 @@ cdef class DesInt32Type(Deserializer): cdef class DesIntegerType(Deserializer): cdef deserialize(self, Buffer *buf, int 
protocol_version): - return varint_unpack(to_bytes(buf)) + return varint_unpack(buf) cdef class DesInetAddressType(Deserializer): diff --git a/cassandra/numpyparser.pyx b/cassandra/numpyparser.pyx index 89bf18da..bfde839e 100644 --- a/cassandra/numpyparser.pyx +++ b/cassandra/numpyparser.pyx @@ -75,7 +75,6 @@ cdef class NumpyParser(ColumnParser): arrays = [make_native_byteorder(arr) for arr in arrays] result = dict(zip(desc.colnames, arrays)) - # return pd.DataFrame(result) return result diff --git a/cassandra/typecodes.py b/cassandra/typecodes.py index 651c58d7..2f0ce8f5 100644 --- a/cassandra/typecodes.py +++ b/cassandra/typecodes.py @@ -3,8 +3,8 @@ Module with constants for Cassandra type codes. These constants are useful for - a) mapping messages to cqltypes (cassandra/cqltypes.py) - b) optimizezd dispatching for (de)serialization (cassandra/encoding.py) + a) mapping messages to cqltypes (cassandra/cqltypes.py) + b) optimized dispatching for (de)serialization (cassandra/encoding.py) Type codes are repeated here from the Cassandra binary protocol specification: diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py index 9f0a5a87..c493e17b 100644 --- a/tests/unit/cython/utils.py +++ b/tests/unit/cython/utils.py @@ -11,7 +11,7 @@ def cyimport(import_path): (and skip any relevant tests). """ try: - return __import__(import_path, fromlist=True) + return __import__(import_path, fromlist=[True]) except ImportError: if HAVE_CYTHON: raise From 2a568d1ebb967b406ebd63a8414a3e527470c6da Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:09:53 +0100 Subject: [PATCH 62/70] Use underscore to break up long module names --- .../{numpyparser.pyx => numpy_parser.pyx} | 0 cassandra/{objparser.pyx => obj_parser.pyx} | 0 cassandra/protocol.py | 18 +++++++++--------- cassandra/{rowparser.pyx => row_parser.pyx} | 0 cassandra/{typecodes.pxd => type_codes.pxd} | 0 cassandra/{typecodes.py => type_codes.py} | 0 6 files changed, 9 insertions(+), 9 deletions(-) rename cassandra/{numpyparser.pyx => numpy_parser.pyx} (100%) rename cassandra/{objparser.pyx => obj_parser.pyx} (100%) rename cassandra/{rowparser.pyx => row_parser.pyx} (100%) rename cassandra/{typecodes.pxd => type_codes.pxd} (100%) rename cassandra/{typecodes.py => type_codes.py} (100%) diff --git a/cassandra/numpyparser.pyx b/cassandra/numpy_parser.pyx similarity index 100% rename from cassandra/numpyparser.pyx rename to cassandra/numpy_parser.pyx diff --git a/cassandra/objparser.pyx b/cassandra/obj_parser.pyx similarity index 100% rename from cassandra/objparser.pyx rename to cassandra/obj_parser.pyx diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 5ebbfa5c..25311911 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -22,7 +22,7 @@ import six from six.moves import range import io -from cassandra import typecodes +from cassandra import type_codes from cassandra import (Unavailable, WriteTimeout, ReadTimeout, WriteFailure, ReadFailure, FunctionFailure, AlreadyExists, InvalidRequest, Unauthorized, @@ -548,7 +548,7 @@ class ResultMessage(_MessageType): paging_state = None # Names match type name in module scope. 
Most are imported from cassandra.cqltypes (except CUSTOM_TYPE) - type_codes = _cqltypes_by_code = dict((v, globals()[k]) for k, v in typecodes.__dict__.items() if not k.startswith('_')) + type_codes = _cqltypes_by_code = dict((v, globals()[k]) for k, v in type_codes.__dict__.items() if not k.startswith('_')) _FLAGS_GLOBAL_TABLES_SPEC = 0x0001 _HAS_MORE_PAGES_FLAG = 0x0002 @@ -1001,20 +1001,20 @@ def cython_protocol_handler(colparser): There are three Cython-based protocol handlers (least to most performant): - 1. objparser.ListParser + 1. obj_parser.ListParser this parser decodes result messages into a list of tuples - 2. objparser.LazyParser + 2. obj_parser.LazyParser this parser decodes result messages lazily by returning an iterator - 3. numpyparser.NumPyParser + 3. numpy_parser.NumPyParser this parser decodes result messages into NumPy arrays - The default is to use objparser.ListParser + The default is to use obj_parser.ListParser """ # TODO: It may be cleaner to turn ProtocolHandler and ResultMessage into # TODO: instances and use methods instead of class methods - from cassandra.rowparser import make_recv_results_rows + from cassandra.row_parser import make_recv_results_rows class FastResultMessage(ResultMessage): """ @@ -1038,7 +1038,7 @@ def cython_protocol_handler(colparser): if HAVE_CYTHON: - from cassandra.objparser import ListParser, LazyParser + from cassandra.obj_parser import ListParser, LazyParser ProtocolHandler = cython_protocol_handler(ListParser()) LazyProtocolHandler = cython_protocol_handler(LazyParser()) else: @@ -1047,7 +1047,7 @@ else: if HAVE_CYTHON and HAVE_NUMPY: - from cassandra.numpyparser import NumpyParser + from cassandra.numpy_parser import NumpyParser NumpyProtocolHandler = cython_protocol_handler(NumpyParser()) else: NumpyProtocolHandler = None diff --git a/cassandra/rowparser.pyx b/cassandra/row_parser.pyx similarity index 100% rename from cassandra/rowparser.pyx rename to cassandra/row_parser.pyx diff --git a/cassandra/typecodes.pxd b/cassandra/type_codes.pxd similarity index 100% rename from cassandra/typecodes.pxd rename to cassandra/type_codes.pxd diff --git a/cassandra/typecodes.py b/cassandra/type_codes.py similarity index 100% rename from cassandra/typecodes.py rename to cassandra/type_codes.py From 9240c71c02c27a6f20c73870a92ccdafab238df7 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:20:45 +0100 Subject: [PATCH 63/70] Add license to top of new files --- cassandra/buffer.pxd | 14 ++++++++++++++ cassandra/bytesio.pxd | 14 ++++++++++++++ cassandra/bytesio.pyx | 14 +++++++++++++- cassandra/cython_utils.pyx | 14 ++++++++++++++ cassandra/deserializers.pxd | 14 +++++++++++++- cassandra/deserializers.pyx | 15 ++++++++++++++- cassandra/ioutils.pyx | 14 ++++++++++++++ cassandra/numpy_parser.pyx | 14 +++++++++++++- cassandra/obj_parser.pyx | 14 ++++++++++++++ cassandra/parsing.pxd | 14 ++++++++++++++ cassandra/parsing.pyx | 14 ++++++++++++++ cassandra/protocol.py | 2 +- cassandra/row_parser.pyx | 14 +++++++++++++- cassandra/tuple.pxd | 14 ++++++++++++++ cassandra/type_codes.pxd | 14 ++++++++++++++ cassandra/util.py | 14 ++++++++++++++ tests/unit/cython/__init__.py | 14 ++++++++++++++ tests/unit/cython/bytesio_testhelper.pyx | 14 ++++++++++++++ tests/unit/cython/test_bytesio.py | 14 ++++++++++++++ tests/unit/cython/utils.py | 16 +++++++++++++++- 20 files changed, 264 insertions(+), 7 deletions(-) diff --git a/cassandra/buffer.pxd b/cassandra/buffer.pxd index 542cb181..2f40ced0 100644 --- a/cassandra/buffer.pxd +++ 
b/cassandra/buffer.pxd @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Simple buffer data structure that provides a view on existing memory (e.g. from a bytes object). This memory must stay alive while the diff --git a/cassandra/bytesio.pxd b/cassandra/bytesio.pxd index 64bbdcca..2bcda361 100644 --- a/cassandra/bytesio.pxd +++ b/cassandra/bytesio.pxd @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + cdef class BytesIOReader: cdef bytes buf cdef char *buf_ptr diff --git a/cassandra/bytesio.pyx b/cassandra/bytesio.pyx index eb81c2fe..68a15baf 100644 --- a/cassandra/bytesio.pyx +++ b/cassandra/bytesio.pyx @@ -1,4 +1,16 @@ -# -- cython: profile=True +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. cdef class BytesIOReader: """ diff --git a/cassandra/cython_utils.pyx b/cassandra/cython_utils.pyx index a660f3ee..1d16d47d 100644 --- a/cassandra/cython_utils.pyx +++ b/cassandra/cython_utils.pyx @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Duplicate module of util.py, with some accelerated functions used for deserialization. diff --git a/cassandra/deserializers.pxd b/cassandra/deserializers.pxd index 015fda37..26b4429a 100644 --- a/cassandra/deserializers.pxd +++ b/cassandra/deserializers.pxd @@ -1,4 +1,16 @@ -# -- cython: profile=True +# Copyright 2013-2015 DataStax, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from cassandra.buffer cimport Buffer diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx index 6c2afa22..54ce1daf 100644 --- a/cassandra/deserializers.pyx +++ b/cassandra/deserializers.pyx @@ -1,4 +1,17 @@ -# -- cython: profile=True +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from libc.stdint cimport int32_t, uint16_t diff --git a/cassandra/ioutils.pyx b/cassandra/ioutils.pyx index 1a11068c..c38b311a 100644 --- a/cassandra/ioutils.pyx +++ b/cassandra/ioutils.pyx @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + include 'cython_marshal.pyx' from cassandra.buffer cimport Buffer, from_ptr_and_size diff --git a/cassandra/numpy_parser.pyx b/cassandra/numpy_parser.pyx index bfde839e..6702cfcc 100644 --- a/cassandra/numpy_parser.pyx +++ b/cassandra/numpy_parser.pyx @@ -1,4 +1,16 @@ -# -- cython: profile=True +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provider an optional protocol parser that returns diff --git a/cassandra/obj_parser.pyx b/cassandra/obj_parser.pyx index 670f1b4a..8aa5b394 100644 --- a/cassandra/obj_parser.pyx +++ b/cassandra/obj_parser.pyx @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + include "ioutils.pyx" from cassandra.bytesio cimport BytesIOReader diff --git a/cassandra/parsing.pxd b/cassandra/parsing.pxd index 9daecad9..278c6e71 100644 --- a/cassandra/parsing.pxd +++ b/cassandra/parsing.pxd @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from cassandra.bytesio cimport BytesIOReader from cassandra.deserializers cimport Deserializer diff --git a/cassandra/parsing.pyx b/cassandra/parsing.pyx index c9afd4b5..c44d7f5a 100644 --- a/cassandra/parsing.pyx +++ b/cassandra/parsing.pyx @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Module containing the definitions and declarations (parsing.pxd) for parsers. """ diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 25311911..3cb13351 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -894,7 +894,7 @@ class ProtocolHandler(object): result decoding implementations. """ - @classmethod + @classmethod def encode_message(cls, msg, stream_id, protocol_version, compressor): """ Encodes a message using the specified frame parameters, and compressor diff --git a/cassandra/row_parser.pyx b/cassandra/row_parser.pyx index 1c855769..fc7bce15 100644 --- a/cassandra/row_parser.pyx +++ b/cassandra/row_parser.pyx @@ -1,4 +1,16 @@ -# -- cython: profile=True +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from cassandra.parsing cimport ParseDesc, ColumnParser from cassandra.deserializers import make_deserializers diff --git a/cassandra/tuple.pxd b/cassandra/tuple.pxd index 185e8364..746205e2 100644 --- a/cassandra/tuple.pxd +++ b/cassandra/tuple.pxd @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from cpython.tuple cimport ( PyTuple_New, # Return value: New reference. diff --git a/cassandra/type_codes.pxd b/cassandra/type_codes.pxd index b0405284..90f29bc9 100644 --- a/cassandra/type_codes.pxd +++ b/cassandra/type_codes.pxd @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + cdef enum: CUSTOM_TYPE AsciiType diff --git a/cassandra/util.py b/cassandra/util.py index 0e8a818b..c71822c2 100644 --- a/cassandra/util.py +++ b/cassandra/util.py @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import with_statement import calendar import datetime diff --git a/tests/unit/cython/__init__.py b/tests/unit/cython/__init__.py index e69de29b..e4b89e5f 100644 --- a/tests/unit/cython/__init__.py +++ b/tests/unit/cython/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/tests/unit/cython/bytesio_testhelper.pyx b/tests/unit/cython/bytesio_testhelper.pyx index 7f898c4c..d557c037 100644 --- a/tests/unit/cython/bytesio_testhelper.pyx +++ b/tests/unit/cython/bytesio_testhelper.pyx @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from cassandra.bytesio cimport BytesIOReader def test_read1(assert_equal, assert_raises): diff --git a/tests/unit/cython/test_bytesio.py b/tests/unit/cython/test_bytesio.py index 65cc463a..2dbf1311 100644 --- a/tests/unit/cython/test_bytesio.py +++ b/tests/unit/cython/test_bytesio.py @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from tests.unit.cython.utils import cyimport, cythontest bytesio_testhelper = cyimport('tests.unit.cython.bytesio_testhelper') diff --git a/tests/unit/cython/utils.py b/tests/unit/cython/utils.py index c493e17b..788212ac 100644 --- a/tests/unit/cython/utils.py +++ b/tests/unit/cython/utils.py @@ -1,3 +1,17 @@ +# Copyright 2013-2015 DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from cassandra.cython_deps import HAVE_CYTHON, HAVE_NUMPY try: @@ -21,4 +35,4 @@ def cyimport(import_path): # @cythontest # def test_something(self): ... 
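# A companion sketch for the numpytest decorator defined just below; the
# keyspace/table names and the use of a live session here are assumptions for
# illustration, not part of this patch:
import unittest

from tests.unit.cython.utils import numpytest

class NumpyProtocolHandlerTest(unittest.TestCase):
    @numpytest  # skipped unless the driver was built with Cython and NumPy is installed
    def test_numpy_results(self):
        from cassandra.cluster import Cluster
        from cassandra.protocol import NumpyProtocolHandler
        session = Cluster().connect('test_keyspace')
        session.client_protocol_handler = NumpyProtocolHandler
        rows = session.execute("SELECT * FROM test_table")
        # with this handler, each parsed result page is a dict mapping column
        # names to NumPy arrays (see NumpyParser.parse_rows)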
cythontest = unittest.skipUnless(HAVE_CYTHON, 'Cython is not available') -numpytest = unittest.skipUnless(HAVE_CYTHON and HAVE_NUMPY, 'NumPy is not available') \ No newline at end of file +numpytest = unittest.skipUnless(HAVE_CYTHON and HAVE_NUMPY, 'NumPy is not available') From 02be9f441ae4731d3247120084b9b2d3003732ee Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:30:32 +0100 Subject: [PATCH 64/70] Add some API documentation for Cython-based deserializers --- cassandra/protocol.py | 2 +- docs/api/cassandra/protocol.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cassandra/protocol.py b/cassandra/protocol.py index 3cb13351..25311911 100644 --- a/cassandra/protocol.py +++ b/cassandra/protocol.py @@ -894,7 +894,7 @@ class ProtocolHandler(object): result decoding implementations. """ - @classmethod + @classmethod def encode_message(cls, msg, stream_id, protocol_version, compressor): """ Encodes a message using the specified frame parameters, and compressor diff --git a/docs/api/cassandra/protocol.rst b/docs/api/cassandra/protocol.rst index cabf2b59..0d4df101 100644 --- a/docs/api/cassandra/protocol.rst +++ b/docs/api/cassandra/protocol.rst @@ -24,3 +24,17 @@ See :meth:`.Session.execute`, ::meth:`.Session.execute_async`, :attr:`.ResponseF .. automethod:: encode_message .. automethod:: decode_message + +Faster Deserialization +---------------------- +When python-driver is compiled with Cython, it uses a Cython-based deserialization path +to deserialize messages. There are two additional ProtocolHandler classes that can be +used to deserialize response messages: the first is ``LazyProtocolHandler`` and the +second is ``NumpyProtocolHandler``.They can be used as follows: + +.. code:: python + + from cassandra.protocol import NumpyProtocolHandler, LazyProtocolHandler + s.client_protocol_handler = LazyProtocolHandler # for a result iterator + s.client_protocol_handler = NumpyProtocolHandler # for a dict of NumPy arrays as result + From 0924df80d8a8bbff9c69cef3feb6840a34d9b8e3 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:34:50 +0100 Subject: [PATCH 65/70] Also include .pxd files in sdist --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 7a686a6b..4e072d1c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include setup.py README.rst MANIFEST.in LICENSE ez_setup.py include cassandra/*.pyx +include cassandra/*.pxd From c821333fb721ffb96e1456604c985c584aa02cc0 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Tue, 11 Aug 2015 21:38:00 +0100 Subject: [PATCH 66/70] Re-enable cythonized pure-python modules --- setup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index ce09a23e..bfb9176f 100644 --- a/setup.py +++ b/setup.py @@ -262,14 +262,14 @@ if "--no-libev" not in sys.argv and not is_windows: if "--no-cython" not in sys.argv: try: from Cython.Build import cythonize - # cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', - # 'pool', 'protocol', 'query', 'util'] - # compile_args = [] if is_windows else ['-Wno-unused-function'] - # extensions.extend(cythonize( - # [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m], - # extra_compile_args=compile_args) - # for m in cython_candidates], - # exclude_failures=True)) + cython_candidates = ['cluster', 'concurrent', 'connection', 'cqltypes', 'metadata', + 'pool', 'protocol', 'query', 'util'] + compile_args = [] if is_windows 
+        extensions.extend(cythonize(
+            [Extension('cassandra.%s' % m, ['cassandra/%s.py' % m],
+                       extra_compile_args=compile_args)
+             for m in cython_candidates],
+            exclude_failures=True))
         extensions.extend(cythonize("cassandra/*.pyx"))
         extensions.extend(cythonize("tests/unit/cython/*.pyx"))
     except ImportError:

From dd76d15b5db5d24214e5e7457bff6fd4b6841599 Mon Sep 17 00:00:00 2001
From: Mark Florisson
Date: Tue, 11 Aug 2015 21:41:48 +0100
Subject: [PATCH 67/70] Remove leftover TODO comments

---
 cassandra/protocol.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cassandra/protocol.py b/cassandra/protocol.py
index 25311911..4ebeab5d 100644
--- a/cassandra/protocol.py
+++ b/cassandra/protocol.py
@@ -1012,8 +1012,6 @@ def cython_protocol_handler(colparser):
     The default is to use obj_parser.ListParser
     """
 
-    # TODO: It may be cleaner to turn ProtocolHandler and ResultMessage into
-    # TODO: instances and use methods instead of class methods
     from cassandra.row_parser import make_recv_results_rows
 
     class FastResultMessage(ResultMessage):

From fade6487e9d859e087be067d7a375b82bfa3eeca Mon Sep 17 00:00:00 2001
From: Mark Florisson
Date: Wed, 12 Aug 2015 10:40:08 +0100
Subject: [PATCH 68/70] Add cython unit tests to sdist

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 4e072d1c..e3cb20eb 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
 include setup.py README.rst MANIFEST.in LICENSE ez_setup.py
 include cassandra/*.pyx
 include cassandra/*.pxd
+include tests/unit/cython/*.pyx

From 1f985cf1a1e2f821e57a44f2b2a2e83b93a03c46 Mon Sep 17 00:00:00 2001
From: Mark Florisson
Date: Wed, 12 Aug 2015 12:13:09 +0100
Subject: [PATCH 69/70] Fix typo in import

---
 cassandra/cython_deps.py    | 2 +-
 cassandra/deserializers.pyx | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/cassandra/cython_deps.py b/cassandra/cython_deps.py
index fdd15464..5cc86fe7 100644
--- a/cassandra/cython_deps.py
+++ b/cassandra/cython_deps.py
@@ -1,5 +1,5 @@
 try:
-    from cassandra.rowparser import make_recv_results_rows
+    from cassandra.row_parser import make_recv_results_rows
     HAVE_CYTHON = True
 except ImportError:
     HAVE_CYTHON = False
diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx
index 54ce1daf..ca5dc6d8 100644
--- a/cassandra/deserializers.pyx
+++ b/cassandra/deserializers.pyx
@@ -164,7 +164,8 @@ cdef class DesTimeType(Deserializer):
 
 cdef class DesUTF8Type(Deserializer):
     cdef deserialize(self, Buffer *buf, int protocol_version):
-        return to_bytes(buf).decode('utf8')
+        cdef val = to_bytes(buf)
+        return val.decode('utf8')
 
 
 cdef class DesVarcharType(DesUTF8Type):
@@ -502,6 +503,7 @@ cpdef Deserializer find_deserializer(cqltype):
 def obj_array(list objs):
     """Create a (Cython) array of objects given a list of objects"""
     cdef object[:] arr
+    cdef Py_ssize_t i
     arr = cython_array(shape=(len(objs),), itemsize=sizeof(void *), format="O")
     # arr[:] = objs # This does not work (segmentation faults)
     for i, obj in enumerate(objs):

From ccc7c8b19ee8895ebf85050a7d37e644c71e6b83 Mon Sep 17 00:00:00 2001
From: Mark Florisson
Date: Wed, 12 Aug 2015 14:28:25 +0100
Subject: [PATCH 70/70] Reduce some noise in valgrind

---
 cassandra/deserializers.pyx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cassandra/deserializers.pyx b/cassandra/deserializers.pyx
index ca5dc6d8..e2f23284 100644
--- a/cassandra/deserializers.pyx
+++ b/cassandra/deserializers.pyx
@@ -31,7 +31,6 @@ from cassandra import util
 
 cdef bint PY2 = six.PY2
 
-
 cdef class Deserializer:
     """Cython-based deserializer class for a cqltype"""
 
@@ -468,12 +467,14 @@ def make_deserializers(cqltypes):
     return obj_array([find_deserializer(ct) for ct in cqltypes])
 
 
+cdef dict classes = globals()
+
 cpdef Deserializer find_deserializer(cqltype):
     """Find a deserializer for a cqltype"""
     name = 'Des' + cqltype.__name__
 
     if name in globals():
-        cls = globals()[name]
+        cls = classes[name]
    elif issubclass(cqltype, cqltypes.ListType):
        cls = DesListType
    elif issubclass(cqltype, cqltypes.SetType):
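
As a usage illustration for the handlers documented in patch 64 above, the
``client_protocol_handler`` assignment fits into a session setup roughly as
follows. This is a minimal sketch and not part of the patch series: the
contact point, keyspace, and table names are placeholders, and it assumes the
driver was built with Cython so that ``LazyProtocolHandler`` is available.

.. code:: python

    from cassandra.cluster import Cluster
    from cassandra.protocol import LazyProtocolHandler

    cluster = Cluster(['127.0.0.1'])           # placeholder contact point
    session = cluster.connect('my_keyspace')   # placeholder keyspace

    # Rows are decoded lazily while the result is iterated, rather than all
    # at once when the response frame is parsed.
    session.client_protocol_handler = LazyProtocolHandler
    for row in session.execute('SELECT * FROM my_table'):  # placeholder table
        print(row)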