diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7d55a8b0..4cd3052c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -46,6 +46,24 @@ Bug Fixes * cqlengine: class.MultipleObjectsReturned has DoesNotExist as base class (PYTHON-489) * cqlengine: Typo in cqlengine UserType __len__ breaks attribute assignment (PYTHON-502) + +Other +----- + +* cqlengine: a major improvement on queryset has been introduced. It + is a lot more efficient to iterate large datasets: the rows are + now fetched on demand using the driver pagination. + +* cqlengine: the queryset len() and count() behaviors have changed. It + now executes a "SELECT COUNT(*)" of the query rather than returning + the size of the internal result_cache (loaded rows). On large + querysets, you might want to avoid using them due to the performance + cost. Note that trying to access objects using list index/slicing + with negative indices also requires a count to be + executed. + + + 3.0.0 ===== November 24, 2015 diff --git a/cassandra/cqlengine/query.py b/cassandra/cqlengine/query.py index 085971f7..4b2230c9 100644 --- a/cassandra/cqlengine/query.py +++ b/cassandra/cqlengine/query.py @@ -700,7 +700,9 @@ class AbstractQuerySet(object): def count(self): """ - Returns the number of rows matched by this query + Returns the number of rows matched by this query. + + *Note: This function executes a SELECT COUNT() and has a performance cost on large datasets* """ if self._batch: raise CQLEngineException("Only inserts, updates, and deletes are available in batch mode") @@ -751,14 +753,19 @@ class AbstractQuerySet(object): def limit(self, v): """ - Limits the number of results returned by Cassandra. + Limits the number of results returned by Cassandra. Use *0* or *None* to disable. *Note that CQL's default limit is 10,000, so all queries without a limit set explicitly will have an implicit limit of 10,000* .. 
code-block:: python + # Fetch 100 users for user in User.objects().limit(100): print(user) + + # Fetch all users + for user in User.objects().limit(None): + print(user) """ if v is None: @@ -780,7 +787,7 @@ class AbstractQuerySet(object): """ Sets the number of rows that are fetched at a time. - *Note that driver's default fetch size is 5000. + *Note that the driver's default fetch size is 5000.* .. code-block:: python diff --git a/docs/api/cassandra/cqlengine/query.rst b/docs/api/cassandra/cqlengine/query.rst index bd45b3b9..ad5489f2 100644 --- a/docs/api/cassandra/cqlengine/query.rst +++ b/docs/api/cassandra/cqlengine/query.rst @@ -18,6 +18,12 @@ The methods here are used to filter, order, and constrain results. .. automethod:: count + .. method:: len(queryset) + + Returns the number of rows matched by this query. This function uses :meth:`~.cassandra.cqlengine.query.ModelQuerySet.count` internally. + + *Note: This function executes a SELECT COUNT() and has a performance cost on large datasets* + .. automethod:: distinct .. automethod:: filter @@ -26,6 +32,8 @@ The methods here are used to filter, order, and constrain results. .. automethod:: limit + .. automethod:: fetch_size + .. automethod:: if_not_exists .. automethod:: if_exists diff --git a/docs/cqlengine/queryset.rst b/docs/cqlengine/queryset.rst index 0732a244..18287f92 100644 --- a/docs/cqlengine/queryset.rst +++ b/docs/cqlengine/queryset.rst @@ -79,6 +79,10 @@ Accessing objects in a QuerySet q[0] #returns the first result q[1] #returns the second result + .. note:: + + * CQL does not support specifying a start position in its queries. Therefore, accessing elements using array indexing will load every result up to the index value requested. + * Using negative indices requires a "SELECT COUNT()" to be executed. This has a performance cost on large datasets. * list slicing .. 
code-block:: python @@ -87,7 +91,10 @@ Accessing objects in a QuerySet q[1:] #returns all results except the first q[1:9] #returns a slice of the results - *Note: CQL does not support specifying a start position in it's queries. Therefore, accessing elements using array indexing / slicing will load every result up to the index value requested* + .. note:: + + * CQL does not support specifying a start position in its queries. Therefore, accessing elements using array slicing will load every result up to the index value requested. + * Using negative indices requires a "SELECT COUNT()" to be executed. This has a performance cost on large datasets. * calling :attr:`get() ` on the queryset .. code-block:: python