Allow batch_size=None in Table.scan() to avoid filter incompatibilities

Allow None as a valid value for the batch_size argument to Table.scan(),
since HBase does not support specifying a batch size when some scanner
filters are used.

Fixes issue #54.
This commit is contained in:
Wouter Bolsterlee
2014-01-25 21:39:49 +01:00
parent 87275df09b
commit 8481d31780
3 changed files with 33 additions and 12 deletions

View File

@@ -14,6 +14,11 @@ Release date: *not yet released*
to :py:meth:`Table.scan` (`issue #39
<https://github.com/wbolster/happybase/issues/39>`_).
* Allow `None` as a valid value for the `batch_size` argument to
:py:meth:`Table.scan`, since HBase does not support specifying a batch size
when some scanner filters are used (`issue #54
<https://github.com/wbolster/happybase/issues/54>`_).
HappyBase 0.7
-------------

View File

@@ -254,6 +254,15 @@ class Table(object):
this to a low value (or even 1) if your data is large, since a low
batch size results in added round-trips to the server.
.. warning::
Not all HBase filters can be used in combination with a batch
size. Explicitly specify `None` for the `batch_size` argument
in those cases to override the default value. Failure to do
so can result in hard-to-debug errors (not HappyBase's
fault), such as a non-responsive connection. The HBase logs
may contain more useful information in these situations.
**Compatibility notes:**
* The `filter` argument is only available when using HBase 0.92
@@ -280,11 +289,11 @@ class Table(object):
:return: generator yielding the rows matching the scan
:rtype: iterable of `(row_key, row_data)` tuples
"""
if batch_size < 1:
raise ValueError("'batch_size' must be >= 1")
if batch_size is not None and batch_size < 1:
raise ValueError("'batch_size' must be >= 1 (or None)")
if limit is not None and limit < 1:
raise ValueError("'limit' must be >= 1")
raise ValueError("'limit' must be >= 1 (or None)")
if sorted_columns and self.connection.compat < '0.96':
raise NotImplementedError(
@@ -349,16 +358,16 @@ class Table(object):
n_returned = n_fetched = 0
try:
while True:
if limit is None:
if batch_size is None:
how_many = 1
else:
how_many = batch_size
else:
how_many = min(batch_size, limit - n_returned)
if how_many == 1:
items = self.connection.client.scannerGet(scan_id)
else:
items = self.connection.client.scannerGetList(
scan_id, how_many)
if limit is not None:
how_many = min(how_many, limit - n_returned)
items = self.connection.client.scannerGetList(
scan_id, how_many)
n_fetched += len(items)

View File

@@ -357,7 +357,7 @@ def test_scan():
list(table.scan(row_prefix='foobar', row_start='xyz'))
with assert_raises(ValueError):
list(table.scan(batch_size=None))
list(table.scan(batch_size=0))
if connection.compat == '0.90':
with assert_raises(NotImplementedError):
@@ -446,6 +446,13 @@ def test_scan_sorting():
row.items())
def test_scan_filter_and_batch_size():
# See issue #54
filter = "SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')"
for k, v in table.scan(filter=filter, batch_size=None):
print v
def test_delete():
row_key = 'row-test-delete'
data = {'cf1:col1': 'v1',