pyeclib/test/test_pyeclib_c.py
2016-01-12 15:52:13 -07:00

441 lines
17 KiB
Python

# Copyright (c) 2013, Kevin Greenan (kmgreen2@gmail.com)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution. THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import random
from string import ascii_letters
import sys
import tempfile
import time
import unittest
import pyeclib_c
from pyeclib.ec_iface import PyECLib_EC_Types
from pyeclib.ec_iface import VALID_EC_TYPES
class Timer:
def __init__(self):
self.start_time = 0
self.end_time = 0
def start(self):
self.start_time = time.time()
def stop(self):
self.end_time = time.time()
def curr_delta(self):
return self.end_time - self.start_time
def stop_and_return(self):
self.end_time = time.time()
return self.curr_delta()
class TestPyECLib(unittest.TestCase):
def __init__(self, *args):
self.num_datas = [12, 12, 12]
self.num_parities = [2, 3, 4]
self.iterations = 100
# EC algorithm and config parameters
self.rs_types = [(PyECLib_EC_Types.jerasure_rs_vand),
(PyECLib_EC_Types.jerasure_rs_cauchy),
(PyECLib_EC_Types.isa_l_rs_vand),
(PyECLib_EC_Types.liberasurecode_rs_vand)]
self.xor_types = [(PyECLib_EC_Types.flat_xor_hd, 12, 6, 4),
(PyECLib_EC_Types.flat_xor_hd, 10, 5, 4),
(PyECLib_EC_Types.flat_xor_hd, 10, 5, 3)]
self.shss = [(PyECLib_EC_Types.shss, 6, 3),
(PyECLib_EC_Types.shss, 10, 4),
(PyECLib_EC_Types.shss, 20, 4),
(PyECLib_EC_Types.shss, 11, 7)]
# Input temp files for testing
self.sizes = ["101-K", "202-K", "303-K"]
self.files = {}
self._create_tmp_files()
unittest.TestCase.__init__(self, *args)
def _create_tmp_files(self):
"""
Create the temporary files needed for testing. Use the tempfile
package so that the files will be automatically removed during
garbage collection.
"""
for size_str in self.sizes:
# Determine the size of the file to create
size_desc = size_str.split("-")
size = int(size_desc[0])
if size_desc[1] == 'M':
size *= 1000000
elif size_desc[1] == 'K':
size *= 1000
# Create the dictionary of files to test with
buf = ''.join(random.choice(ascii_letters) for i in range(size))
if sys.version_info >= (3,):
buf = buf.encode('ascii')
tmp_file = tempfile.NamedTemporaryFile('w+b')
tmp_file.write(buf)
self.files[size_str] = tmp_file
def get_tmp_file(self, name):
"""
Acquire a temp file from the dictionary of pre-built, random files
with the seek position to the head of the file.
"""
tmp_file = self.files.get(name, None)
if tmp_file:
tmp_file.seek(0, 0)
return tmp_file
def setUp(self):
# Ensure that the file offset is set to the head of the file
for _, tmp_file in self.files.items():
tmp_file.seek(0, 0)
def tearDown(self):
pass
def time_encode(self, num_data, num_parity, ec_type, hd,
file_size, iterations):
"""
:return average encode time
"""
timer = Timer()
tsum = 0
handle = pyeclib_c.init(num_data, num_parity, ec_type, hd)
whole_file_bytes = self.get_tmp_file(file_size).read()
timer.start()
for l in range(iterations):
fragments = pyeclib_c.encode(handle, whole_file_bytes)
tsum = timer.stop_and_return()
return tsum / iterations
def time_decode(self,
num_data, num_parity, ec_type, hd,
file_size, iterations):
"""
:return 2-tuple, (success, average decode time)
"""
timer = Timer()
tsum = 0
handle = pyeclib_c.init(num_data, num_parity, ec_type, hd)
whole_file_bytes = self.get_tmp_file(file_size).read()
success = True
fragments = pyeclib_c.encode(handle, whole_file_bytes)
orig_fragments = fragments[:]
for i in range(iterations):
missing_idxs = []
num_missing = hd - 1
for j in range(num_missing):
num_frags_left = len(fragments)
idx = random.randint(0, num_frags_left - 1)
fragments.pop(idx)
timer.start()
decoded_file_bytes = pyeclib_c.decode(handle,
fragments,
len(fragments[0]))
tsum += timer.stop_and_return()
fragments = orig_fragments[:]
if whole_file_bytes != decoded_file_bytes:
success = False
return success, tsum / iterations
def time_range_decode(self,
num_data, num_parity, ec_type, hd,
file_size, iterations):
"""
:return 2-tuple, (success, average decode time)
"""
timer = Timer()
tsum = 0
handle = pyeclib_c.init(num_data, num_parity, ec_type, hd)
whole_file_bytes = self.get_tmp_file(file_size).read()
success = True
begins = [int(random.randint(0, len(whole_file_bytes) - 1)) for i in range(3)]
ends = [int(random.randint(begins[i], len(whole_file_bytes))) for i in range(3)]
ranges = list(zip(begins, ends))
fragments = pyeclib_c.encode(handle, whole_file_bytes)
orig_fragments = fragments[:]
for i in range(iterations):
missing_idxs = []
num_missing = hd - 1
for j in range(num_missing):
num_frags_left = len(fragments)
idx = random.randint(0, num_frags_left - 1)
fragments.pop(idx)
timer.start()
decoded_file_ranges = pyeclib_c.decode(handle,
fragments,
len(fragments[0]),
ranges)
tsum += timer.stop_and_return()
fragments = orig_fragments[:]
range_offset = 0
for r in ranges:
if whole_file_bytes[r[0]:r[1]+1] != decoded_file_ranges[range_offset]:
success = False
range_offset += 1
return success, tsum / iterations
def time_reconstruct(self,
num_data, num_parity, ec_type, hd,
file_size, iterations):
"""
:return 2-tuple, (success, average reconstruct time)
"""
timer = Timer()
tsum = 0
handle = pyeclib_c.init(num_data, num_parity, ec_type, hd)
whole_file_bytes = self.get_tmp_file(file_size).read()
success = True
orig_fragments = pyeclib_c.encode(handle, whole_file_bytes)
for i in range(iterations):
fragments = orig_fragments[:]
num_missing = 1
missing_idxs = []
for j in range(num_missing):
num_frags_left = len(fragments)
idx = random.randint(0, num_frags_left - 1)
while idx in missing_idxs:
idx = random.randint(0, num_frags_left - 1)
missing_idxs.append(idx)
fragments.pop(idx)
timer.start()
reconstructed_fragment = pyeclib_c.reconstruct(handle,
fragments,
len(fragments[0]),
missing_idxs[0])
tsum += timer.stop_and_return()
if orig_fragments[missing_idxs[0]] != reconstructed_fragment:
success = False
# Output the fragments for debugging
with open("orig_fragments", "wb") as fd_orig:
fd_orig.write(orig_fragments[missing_idxs[0]])
with open("decoded_fragments", "wb") as fd_decoded:
fd_decoded.write(reconstructed_fragment)
print(("Fragment %d was not reconstructed!!!" % missing_idxs[0]))
sys.exit(2)
return success, tsum / iterations
def get_throughput(self, avg_time, size_str):
size_desc = size_str.split("-")
size = float(size_desc[0])
if size_desc[1] == 'M':
throughput = size / avg_time
elif size_desc[1] == 'K':
throughput = (size / 1000.0) / avg_time
return format(throughput, '.10g')
def test_xor_code(self):
if "flat_xor_hd_3" not in VALID_EC_TYPES:
print("xor backend is not available in your enviromnet, skipping test")
return
for (ec_type, k, m, hd) in self.xor_types:
print(("\nRunning tests for flat_xor_hd k=%d, m=%d, hd=%d" % (k, m, hd)))
for size_str in self.sizes:
avg_time = self.time_encode(k, m, ec_type.value, hd,
size_str,
self.iterations)
print("Encode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
for size_str in self.sizes:
success, avg_time = self.time_decode(k, m, ec_type.value, hd,
size_str,
self.iterations)
self.assertTrue(success)
print("Decode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
for size_str in self.sizes:
success, avg_time = self.time_reconstruct(k, m, ec_type.value, hd,
size_str,
self.iterations)
self.assertTrue(success)
print("Reconstruct (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
def test_shss(self):
if "shss" not in VALID_EC_TYPES:
print("shss backend is not available in your enviromnet, skipping test")
return
for (ec_type, k, m) in self.shss:
print(("\nRunning tests for %s k=%d, m=%d" % (ec_type, k, m)))
success = self._test_get_required_fragments(k, m, ec_type)
self.assertTrue(success)
for size_str in self.sizes:
avg_time = self.time_encode(k, m, ec_type.value, 0,
size_str,
self.iterations)
print("Encode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
for size_str in self.sizes:
success, avg_time = self.time_decode(k, m, ec_type.value, 0,
size_str,
self.iterations)
self.assertTrue(success)
print("Decode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
for size_str in self.sizes:
success, avg_time = self.time_reconstruct(k, m, ec_type.value, 0,
size_str,
self.iterations)
self.assertTrue(success)
print("Reconstruct (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str)))
def _test_get_required_fragments(self, num_data, num_parity, ec_type):
"""
:return boolean, True if all tests passed
"""
handle = pyeclib_c.init(num_data, num_parity, ec_type.value)
success = True
#
# MDS codes need any k fragments
#
if ec_type in ["jerasure_rs_vand", "jerasure_rs_cauchy",
"liberasurecode_rs_vand"]:
expected_fragments = [i for i in range(num_data + num_parity)]
missing_fragments = []
#
# Remove between 1 and num_parity
#
for i in range(random.randint(0, num_parity - 1)):
missing_fragment = random.sample(expected_fragments, 1)[0]
missing_fragments.append(missing_fragment)
expected_fragments.remove(missing_fragment)
expected_fragments = expected_fragments[:num_data]
required_fragments = pyeclib_c.get_required_fragments(
handle,
missing_fragments, [])
if expected_fragments != required_fragments:
success = False
print(("Unexpected required fragments list "
"(exp != req): %s != %s" %
(expected_fragments, required_fragments)))
return success
def test_codes(self):
for ec_type in self.rs_types:
if ec_type.name not in VALID_EC_TYPES:
print("%s backend is not available in your enviromnet, skipping test" % ec_type.name)
continue
print(("\nRunning tests for %s" % (ec_type)))
for i in range(len(self.num_datas)):
success = self._test_get_required_fragments(self.num_datas[i],
self.num_parities[i],
ec_type)
self.assertTrue(success)
for i in range(len(self.num_datas)):
for size_str in self.sizes:
avg_time = self.time_encode(self.num_datas[i],
self.num_parities[i],
ec_type.value, self.num_parities[i] + 1,
size_str, self.iterations)
print(("Encode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str))))
for i in range(len(self.num_datas)):
for size_str in self.sizes:
success, avg_time = self.time_decode(self.num_datas[i],
self.num_parities[i],
ec_type.value, self.num_parities[i] + 1,
size_str, self.iterations)
self.assertTrue(success)
print(("Decode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str))))
for i in range(len(self.num_datas)):
for size_str in self.sizes:
success, avg_time = self.time_range_decode(self.num_datas[i],
self.num_parities[i],
ec_type.value, self.num_parities[i] + 1,
size_str, self.iterations)
self.assertTrue(success)
print(("Range Decode (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str))))
for i in range(len(self.num_datas)):
for size_str in self.sizes:
success, avg_time = self.time_reconstruct(self.num_datas[i],
self.num_parities[i],
ec_type.value, self.num_parities[i] + 1,
size_str,
self.iterations)
self.assertTrue(success)
print(("Reconstruct (%s): %s" %
(size_str, self.get_throughput(avg_time, size_str))))
if __name__ == "__main__":
unittest.main()