Plugging liberasurecode functions into pyeclib

Python changes needed to plug-in liberasurecode.

Changes needed to get PyECLib working with liberasurecode and some of the unit tests passing.
This commit is contained in:
Kevin Greenan 2014-07-30 21:54:07 -07:00
parent f81a59b592
commit 9d67b8726d
9 changed files with 268 additions and 1790 deletions

4
README
View File

@ -39,8 +39,8 @@ PyEClib initialization::
Supported ``ec_type`` values:
* ``rs_vand`` => Vandermonde Reed-Solomon encoding
* ``flat_xor_3``, ``flat_xor_4`` => Flat-XOR based HD combination codes
* ``jerasure_rs_vand`` => Vandermonde Reed-Solomon encoding
* ``flat_xor_hd_3``, ``flat_xor_hd_4`` => Flat-XOR based HD combination codes
A configuration utility is provided to help compare available EC schemes in
terms of performance and redundancy:: tools/pyeclib_conf_tool.py

File diff suppressed because it is too large Load Diff

View File

@ -30,122 +30,14 @@
/* For exact-width integer types */
#include <stdint.h>
/*
* Make sure these enum values match those exposed from the Python EC interface
* src/python/pyeclib/ec_iface.py
*/
#define PYECLIB_MAX_DATA 32
#define PYECLIB_MAX_PARITY 32
/*
 * Identifiers for the supported erasure-code types.  The numeric values
 * double as indexes into the per-type tables (pyeclib_type_str and the
 * pyeclib_type_* arrays), so the order here must match those tables.
 */
typedef enum {
PYECC_NOT_FOUND = 0, /* returned by get_ecc_type() when no name matches */
PYECC_RS_VAND = 1,
PYECC_RS_CAUCHY_ORIG = 2,
PYECC_XOR_HD_3 = 3,
PYECC_XOR_HD_4 = 4,
PYECC_NUM_TYPES = 4, /* equals the highest type value; get_ecc_type() loops up to and including it */
} pyeclib_type_t;
/*
 * String name of each EC type, indexed by pyeclib_type_t value.
 * get_ecc_type() compares its argument against entries 1..PYECC_NUM_TYPES.
 */
const char *pyeclib_type_str[] = {
"not_found",
"jerasure_rs_vand",
"jerasure_rs_cauchy_orig",
"flat_xor_3",
"flat_xor_4",
};
/*
 * Per-type word size in bytes, indexed by pyeclib_type_t value and read
 * through the PYECLIB_WORD_SIZE() macro.
 */
const int pyeclib_type_word_size_bytes[] = {
0,
sizeof(long), /* rs_vand */
sizeof(long), /* rs_cauchy_orig */
sizeof(long), /* flat_xor_3 */
sizeof(long) /* flat_xor_4 */
};
/*
 * Value returned by get_best_w_for_ecc_type() for each EC type, indexed by
 * pyeclib_type_t value.
 * NOTE(review): the name says "bytes" but the accessor's comment calls this
 * the "w value" -- confirm the intended unit.
 */
const int pyeclib_type_best_w_size_bytes[] = {
32, /* default */
16, /* rs_vand */
4, /* rs_cauchy_orig */
32, /* flat_xor_3 */
32 /* flat_xor_4 */
};
// Per-type address-alignment flag, indexed by pyeclib_type_t value and read
// through the PYECLIB_NEEDS_ADDR_ALIGNMENT() macro.
// Unconditionally enforce alignment for now.
// This is needed for the SIMD extensions.
// TODO (kmg): Parse cpuinfo and determine if it is necessary...
const int pyeclib_type_needs_addr_align[] = {
-1, // slot for PYECC_NOT_FOUND
1,
1,
1,
1
};
typedef struct pyeclib_s
{
int k;
int m;
int w;
int *matrix;
int *bitmatrix;
int **schedule;
xor_code_t *xor_code_desc;
alg_sig_t *alg_sig_desc;
pyeclib_type_t type;
int inline_chksum;
int algsig_chksum;
int ec_desc;
struct ec_args ec_args;
} pyeclib_t;
/*
 * Look up an EC type by its string name.
 *
 * str_type is compared against pyeclib_type_str[1..PYECC_NUM_TYPES]; slot 0
 * ("not_found") is deliberately skipped.  Returns the index of the first
 * matching entry as a pyeclib_type_t, or PYECC_NOT_FOUND when none matches.
 */
static inline
pyeclib_type_t get_ecc_type(const char *str_type)
{
    int candidate = 1;

    while (candidate <= PYECC_NUM_TYPES) {
        if (!strcmp(str_type, pyeclib_type_str[candidate])) {
            return candidate;
        }
        candidate++;
    }

    return PYECC_NOT_FOUND;
}
/*
 * Return the preferred w for the given EC type.
 *
 * type indexes pyeclib_type_best_w_size_bytes directly (slot 0 holds the
 * default).  An out-of-range type yields (unsigned int)-1, i.e. UINT_MAX;
 * that is the value the previous bare `return -1` already produced once it
 * was converted to the unsigned return type, now spelled out explicitly.
 */
static inline
unsigned int get_best_w_for_ecc_type(pyeclib_type_t type)
{
    /*
     * Check both ends of the range: the enum's underlying integer type is
     * implementation-defined, so a corrupted value may compare as negative
     * and would otherwise index before the start of the table.
     */
    if ((int)type < 0 || (int)type > PYECC_NUM_TYPES)
        return (unsigned int)-1;

    return (unsigned int)pyeclib_type_best_w_size_bytes[type];
}
#define PYECC_FLAGS_MASK 0x1
#define PYECC_FLAGS_READ_VERIFY 0x1
#define PYECC_HANDLE_NAME "pyeclib_handle"
#define PYECC_CAUCHY_PACKETSIZE (sizeof(long) * 128)
#define PYCC_MAX_SIG_LEN 8
typedef struct fragment_metadata_s
{
int size;
int idx;
char signature[PYCC_MAX_SIG_LEN];
int chksum_mismatch;
} fragment_metadata_t;
#define FRAGSIZE_2_BLOCKSIZE(fragment_size) (fragment_size - sizeof(fragment_header_t))
#define PYECLIB_WORD_SIZE(type) pyeclib_type_word_size_bytes[type]
#define PYECLIB_NEEDS_ADDR_ALIGNMENT(type) pyeclib_type_needs_addr_align[type]
#endif

View File

@ -47,6 +47,7 @@ class ECPyECLibDriver(object):
self.m = m
self.ec_type = ec_type
self.chksum_type = chksum_type
hd = m
self.inline_chksum = 0
self.algsig_chksum = 0
@ -56,39 +57,53 @@ class ECPyECLibDriver(object):
elif self.chksum_type is PyECLib_FRAGHDRCHKSUM_Types.algsig:
self.algsig_chksum = 1
name = self.ec_type.name
if name == "flat_xor_hd_3":
hd = 3
name = "flat_xor_hd"
elif name == "flat_xor_hd_4":
hd = 4
name = "flat_xor_hd"
self.handle = pyeclib_c.init(
self.k,
self.m,
self.ec_type.name,
name,
hd,
self.inline_chksum,
self.algsig_chksum)
def encode(self, data_bytes):
return pyeclib_c.encode(self.handle, data_bytes)
def _validate_and_return_fragment_size(self, fragments):
    """Return the common length of ``fragments``, or -1 if they are unusable.

    -1 is returned when ``fragments`` is empty, when its first entry is
    empty, or when any later entry's length differs from the first one's.
    Callers treat a negative result as invalid input.
    """
    # An empty collection used to fall through to ``fragments[0]`` and raise
    # IndexError; report it as invalid like every other malformed input.
    if len(fragments) == 0:
        return -1

    fragment_len = len(fragments[0])
    if fragment_len == 0:
        return -1

    # Every remaining fragment must be exactly as long as the first.
    if any(len(fragment) != fragment_len for fragment in fragments[1:]):
        return -1
    return fragment_len
def decode(self, fragment_payloads):
try:
ret_string = pyeclib_c.fragments_to_string(
self.handle,
fragment_payloads)
except Exception as e:
raise ECPyECLibException("Error in ECPyECLibDriver.decode")
fragment_len = self._validate_and_return_fragment_size(fragment_payloads)
if fragment_len < 0:
raise ECPyECLibException("Invalid fragment payload in ECPyECLibDriver.decode")
if ret_string is None:
(data_frags,
parity_frags,
missing_idxs) = pyeclib_c.get_fragment_partition(
self.handle, fragment_payloads)
decoded_fragments = pyeclib_c.decode(
self.handle, data_frags, parity_frags, missing_idxs,
len(data_frags[0]))
ret_string = pyeclib_c.fragments_to_string(
self.handle,
decoded_fragments)
if len(fragment_payloads) < self.k:
raise ECPyECLibException("Not enough fragments given in ECPyECLibDriver.decode")
return ret_string
return pyeclib_c.decode(self.handle, fragment_payloads, fragment_len)
def reconstruct(self, fragment_payloads, indexes_to_reconstruct):
fragment_len = self._validate_and_return_fragment_size(fragment_payloads)
if fragment_len < 0:
raise ECPyECLibException("Invalid fragment payload in ECPyECLibDriver.reconstruct")
reconstructed_data = []
# Reconstruct the data, then the parity
@ -99,13 +114,8 @@ class ECPyECLibDriver(object):
while len(_indexes_to_reconstruct) > 0:
index = _indexes_to_reconstruct.pop(0)
(data_frags,
parity_frags,
missing_idxs) = pyeclib_c.get_fragment_partition(
self.handle, fragment_payloads)
reconstructed = pyeclib_c.reconstruct(
self.handle, data_frags, parity_frags, missing_idxs,
index, len(data_frags[0]))
self.handle, fragment_payloads, index, fragment_len)
reconstructed_data.append(reconstructed)
return reconstructed_data
@ -118,7 +128,11 @@ class ECPyECLibDriver(object):
return pyeclib_c.get_metadata(self.handle, fragment)
def verify_stripe_metadata(self, fragment_metadata_list):
    """Check one stripe's fragment metadata via ``pyeclib_c.check_metadata``.

    All entries of ``fragment_metadata_list`` must share the same non-zero
    length; that length is forwarded to the C extension together with the
    list itself.

    Raises ECPyECLibException when the size validation rejects the list.
    """
    # Validate the argument that was actually passed in.  The earlier text
    # referenced an undefined ``fragment_payloads`` name (NameError) and was
    # placed after an unconditional return, so it never ran.
    metadata_len = self._validate_and_return_fragment_size(fragment_metadata_list)
    if metadata_len < 0:
        raise ECPyECLibException("Invalid fragment payload in ECPyECLibDriver.verify_metadata")
    return pyeclib_c.check_metadata(self.handle, fragment_metadata_list, metadata_len)
def get_segment_info(self, data_len, segment_size):
return pyeclib_c.get_segment_info(self.handle, data_len, segment_size)

View File

@ -82,10 +82,10 @@ class PyECLibEnum(Enum):
class PyECLib_EC_Types(PyECLibEnum):
# Note: Enum values start at 1 rather than 0, because 0 is False in the
# boolean sense whereas enum members evaluate to True
rs_vand = 1
rs_cauchy_orig = 2
flat_xor_3 = 3
flat_xor_4 = 4
jerasure_rs_vand = 1
jerasure_rs_cauchy = 2
flat_xor_hd_3 = 3
flat_xor_hd_4 = 4
# Output of Erasure (en)Coding process are data "fragments". Fragment data

View File

@ -37,7 +37,7 @@ if [ ! -d ${FRAGMENT_DIR} ]; then
mkdir ${FRAGMENT_DIR}
fi
TYPES="flat_xor_4 flat_xor_3 rs_vand rs_cauchy_orig"
TYPES="flat_xor_hd_4 flat_xor_hd_3 jerasure_rs_vand jerasure_rs_cauchy"
NUM_DATAS="10 11 12"
RS_NUM_PARITIES="2 3 4"
XOR_NUM_PARITIES="6"
@ -51,16 +51,16 @@ for TYPE in ${TYPES}; do
rm ${DECODED_DIR}/*
rm ${FRAGMENT_DIR}/*
NUM_PARITIES=${RS_NUM_PARITIES}
if [[ `echo flat_xor_4 flat_xor_3 | grep ${TYPE}` ]]; then
if [[ `echo flat_xor_hd_4 flat_xor_hd_3 | grep ${TYPE}` ]]; then
NUM_PARITIES=${XOR_NUM_PARITIES}
fi
for NUM_PARITY in ${NUM_PARITIES}; do
let NUM_TOTAL=$(( NUM_DATA + NUM_PARITY))
FAULT_TOL=${NUM_PARITY}
if [[ ${TYPE} == "flat_xor_4" ]]; then
if [[ ${TYPE} == "flat_xor_hd_4" ]]; then
FAULT_TOL="3"
fi
if [[ ${TYPE} == "flat_xor_3" ]]; then
if [[ ${TYPE} == "flat_xor_hd_3" ]]; then
FAULT_TOL="2"
fi
for file in `cd ${FILES}; echo *; cd ..`; do

View File

@ -103,11 +103,11 @@ class TestPyECLibDriver(unittest.TestCase):
def test_small_encode(self):
pyeclib_drivers = []
pyeclib_drivers.append(ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand"))
k=12, m=2, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(ECDriver("pyeclib.core.ECPyECLibDriver",
k=11, m=2, ec_type="rs_vand"))
k=11, m=2, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=2, ec_type="rs_vand"))
k=10, m=2, ec_type="jerasure_rs_vand"))
encode_strs = [b"a", b"hello", b"hellohyhi", b"yo"]
@ -122,19 +122,19 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand", chksum_type="algsig"))
k=12, m=2, ec_type="jerasure_rs_vand", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_vand", chksum_type="algsig"))
k=12, m=3, ec_type="jerasure_rs_vand", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=6, ec_type="flat_xor_4", chksum_type="algsig"))
k=12, m=6, ec_type="flat_xor_hd_4", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_4", chksum_type="algsig"))
k=10, m=5, ec_type="flat_xor_hd_4", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_3", chksum_type="algsig"))
k=10, m=5, ec_type="flat_xor_hd_3", chksum_type="algsig"))
filesize = 1024 * 1024 * 3
file_str = ''.join(random.choice(ascii_letters) for i in range(filesize))
@ -163,19 +163,19 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand", chksum_type="algsig"))
k=12, m=2, ec_type="jerasure_rs_vand", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_vand", chksum_type="algsig"))
k=12, m=3, ec_type="jerasure_rs_vand", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=6, ec_type="flat_xor_4", chksum_type="algsig"))
k=12, m=6, ec_type="flat_xor_hd_4", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_4", chksum_type="algsig"))
k=10, m=5, ec_type="flat_xor_hd_4", chksum_type="algsig"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_3", chksum_type="algsig"))
k=10, m=5, ec_type="flat_xor_hd_3", chksum_type="algsig"))
filesize = 1024 * 1024 * 3
file_str = ''.join(random.choice(ascii_letters) for i in range(filesize))
@ -197,16 +197,16 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=2, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=3, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=4, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=4, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_cauchy_orig", chksum_type="inline_crc32"))
k=12, m=2, ec_type="jerasure_rs_cauchy", chksum_type="inline_crc32"))
filesize = 1024 * 1024 * 3
file_str = ''.join(random.choice(ascii_letters) for i in range(filesize))
@ -240,16 +240,16 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=2, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=3, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=4, ec_type="rs_vand", chksum_type="inline_crc32"))
k=12, m=4, ec_type="jerasure_rs_vand", chksum_type="inline_crc32"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_cauchy_orig", chksum_type="inline_crc32"))
k=12, m=2, ec_type="jerasure_rs_cauchy", chksum_type="inline_crc32"))
filesize = 1024 * 1024 * 3
file_str = ''.join(random.choice(ascii_letters) for i in range(filesize))
@ -271,13 +271,13 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand"))
k=12, m=2, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=11, m=2, ec_type="rs_vand"))
k=11, m=2, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=2, ec_type="rs_vand"))
k=10, m=2, ec_type="jerasure_rs_vand"))
file_sizes = [
1024 * 1024,
@ -346,28 +346,28 @@ class TestPyECLibDriver(unittest.TestCase):
pyeclib_drivers = []
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_vand"))
k=12, m=2, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=2, ec_type="rs_cauchy_orig"))
k=12, m=2, ec_type="jerasure_rs_cauchy"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_vand"))
k=12, m=3, ec_type="jerasure_rs_vand"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=3, ec_type="rs_cauchy_orig"))
k=12, m=3, ec_type="jerasure_rs_cauchy"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=12, m=6, ec_type="flat_xor_4"))
k=12, m=6, ec_type="flat_xor_hd_4"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_4"))
k=10, m=5, ec_type="flat_xor_hd_4"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=10, m=5, ec_type="flat_xor_3"))
k=10, m=5, ec_type="flat_xor_hd_3"))
pyeclib_drivers.append(
ECDriver("pyeclib.core.ECPyECLibDriver",
k=9, m=5, ec_type="flat_xor_3"))
k=9, m=5, ec_type="flat_xor_hd_3"))
for pyeclib_driver in pyeclib_drivers:
for file_size in self.file_sizes:

View File

@ -58,10 +58,10 @@ class TestPyECLib(unittest.TestCase):
self.iterations = 100
# EC algorithm and config parameters
self.rs_types = [("rs_vand"), ("rs_cauchy_orig")]
self.xor_types = [("flat_xor_4", 12, 6, 4),
("flat_xor_4", 10, 5, 4),
("flat_xor_3", 10, 5, 3)]
self.rs_types = [("jerasure_rs_vand"), ("jerasure_rs_cauchy")]
self.xor_types = [("flat_xor_hd_4", 12, 6, 4),
("flat_xor_hd_4", 10, 5, 4),
("flat_xor_hd_3", 10, 5, 3)]
# Input temp files for testing
self.sizes = ["101-K", "202-K", "303-K"]
@ -332,7 +332,7 @@ class TestPyECLib(unittest.TestCase):
#
# MDS codes need any k fragments
#
if ec_type in ["rs_vand", "rs_cauchy_orig"]:
if ec_type in ["jerasure_rs_vand", "jerasure_rs_cauchy"]:
expected_fragments = [i for i in range(num_data + num_parity)]
missing_fragments = []

View File

@ -44,8 +44,8 @@
# ======== swift.conf ============
# [storage-policy:10]
# type = erasure_coding
# name = ec_rs_cauchy_orig_12_2
# ec_type = rs_cauchy_orig
# name = ec_jerasure_rs_cauchy_12_2
# ec_type = jerasure_rs_cauchy
# ec_k = 12
# ec_m = 2
# ============================
@ -109,11 +109,11 @@ class ECScheme:
def __str__(self):
return "k=%d m=%d w=%d ec_type=%s" % (self.k, self.m, self.w, self.ec_type)
valid_flat_xor_3 = [(6, 6), (7, 6), (8, 6), (9, 6),
valid_flat_xor_hd_3 = [(6, 6), (7, 6), (8, 6), (9, 6),
(10, 6), (11, 6), (12, 6), (13, 6),
(14, 6), (15, 6)]
valid_flat_xor_4 = [(6, 6), (7, 6), (8, 6), (9, 6),
valid_flat_xor_hd_4 = [(6, 6), (7, 6), (8, 6), (9, 6),
(10, 6), (11, 6), (12, 6), (13, 6),
(14, 6), (15, 6), (16, 6), (17, 6),
(18, 6), (19, 6), (20, 6)]
@ -150,11 +150,11 @@ def get_viable_schemes(
#
for w in [8, 16, 32]:
list_of_schemes.append(
ECScheme(k, max_num_frags - k, w, "rs_vand_%d" % w))
ECScheme(k, max_num_frags - k, w, "jerasure_rs_vand_%d" % w))
for w in [4, 8]:
list_of_schemes.append(
ECScheme(k, max_num_frags - k, w, "rs_cauchy_orig_%d" % w))
ECScheme(k, max_num_frags - k, w, "jerasure_rs_cauchy_%d" % w))
#
# The XOR codes are a little trickier
@ -163,24 +163,24 @@ def get_viable_schemes(
# Constraint for 2: k <= (m choose 2)
# Constraint for 3: k <= (m choose 3)
#
# The '3' flat_xor_3 (and '4' in flat_xor_4) refers to the Hamming
# The '3' flat_xor_hd_3 (and '4' in flat_xor_hd_4) refers to the Hamming
# distance, which means the code guarantees the reconstruction of any
# 2 lost fragments (or 3 in the case of flat_xor_4).
# 2 lost fragments (or 3 in the case of flat_xor_hd_4).
#
# So, only consider the XOR code if the fault_tolerance matches and
# the additional constraint is met
#
if fault_tolerance == 2:
max_k = nCr(max_num_frags - k, 2)
if k <= max_k and (k, max_num_frags - k) in valid_flat_xor_3:
if k <= max_k and (k, max_num_frags - k) in valid_flat_xor_hd_3:
list_of_schemes.append(
ECScheme(k, max_num_frags - k, 0, "flat_xor_3"))
ECScheme(k, max_num_frags - k, 0, "flat_xor_hd_3"))
if fault_tolerance == 3:
max_k = nCr(max_num_frags - k, 3)
if k <= max_k and (k, max_num_frags - k) in valid_flat_xor_4:
if k <= max_k and (k, max_num_frags - k) in valid_flat_xor_hd_4:
list_of_schemes.append(
ECScheme(k, max_num_frags - k, 0, "flat_xor_4"))
ECScheme(k, max_num_frags - k, 0, "flat_xor_hd_4"))
return list_of_schemes