0155a20b64
Fedora packaging mandates support for py3 nowadays, so I have to fix stuff like this. Interestingly enough, we don't seem to need `from __future__ import print_function` here.
267 lines
8.0 KiB
Python
267 lines
8.0 KiB
Python
# Copyright (c) 2013, Kevin Greenan (kmgreen2@gmail.com)
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# Redistributions of source code must retain the above copyright notice, this
|
|
# list of conditions and the following disclaimer.
|
|
#
|
|
# Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution. THIS SOFTWARE IS
|
|
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
|
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
|
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
# PyEClib Companion tool
|
|
# Goal: When defining an EC pool, help cluster admin make an informed choice
|
|
# between available EC implementations. Generate sample swift.conf + swift-
|
|
# ring-builder hints.
|
|
#
|
|
# Suggested features:
|
|
#
|
|
# - List the "EC types" supported - EC algorithms
|
|
# - List implementations of each EC type available on the platform
|
|
# (dumb-software-only, software with SIMD acceleration,
|
|
# specialized hardware, etc).
|
|
# - Benchmark each algorithm with possible implementation and display
|
|
# performance numbers.
|
|
# - Generate sample EC policy entry (for inclusion in swift.conf) for the
|
|
# best performing algorithm + implementation. (And optionally provide swift-
|
|
# ring-builder hints).
|
|
#
|
|
# Suggested EC policy entry format:
|
|
#
|
|
# ======== swift.conf ============
|
|
# [storage-policy:10]
|
|
# type = erasure_coding
|
|
# name = ec_jerasure_rs_cauchy_12_2
|
|
# ec_type = jerasure_rs_cauchy
|
|
# ec_k = 12
|
|
# ec_m = 2
|
|
# ============================
|
|
#
|
|
# (ec_type values are one of those available within PyEClib)
|
|
|
|
#
|
|
# User input: Num data, num parity, average file size
|
|
# Output: Ordered list of options and their corresponding conf entries
|
|
# (limit 10)
|
|
#
|
|
|
|
from pyeclib.ec_iface import ECDriver
|
|
import random
|
|
import string
|
|
import sys
|
|
import argparse
|
|
import time
|
|
import math
|
|
|
|
|
|
class Timer:
    """Minimal stopwatch built on ``time.time()``.

    ``start_time`` and ``end_time`` hold the two most recent readings; both
    are 0 on a fresh instance and after ``reset()``.
    """

    def __init__(self):
        self.start_time = self.end_time = 0

    def reset(self):
        """Set both readings back to 0."""
        self.start_time = self.end_time = 0

    def start(self):
        """Store the current time as the start reading."""
        self.start_time = time.time()

    def stop(self):
        """Store the current time as the end reading."""
        self.end_time = time.time()

    def curr_delta(self):
        """Return the end reading minus the start reading."""
        elapsed = self.end_time - self.start_time
        return elapsed

    def stop_and_return(self):
        """Take the end reading now and return the resulting delta."""
        self.stop()
        return self.curr_delta()
|
|
|
|
|
|
def nCr(n, r):
    """Return the binomial coefficient "n choose r" as an exact integer.

    n: size of the set.
    r: number of elements chosen.

    Raises ValueError (from math.factorial) when n, r or n - r is negative.
    """
    f = math.factorial
    # Floor division keeps the result an int. True division ('/') yields a
    # float on Python 3 and overflows once the factorials exceed float range.
    return f(n) // (f(r) * f(n - r))
|
|
|
|
|
|
class ECScheme:
    """One candidate erasure-coding configuration.

    Holds the data-fragment count ``k``, the parity-fragment count ``m`` and
    the ``ec_type`` name; ``str()`` renders all three on one line.
    """

    def __init__(self, k, m, ec_type):
        self.k, self.m, self.ec_type = k, m, ec_type

    def __str__(self):
        fields = (self.k, self.m, self.ec_type)
        return "k=%d m=%d ec_type=%s" % fields
|
|
|
|
# (k, m) pairs accepted for flat_xor_hd_3: k from 6 through 15, always m = 6.
valid_flat_xor_hd_3 = [(k, 6) for k in range(6, 16)]

# (k, m) pairs accepted for flat_xor_hd_4: k from 6 through 20, always m = 6.
valid_flat_xor_hd_4 = [(k, 6) for k in range(6, 21)]
|
|
|
|
|
|
def get_viable_schemes(
        max_num_frags, minimum_rate, avg_stripe_size, fault_tolerance):
    """List every EC scheme that fits the given constraints.

    max_num_frags: total fragment count n; each scheme has k + m == n.
    minimum_rate: lower bound on the coding rate, used to derive the
        smallest allowed k.
    avg_stripe_size: accepted for the caller's convenience; not used here.
    fault_tolerance: smallest allowed m.

    Returns a list of ECScheme objects (empty when no k/m split fits).
    """
    # Smallest k that still meets the requested coding rate.
    lowest_k = int(math.ceil(minimum_rate * max_num_frags))

    # The parity count must at least match the requested fault tolerance.
    lowest_m = fault_tolerance

    # Not information-theoretically possible: nothing to offer.
    if lowest_k + lowest_m > max_num_frags:
        return []

    viable = []

    # Walk EC(k, n - k) for k in [lowest_k, n - lowest_m].
    for k in range(lowest_k, max_num_frags - lowest_m + 1):
        m = max_num_frags - k

        # Both Reed-Solomon flavours are offered for every split.
        viable.append(ECScheme(k, m, "jerasure_rs_vand"))
        viable.append(ECScheme(k, m, "jerasure_rs_cauchy"))

        # The XOR codes are a little trickier and are only considered for
        # fault_tolerance 2 or 3.
        #
        # Constraint for 2: k <= (m choose 2)
        # Constraint for 3: k <= (m choose 3)
        #
        # The '3' in flat_xor_hd_3 (and '4' in flat_xor_hd_4) is the Hamming
        # distance, meaning the code guarantees reconstruction of any 2 lost
        # fragments (or 3 for flat_xor_hd_4).
        #
        # So the XOR code is added only when the fault tolerance matches,
        # the constraint holds, and (k, m) is in the matching table.
        if fault_tolerance == 2:
            k_cap = nCr(m, 2)
            if k <= k_cap and (k, m) in valid_flat_xor_hd_3:
                viable.append(ECScheme(k, m, "flat_xor_hd_3"))

        if fault_tolerance == 3:
            k_cap = nCr(m, 3)
            if k <= k_cap and (k, m) in valid_flat_xor_hd_4:
                viable.append(ECScheme(k, m, "flat_xor_hd_4"))

    return viable
|
|
|
|
|
|
# Command-line interface: -n, -f, -r and -s are mandatory; -l is optional.
parser = argparse.ArgumentParser(
    description=(
        'PyECLib tool to evaluate viable EC options, benchmark them '
        'and report results with the appropriate conf entries.'
    )
)
parser.add_argument(
    '-n', required=True, type=int, help='max number of fragments')
parser.add_argument(
    '-f', required=True, type=int, help='fault tolerance')
parser.add_argument(
    '-r', required=True, type=float,
    help='minimum coding rate (num_data / num_data+num_parity)')
parser.add_argument(
    '-s', required=True, type=int, help='average stripe size')
parser.add_argument(
    '-l', default=10, type=int,
    help='set limit on number of entries returned (default = 10)')

# Without an explicit list, parse_args() reads sys.argv[1:].
args = parser.parse_args()
|
|
|
|
MB = 1024 * 1024

# Refuse stripe sizes above 10 MB before doing any work.
if args.s > 10 * MB:
    print("s must be smaller than 10 MB.")
    sys.exit(1)

# A single stopwatch is reused for every scheme.
timer = Timer()

return_limit = args.l

schemes = get_viable_schemes(args.n, args.r, args.s, args.f)

# Collects one (scheme, elapsed time) tuple per scheme that could be run.
results = []

# Number of encode calls timed for each scheme.
num_iterations = 10

# Characters the random payload is drawn from.
alphabet = string.ascii_uppercase + string.digits

for scheme in schemes:
    print(scheme)

    # Fresh random payload of args.s characters for each scheme.
    file_str = ''.join(random.choice(alphabet) for _ in range(args.s))

    # Schemes the driver cannot instantiate are reported and skipped.
    try:
        ec_driver = ECDriver(k=scheme.k, m=scheme.m, ec_type=scheme.ec_type)
    except Exception as e:
        print("Scheme %s is not defined (%s)." % (scheme, e))
        continue

    # Time num_iterations back-to-back encodes of the same payload.
    # NOTE(review): encode() is handed a text str here; confirm the driver
    # accepts that on Python 3.
    timer.start()
    for _ in range(num_iterations):
        ec_driver.encode(file_str)
    duration = timer.stop_and_return()

    results.append((scheme, duration))
    timer.reset()

print(results)
|
|
# Rank the schemes fastest-first by their measured duration. list.sort() no
# longer accepts a positional comparison function on Python 3, so the
# ordering is expressed with key= (which also works on Python 2).
results.sort(key=lambda entry: entry[1])

for rank, entry in enumerate(results):
    # Print at most return_limit entries; ranks are zero-based, so stop as
    # soon as rank reaches the limit.
    if rank >= return_limit:
        break

    scheme = entry[0]

    print("\n\nPerf Rank #%d:" % rank)
    print(" ======== To Use this Policy, Copy and Paste Text (not including "
          "this header and footer) to Swift Conf ========")
    print(" type = erasure_coding")
    print(" name = %s_%d_%d" % (scheme.ec_type, scheme.k, scheme.m))
    print(" ec_type = %s" % scheme.ec_type)
    print(" ec_k = %s" % scheme.k)
    print(" ec_m = %s" % scheme.m)
    print(" ================================================================"
          "==============================================")
|