pyeclib/tools/pyeclib_conf_tool.py
Pete Zaitcev 0155a20b64 Fix a few print statements for py3
Fedora packaging mandates support for py3 nowadays, so I have to
fix stuff like this. Interestingly enough, we don't seem to need
from future import __print_statement__ here.
2015-04-02 21:47:18 -06:00

267 lines
8.0 KiB
Python

# Copyright (c) 2013, Kevin Greenan (kmgreen2@gmail.com)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution. THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# PyEClib Companion tool
# Goal: When defining an EC pool, help cluster admin make an informed choice
# between available EC implementations. Generate sample swift.conf + swift-
# ring-builder hints.
#
# Suggested features:
#
# - List the "EC types" supported - EC algorithms
# - List implementations of each EC type available on the platform
# (dumb-software-only, software with SIMD acceleration,
# specialized hardware, etc).
# - Benchmark each algorithm with each available implementation and display
# the performance numbers.
# - Generate sample EC policy entry (for inclusion in swift.conf) for the
# best performing algorithm + implementation. (And optionally provide swift-
# ring-builder hints).
#
# Suggested EC policy entry format:
#
# ======== swift.conf ============
# [storage-policy:10]
# type = erasure_coding
# name = ec_jerasure_rs_cauchy_12_2
# ec_type = jerasure_rs_cauchy
# ec_k = 12
# ec_m = 2
# ============================
#
# (ec_type values are one of those available within PyEClib)
#
# User input: Num data, num parity, average file size
# Output: Ordered list of options and their corresponding conf entries
# (limit 10)
#
from pyeclib.ec_iface import ECDriver
import random
import string
import sys
import argparse
import time
import math
class Timer:
    """Minimal stopwatch built on ``time.time()``.

    ``start_time`` and ``end_time`` hold the readings taken by the most
    recent ``start()`` and ``stop()`` calls. Both are 0 on a fresh
    instance and after ``reset()``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set both recorded timestamps back to 0."""
        self.start_time = self.end_time = 0

    def start(self):
        """Record the current time as the start of the interval."""
        self.start_time = time.time()

    def stop(self):
        """Record the current time as the end of the interval."""
        self.end_time = time.time()

    def curr_delta(self):
        """Return ``end_time - start_time`` (seconds, per ``time.time()``)."""
        elapsed = self.end_time - self.start_time
        return elapsed

    def stop_and_return(self):
        """Stop the timer and return the elapsed interval in one call."""
        self.stop()
        return self.curr_delta()
def nCr(n, r):
    """Return the binomial coefficient "n choose r" as an exact integer.

    Integer (floor) division is used so the result stays an ``int`` on
    both Python 2 and Python 3; true division would yield a float on
    Python 3 and lose precision for large operands.

    :param n: size of the set (non-negative integer)
    :param r: number of elements chosen (non-negative integer)
    :returns: number of r-element subsets of an n-element set; 0 when
              ``r > n``
    :raises ValueError: if ``n`` or ``r`` is negative (raised by
                        ``math.factorial``)
    """
    f = math.factorial
    # Evaluate both factorials up front so negative arguments are still
    # rejected before the r > n shortcut below.
    n_fact = f(n)
    r_fact = f(r)
    if r > n:
        # There is no way to pick more elements than the set contains.
        return 0
    # The product r! * (n - r)! always divides n! exactly.
    return n_fact // (r_fact * f(n - r))
class ECScheme:
    """Plain record describing one candidate erasure-coding configuration.

    Attributes:
        k: value stored from the ``k`` constructor argument
        m: value stored from the ``m`` constructor argument
        ec_type: value stored from the ``ec_type`` constructor argument
    """

    def __init__(self, k, m, ec_type):
        self.k, self.m, self.ec_type = k, m, ec_type

    def __str__(self):
        """Render the scheme as ``k=<k> m=<m> ec_type=<ec_type>``."""
        fields = (self.k, self.m, self.ec_type)
        return "k=%d m=%d ec_type=%s" % fields
# (k, m) pairs for which get_viable_schemes() will offer "flat_xor_hd_3":
# k from 6 through 15, always paired with m = 6.
valid_flat_xor_hd_3 = [(k, 6) for k in range(6, 16)]
# (k, m) pairs for which get_viable_schemes() will offer "flat_xor_hd_4":
# k from 6 through 20, always paired with m = 6.
valid_flat_xor_hd_4 = [(k, 6) for k in range(6, 21)]
def get_viable_schemes(
        max_num_frags, minimum_rate, avg_stripe_size, fault_tolerance):
    """Enumerate the EC schemes that satisfy the caller's constraints.

    Every returned scheme uses exactly ``max_num_frags`` fragments in
    total (k + m == max_num_frags).

    :param max_num_frags: total number of fragments (k + m) per scheme
    :param minimum_rate: lowest acceptable coding rate; k is never below
                         ceil(minimum_rate * max_num_frags)
    :param avg_stripe_size: accepted but not used by this function
    :param fault_tolerance: minimum number of parity fragments (m); the
                            XOR codes are only considered for 2 or 3
    :returns: list of ECScheme objects ordered by increasing k; for each
              k the order is jerasure_rs_vand, jerasure_rs_cauchy, then
              the matching flat XOR code if it qualifies. The list is
              empty when no (k, m) split can meet both constraints.
    """
    # Smallest k that still meets the requested coding rate.
    lowest_k = int(math.ceil(minimum_rate * max_num_frags))
    # The fault tolerance is the smallest acceptable parity count.
    lowest_m = fault_tolerance

    viable = []

    # No split of max_num_frags can satisfy both lower bounds at once.
    if lowest_k + lowest_m > max_num_frags:
        return viable

    # Walk EC(k, max_num_frags - k) for k in [lowest_k, n - lowest_m].
    for data_frags in range(lowest_k, max_num_frags - lowest_m + 1):
        parity_frags = max_num_frags - data_frags

        for rs_type in ("jerasure_rs_vand", "jerasure_rs_cauchy"):
            viable.append(ECScheme(data_frags, parity_frags, rs_type))

        # The XOR codes are a little trickier and are only checked when
        # fault_tolerance is 2 or 3.
        #
        # Constraint for 2: k <= (m choose 2)
        # Constraint for 3: k <= (m choose 3)
        #
        # The '3' in flat_xor_hd_3 (and '4' in flat_xor_hd_4) refers to the
        # Hamming distance, which means the code guarantees the
        # reconstruction of any 2 lost fragments (or 3 for flat_xor_hd_4).
        if fault_tolerance == 2:
            choose, xor_type, allowed = 2, "flat_xor_hd_3", valid_flat_xor_hd_3
        elif fault_tolerance == 3:
            choose, xor_type, allowed = 3, "flat_xor_hd_4", valid_flat_xor_hd_4
        else:
            continue

        # Only offer the XOR code when the combinatorial bound holds and
        # the (k, m) pair is on the list of valid configurations.
        k_bound = nCr(parity_frags, choose)
        if data_frags <= k_bound and (data_frags, parity_frags) in allowed:
            viable.append(ECScheme(data_frags, parity_frags, xor_type))

    return viable
# Command-line driver: parse the constraints, benchmark every viable scheme
# and print ready-to-paste swift.conf policy entries, fastest first.
parser = argparse.ArgumentParser(
    description='PyECLib tool to evaluate viable EC options, benchmark them '
                'and report results with the appropriate conf entries.')
parser.add_argument(
    '-n',
    type=int,
    help='max number of fragments',
    required=True)
parser.add_argument('-f', type=int, help='fault tolerance', required=True)
parser.add_argument(
    '-r',
    type=float,
    help='minimum coding rate (num_data / num_data+num_parity)',
    required=True)
parser.add_argument('-s', type=int, help='average stripe size', required=True)
parser.add_argument(
    '-l',
    type=int,
    help='set limit on number of entries returned (default = 10)',
    default=10,
)

args = parser.parse_args(sys.argv[1:])

MB = 1024 * 1024

# The benchmark payload is 's' bytes long; refuse anything above 10 MB.
if args.s > 10 * MB:
    print("s must be smaller than 10 MB.")
    sys.exit(1)

# Instantiate the timer
timer = Timer()

return_limit = args.l

schemes = get_viable_schemes(args.n, args.r, args.s, args.f)

# Results will be List[(scheme, duration)]
results = []

# Number of encode calls timed per scheme
num_iterations = 10

for scheme in schemes:
    print(scheme)

    # Generate a new random payload for each test
    file_str = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for x in range(args.s))
    # Hand the driver bytes rather than text. On Python 2 this is a no-op
    # for the ASCII payload.
    # NOTE(review): believed to be required by pyeclib on Python 3 --
    # confirm against the installed version.
    file_bytes = file_str.encode('ascii')

    try:
        ec_driver = ECDriver(k=scheme.k, m=scheme.m, ec_type=scheme.ec_type)
    except Exception as e:
        # Deliberate best-effort: skip schemes the local library rejects.
        print("Scheme %s is not defined (%s)." % (scheme, e))
        continue

    timer.start()

    for i in range(num_iterations):
        ec_driver.encode(file_bytes)

    duration = timer.stop_and_return()

    results.append((scheme, duration))

    timer.reset()

print(results)

# Fastest first. list.sort() no longer accepts a comparison function on
# Python 3, so sort on the measured duration via key= instead.
results.sort(key=lambda entry: entry[1])

# Slice so that at most 'return_limit' entries are printed (a negative limit
# prints nothing).
for i, (ranked_scheme, _) in enumerate(results[:max(return_limit, 0)]):
    print("\n\nPerf Rank #%d:" % i)

    print(" ======== To Use this Policy, Copy and Paste Text (not including "
          "this header and footer) to Swift Conf ========")
    print(" type = erasure_coding")
    print(" name = %s_%d_%d" % (ranked_scheme.ec_type,
                                ranked_scheme.k, ranked_scheme.m))
    print(" ec_type = %s" % ranked_scheme.ec_type)
    print(" ec_k = %s" % ranked_scheme.k)
    print(" ec_m = %s" % ranked_scheme.m)
    print(" ================================================================"
          "==============================================")