5766ff9893
1. Enhancements to support mixed read/write tests; 2. Add a name field for each storage test cases; 3. Support to run customized storage tests by supplying 'extra_opts'; 4. Fixed the get status API error when staging resources; Change-Id: Iecd4001268757ed24fedbc17813f0c0fb8110c68
401 lines
15 KiB
Python
401 lines
15 KiB
Python
# Copyright 2015 Cisco Systems, Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
|
|
from hdrh.histogram import HdrHistogram
|
|
import redis
|
|
|
|
# Define the version of the KloudBuster agent and VM image
|
|
#
|
|
# When VM is up running, the agent will send the READY message to the
|
|
# KloudBuster main program, along with its version. The main program
|
|
# will check the version to see whether the image meets the minimum
|
|
# requirements to run, and stops with an error if not.
|
|
#
|
|
# This version must be incremented if the interface changes or if new features
|
|
# are added to the agent VM
|
|
__version__ = '5'
|
|
|
|
# TODO(Logging on Agent)
|
|
|
|
def get_image_name():
    '''Build the versioned name of the VM image matching this agent code.

    The result has to agree with the name DIB gives the kloudbuster image.

    Return:
        the image name with the agent version appended and no file
        extension ('.qcow2' is implicit)
    '''
    return 'kloudbuster_v%s' % __version__
|
|
|
|
def get_image_version():
    '''Return the agent/VM image version string held in __version__.'''
    return __version__
|
|
|
|
def exec_command(cmd, cwd=None):
    '''Run a command to completion and return its exit status.

    cmd is handed to subprocess.Popen unchanged and cwd, when given, is the
    working directory of the child. Both stdout and stderr are captured
    through pipes and then discarded.
    '''
    child = subprocess.Popen(cmd, cwd=cwd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the child to exit, so
    # returncode is populated afterwards.
    child.communicate()
    return child.returncode
|
|
|
|
class KB_Instance(object):
    '''Builders for the command lines run by the KloudBuster agent.

    Every method is static and only assembles and returns a command string;
    none of them executes anything. The two route-changing builders also
    print a short debug message describing the route.
    '''

    # Check whether the HTTP Service is up running
    @staticmethod
    def check_http_service(target_url):
        '''Return a shell loop that repeats a curl HEAD request against
        target_url until curl exits with status 0.
        '''
        cmd = 'while true; do\n'
        cmd += 'curl --head %s --connect-timeout 2 --silent\n' % (target_url)
        cmd += 'if [ $? -eq 0 ]; then break; fi\n'
        cmd += 'done'
        return cmd

    # Add static route
    @staticmethod
    def add_static_route(network, next_hop_ip, if_name=None):
        '''Return the "ip route add" command for network via next_hop_ip,
        optionally bound to the interface if_name.
        '''
        debug_msg = "Adding static route %s with next hop %s" % (network, next_hop_ip)
        cmd = "sudo ip route add %s via %s" % (network, next_hop_ip)
        if if_name:
            debug_msg += " and %s" % if_name
            cmd += " dev %s" % if_name
        # Single-argument parenthesised form prints the same text whether
        # print is a statement or a function.
        print(debug_msg)
        return cmd

    # Get static route
    @staticmethod
    def get_static_route(network, next_hop_ip=None, if_name=None):
        '''Return the "ip route show" command for network, narrowed by the
        next hop and/or interface when they are given.
        '''
        cmd = "ip route show %s" % network
        if next_hop_ip:
            cmd += " via %s" % next_hop_ip
        if if_name:
            cmd += " dev %s" % if_name
        return cmd

    # Delete static route
    @staticmethod
    def delete_static_route(network, next_hop_ip=None, if_name=None):
        '''Return the "ip route del" command for network, narrowed by the
        next hop and/or interface when they are given.

        The debug message is built up piece by piece; each optional part is
        appended so the "Deleting static route ..." prefix is never lost.
        '''
        debug_msg = "Deleting static route %s" % network
        cmd = "sudo ip route del %s" % network
        if next_hop_ip:
            debug_msg += " with next hop %s" % next_hop_ip
            cmd += " via %s" % next_hop_ip
        if if_name:
            if next_hop_ip:
                debug_msg += " and %s" % if_name
            else:
                # No next hop was given, so if_name is an interface and must
                # not be labelled as a next hop.
                debug_msg += " on device %s" % if_name
            cmd += " dev %s" % if_name
        print(debug_msg)
        return cmd

    # Run the HTTP benchmarking tool
    @staticmethod
    def run_wrk2(dest_path, target_url, threads, connections,
                 rate_limit, duration, timeout, connection_type,
                 report_interval):
        '''Return the wrk2 command line for one HTTP benchmark run.

        dest_path is the wrk2 executable. A falsy rate_limit is replaced by
        65535. connection_type is accepted but does not appear in the
        generated command.
        '''
        if not rate_limit:
            rate_limit = 65535

        cmd = '%s -t%d -c%d -R%d -d%ds -p%ds --timeout %ds -D2 -e %s' % \
            (dest_path, threads, connections, rate_limit, duration,
             report_interval, timeout, target_url)
        return cmd

    # Init volume
    @staticmethod
    def init_volume(size):
        '''Return a shell snippet that prepares the storage test file.

        When /mnt/volume does not exist, the snippet formats /dev/vdb as
        XFS, mounts it on /mnt/volume and writes a single dd block of the
        given size to kb_storage_test.bin. Otherwise it does nothing.
        '''
        cmd = 'if [ ! -e /mnt/volume ]; then\n'
        cmd += 'mkfs.xfs /dev/vdb && '
        cmd += 'mkdir -p /mnt/volume && '
        cmd += 'mount /dev/vdb /mnt/volume && '
        cmd += 'dd if=/dev/zero of=/mnt/volume/kb_storage_test.bin bs=%s count=1\n' % size
        cmd += 'fi'
        return cmd

    # Run fio
    @staticmethod
    def run_fio(dest_path, name, description, mode, block_size, iodepth, runtime,
                rate_iops=None, rate=None, rwmixread=None, status_interval=None, extra_opts=None):
        '''Return the fio command line for one storage test.

        dest_path is the fio executable. name, mode, block_size, iodepth and
        runtime are always emitted; rate_iops, rate, rwmixread and
        status_interval are emitted only when truthy. extra_opts, when
        given, is appended verbatim at the end. description is accepted but
        does not appear in the generated command.
        '''
        fixed_opt = '--thread --ioengine=libaio --output-format=json+ --direct=1 '
        fixed_opt += '--filename=/mnt/volume/kb_storage_test.bin '
        required_opt = '--name=%s --rw=%s --bs=%s --iodepth=%s --runtime=%s ' %\
            (name, mode, block_size, iodepth, runtime)
        optional_opt = ''
        optional_opt += '--rate_iops=%s ' % rate_iops if rate_iops else ''
        optional_opt += '--rate=%s ' % rate if rate else ''
        optional_opt += '--rwmixread=%s ' % rwmixread if rwmixread else ''
        optional_opt += '--status-interval=%s ' % status_interval if status_interval else ''
        optional_opt += extra_opts if extra_opts else ''
        cmd = '%s %s %s %s' % (dest_path, fixed_opt, required_opt, optional_opt)
        return cmd
|
|
|
|
|
|
class KBA_Client(object):
    '''Base class of the client-side agents.

    It listens on the orchestration channel of a redis server, runs the
    commands it receives through exec_<name>() methods (provided by
    subclasses) and publishes the outcome on the report channel.
    '''

    def __init__(self, user_data):
        '''Create the redis connection described by user_data.

        user_data must provide 'redis_server', 'redis_server_port' and
        'vm_name'.
        '''
        host = user_data['redis_server']
        port = user_data['redis_server_port']
        self.user_data = user_data
        self.redis_obj = redis.StrictRedis(host=host, port=port)
        self.pubsub = self.redis_obj.pubsub(ignore_subscribe_messages=True)
        self.hello_thread = None
        self.stop_hello = threading.Event()
        self.vm_name = user_data['vm_name']
        self.orches_chan_name = "kloudbuster_orches"
        self.report_chan_name = "kloudbuster_report"
        # Last command string built and last child process started; used
        # for error reporting and for ABORT handling respectively.
        self.last_cmd = None
        self.last_process = None

    def setup_channels(self):
        '''Block until the redis server answers, then subscribe to the
        orchestration channel.
        '''
        # Check for connections to redis server
        while True:
            try:
                self.redis_obj.get("test")
            except (redis.exceptions.ConnectionError):
                time.sleep(1)
                continue
            break

        # Subscribe to orchestration channel
        self.pubsub.subscribe(self.orches_chan_name)

    def report(self, cmd, client_type, data):
        '''Publish one message dict on the report channel.

        The dict is handed to publish() as is; its serialisation is left to
        the redis client.
        '''
        message = {'cmd': cmd, 'sender-id': self.vm_name,
                   'client-type': client_type, 'data': data}
        self.redis_obj.publish(self.report_chan_name, message)

    def send_hello(self):
        '''Report READY with the agent version until stop_hello is set.'''
        # Sending "hello" message to master node every 2 seconds
        while not self.stop_hello.is_set():
            self.report('READY', None, __version__)
            time.sleep(2)

    def post_processing(self, p_output):
        '''Return p_output ready to be sent back to the master node.'''
        # If the result is coming from storage testing tool (FIO), compress
        # the buckets from the output using HdrHistogram, and send it back
        # to kb-master node.
        if self.__class__.__name__ == 'KBA_Storage_Client':
            return self.encode_bins(p_output)
        else:
            return p_output

    def exec_command(self, cmd):
        '''Run cmd through "bash -c" and wait for it to finish.

        Return:
            the tuple (returncode, stdout, stderr)
        '''
        # Execute the command, and returns the outputs
        cmds = ['bash', '-c']
        cmds.append(cmd)
        p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Remembered so that an ABORT message can kill it
        self.last_process = p
        (stdout, stderr) = p.communicate()

        return (p.returncode, stdout, stderr)

    def exec_command_report(self, cmd):
        '''Run cmd through "bash -c", publishing intermediate results.

        stdout is read line by line and accumulated until a line consisting
        of "}" closes a result. A closed result is post-processed and held
        back; it is published as a REPORT only once a further line arrives.
        The last result is therefore never published here but returned to
        the caller.

        Return:
            the tuple (returncode, last accumulated output, stderr)
        '''
        # Execute the command, reporting periodically, and returns the outputs
        cmd_res_dict = None
        cmds = ['bash', '-c']
        cmds.append(cmd)
        p_output = ''
        p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Remembered so that an ABORT message can kill it
        self.last_process = p

        lines_iterator = iter(p.stdout.readline, b"")
        for line in lines_iterator:
            # One exception, if this is the very last report, we will send it
            # through "DONE" command, not "REPORT". So what's happening here
            # is to determine whether this is the last report.
            if cmd_res_dict:
                # NOTE(review): the client type is hard-coded to 'http' even
                # though storage clients use this method too - confirm that
                # the master node does not depend on it.
                self.report('REPORT', 'http', cmd_res_dict)
                cmd_res_dict = None
                # The current line opens the next result
                p_output = line
            else:
                p_output += line
                if line.rstrip() == "}":
                    p_output = self.post_processing(p_output)
                    cmd_res_dict = dict(zip(("status", "stdout", "stderr"), (0, p_output, '')))

        stderr = p.communicate()[1]
        return (p.returncode, p_output, stderr)

    def work(self):
        '''Dispatch the messages arriving on the subscribed channel.

        ABORT kills the last started child process, if any. Every other
        message is handed to process_cmd() in a new daemon thread.
        '''
        for item in self.pubsub.listen():
            if item['type'] != 'message':
                continue
            # Convert the string representation of dict to real dict obj
            # NOTE(review): eval() runs whatever expression arrives on the
            # channel; ast.literal_eval would be safer if the messages are
            # always plain literals - confirm against the sender.
            message = eval(item['data'])
            if message['cmd'] == 'ABORT':
                running = self.last_process
                if running is not None:
                    try:
                        running.kill()
                    except OSError:
                        # Best effort: the process is already gone
                        pass
            else:
                # Dispatch on this instance, not on a module-level global
                work_thread = threading.Thread(target=self.process_cmd, args=[message])
                work_thread.daemon = True
                work_thread.start()

    def process_cmd(self, message):
        '''Handle one ACK or EXEC message.

        For EXEC, message['data']['cmd'] names the exec_<cmd>() method to
        call, with message['data']['parameter'] as its only argument when
        that parameter is truthy. The command only runs when 'active_range'
        is empty or contains the numeric id that follows the last 'I' of the
        VM name. The outcome is published as DONE; an exception raised by
        the command is reported with status 1 instead of propagating.
        '''
        if message['cmd'] == 'ACK':
            # When 'ACK' is received, means the master node
            # acknowledged the current VM. So stopped sending more
            # "hello" packet to the master node.
            # Unfortunately, there is no thread.stop() in Python 2.x
            self.stop_hello.set()
        elif message['cmd'] == 'EXEC':
            self.last_cmd = ""
            arange = message['data']['active_range']
            my_id = int(self.vm_name[self.vm_name.rindex('I') + 1:])
            if (not arange) or (arange[0] <= my_id <= arange[1]):
                try:
                    par = message['data'].get('parameter', '')
                    # Attribute lookup instead of eval(): only an existing
                    # exec_* method can be reached.
                    handler = getattr(self, 'exec_%s' % message['data']['cmd'])
                    cmd_res_tuple = handler(par) if par else handler()
                    cmd_res_dict = dict(zip(("status", "stdout", "stderr"), cmd_res_tuple))
                except Exception as exc:
                    cmd_res_dict = {
                        "status": 1,
                        "stdout": self.last_cmd,
                        "stderr": str(exc)
                    }
                self.report('DONE', message['client-type'], cmd_res_dict)
        else:
            # Unexpected
            print('ERROR: Unexpected command received!')
|
|
|
|
class KBA_HTTP_Client(KBA_Client):
    '''Client agent providing the exec_* commands of the HTTP tests.'''

    def exec_setup_static_route(self):
        '''Add the static route to the target subnet unless "ip route show"
        already lists it.

        Return:
            (0, '', '') when the route is already there, otherwise the
            (returncode, stdout, stderr) tuple of the route-add command
        '''
        self.last_cmd = KB_Instance.get_static_route(self.user_data['target_subnet_ip'])
        lookup = self.exec_command(self.last_cmd)
        # lookup[1] is the stdout of "ip route show"
        if self.user_data['target_subnet_ip'] in lookup[1]:
            return (0, '', '')

        self.last_cmd = KB_Instance.add_static_route(
            self.user_data['target_subnet_ip'],
            self.user_data['target_shared_interface_ip'])
        return self.exec_command(self.last_cmd)

    def exec_check_http_service(self):
        '''Run the polling loop that returns once the target URL answers.'''
        target = self.user_data['target_url']
        self.last_cmd = KB_Instance.check_http_service(target)
        return self.exec_command(self.last_cmd)

    def exec_run_http_test(self, http_tool_configs):
        '''Run wrk2 against the target URL with periodic reporting.

        http_tool_configs supplies the remaining keyword arguments of
        KB_Instance.run_wrk2().
        '''
        wrk2_args = dict(http_tool_configs,
                         dest_path='/usr/local/bin/wrk2',
                         target_url=self.user_data['target_url']) \
            if False else None
        # Keyword expansion is kept explicit so that a duplicated key in
        # http_tool_configs still raises TypeError as before.
        del wrk2_args
        self.last_cmd = KB_Instance.run_wrk2(
            dest_path='/usr/local/bin/wrk2',
            target_url=self.user_data['target_url'],
            **http_tool_configs)
        return self.exec_command_report(self.last_cmd)
|
|
|
|
class KBA_Storage_Client(KBA_Client):
    '''Client agent providing the exec_* commands of the storage tests.'''

    def encode_bins(self, p_output):
        '''Replace the completion-latency bins of a fio JSON result by an
        encoded HdrHistogram.

        For each of 'read', 'write' and 'trim' in the first job, every
        non-empty bin is recorded into a fresh histogram; the encoded
        histogram is stored under clat['hist'] and the 'bins' and
        'percentile' entries are removed.

        Return:
            the modified result, serialised back to a JSON string
        '''
        result = json.loads(p_output)

        for test in ['read', 'write', 'trim']:
            histogram = HdrHistogram(1, 5 * 3600 * 1000, 3)
            bins = result['jobs'][0][test]['clat']['bins']
            total_buckets = bins['FIO_IO_U_PLAT_NR']
            grp_msb_bits = bins['FIO_IO_U_PLAT_BITS']
            buckets_per_grp = bins['FIO_IO_U_PLAT_VAL']

            for bucket in xrange(total_buckets):
                if not bins[str(bucket)]:
                    continue
                # Position of the bucket: which group, and where inside it
                grp, subbucket = divmod(bucket, buckets_per_grp)
                if grp == 0:
                    val = subbucket - 1
                else:
                    base = 2 ** (grp_msb_bits + grp - 1)
                    val = int(base + (base / buckets_per_grp) * (subbucket - 0.5))
                histogram.record_value(val, bins[str(bucket)])

            clat_stats = result['jobs'][0][test]['clat']
            clat_stats['hist'] = histogram.encode()
            clat_stats.pop('bins')
            clat_stats.pop('percentile')

        return json.dumps(result)

    def exec_init_volume(self, size):
        '''Run the volume preparation snippet for a test file of size.'''
        init_cmd = KB_Instance.init_volume(size)
        self.last_cmd = init_cmd
        return self.exec_command(self.last_cmd)

    def exec_run_storage_test(self, fio_configs):
        '''Run fio with periodic reporting.

        fio_configs supplies the remaining keyword arguments of
        KB_Instance.run_fio().
        '''
        self.last_cmd = KB_Instance.run_fio(
            dest_path='/usr/local/bin/fio',
            name='kb_storage_test',
            **fio_configs)
        return self.exec_command_report(self.last_cmd)
|
|
|
|
|
|
class KBA_Server(object):
    '''Agent role that prepares and starts the nginx server.'''

    def __init__(self, user_data):
        '''Keep the user_data dict for later use.'''
        self.user_data = user_data

    def config_nginx_server(self):
        '''Write /data/www/index.html as a single dd block of the configured
        'html_size'.

        Return:
            True when dd exits with status 0, False otherwise
        '''
        # Generate the HTML file with specified size
        html_size = self.user_data['http_server_configs']['html_size']
        dd_line = 'dd if=/dev/zero of=/data/www/index.html bs=%s count=1' % html_size
        return not exec_command(dd_line.split())

    def start_nginx_server(self):
        '''Start the nginx service and return the exit status of the
        command.
        '''
        return exec_command(['sudo', 'service', 'nginx', 'start'])

    # def start_nuttcp_server(self):
    #     cmd = ['/usr/bin/nuttcp', '-P5002', '-S', '--single-threaded']
    #     return exec_command(cmd)
|
|
|
|
class KBA_Proxy(object):
    '''Agent role that starts the redis server.'''

    def start_redis_server(self):
        '''Start the redis-server service and return the exit status of the
        command.
        '''
        return exec_command(['sudo', 'service', 'redis-server', 'start'])
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: the 'user-data' file in the working directory decides
    # which role this process plays.
    try:
        with open('user-data', 'r') as f:
            # NOTE(review): eval() executes whatever the file contains;
            # ast.literal_eval would be safer if user-data is always a plain
            # dict literal - confirm against the code that writes it.
            user_data = dict(eval(f.read()))
    except Exception:
        # KloudBuster starts without user-data
        cwd = 'kloudbuster/kb_server'
        cmd = ['python', 'setup.py', 'develop']
        rc = exec_command(cmd, cwd=cwd)
        if not rc:
            cmd = ['/usr/local/bin/pecan', 'serve', 'config.py']
            rc = exec_command(cmd, cwd=cwd)
        # Always stop here: user_data does not exist on this path, so
        # falling through to the role dispatch below would only raise a
        # NameError and hide the real return code.
        sys.exit(rc)

    # A missing role is treated like an unknown one (exit status 1)
    role = user_data.get('role') or ''

    if role == 'KB-PROXY':
        agent = KBA_Proxy()
        sys.exit(agent.start_redis_server())
    if role == 'Server':
        agent = KBA_Server(user_data)
        if agent.config_nginx_server():
            sys.exit(agent.start_nginx_server())
        else:
            sys.exit(1)
    elif role[-6:] == 'Client':
        # The text in front of the 7-character suffix selects the client
        # flavour: 'HTTP' gives the HTTP client, anything else the storage
        # client.
        agent = KBA_HTTP_Client(user_data) if role[:-7] == 'HTTP'\
            else KBA_Storage_Client(user_data)
        agent.setup_channels()
        agent.hello_thread = threading.Thread(target=agent.send_hello)
        agent.hello_thread.daemon = True
        agent.hello_thread.start()
        agent.work()
    else:
        sys.exit(1)
|