Xen: Add race-condition troubleshooting script.
This stress-test runs builds and migrations concurrently making it easier to uncovers certain types of race-conditions in the virt-layer code. Change-Id: I89c382b85aa6d0eb3dc957803f3ea34e3a36e9d4
This commit is contained in:
parent
9468508efe
commit
196a4892fc
172
tools/xenserver/stress_test.py
Normal file
172
tools/xenserver/stress_test.py
Normal file
@ -0,0 +1,172 @@
|
||||
"""
|
||||
This script concurrently builds and migrates instances. This can be useful when
|
||||
troubleshooting race-conditions in virt-layer code.
|
||||
|
||||
Expects:
|
||||
|
||||
novarc to be sourced in the environment
|
||||
|
||||
Helper Script for Xen Dom0:
|
||||
|
||||
# cat /tmp/destroy_cache_vdis
|
||||
#!/bin/bash
|
||||
xe vdi-list | grep "Glance Image" -C1 | grep "^uuid" | awk '{print $5}' |
|
||||
xargs -n1 -I{} xe vdi-destroy uuid={}
|
||||
"""
|
||||
import argparse
|
||||
import contextlib
|
||||
import multiprocessing
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
DOM0_CLEANUP_SCRIPT = "/tmp/destroy_cache_vdis"
|
||||
|
||||
|
||||
def run(cmd):
|
||||
ret = subprocess.call(cmd, shell=True)
|
||||
if ret != 0:
|
||||
print >> sys.stderr, "Command exited non-zero: %s" % cmd
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def server_built(server_name, image_name, flavor=1, cleanup=True):
|
||||
run("nova boot --image=%(image_name)s --flavor=%(flavor)s"
|
||||
" --poll %(server_name)s" % locals())
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if cleanup:
|
||||
run("nova delete %(server_name)s" % locals())
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def snapshot_taken(server_name, snapshot_name, cleanup=True):
|
||||
run("nova image-create %(server_name)s %(snapshot_name)s"
|
||||
" --poll" % locals())
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if cleanup:
|
||||
run("nova image-delete %(snapshot_name)s" % locals())
|
||||
|
||||
|
||||
def migrate_server(server_name):
|
||||
run("nova migrate %(server_name)s --poll" % locals())
|
||||
|
||||
cmd = "nova list | grep %(server_name)s | awk '{print $6}'" % locals()
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
|
||||
stdout, stderr = proc.communicate()
|
||||
status = stdout.strip()
|
||||
if status.upper() != 'VERIFY_RESIZE':
|
||||
print >> sys.stderr, "Server %(server_name)s failed to rebuild"\
|
||||
% locals()
|
||||
return False
|
||||
|
||||
# Confirm the resize
|
||||
run("nova resize-confirm %(server_name)s" % locals())
|
||||
return True
|
||||
|
||||
|
||||
def test_migrate(context):
|
||||
count, args = context
|
||||
server_name = "server%d" % count
|
||||
cleanup = args.cleanup
|
||||
with server_built(server_name, args.image, cleanup=cleanup):
|
||||
# Migrate A -> B
|
||||
result = migrate_server(server_name)
|
||||
if not result:
|
||||
return False
|
||||
|
||||
# Migrate B -> A
|
||||
return migrate_server(server_name)
|
||||
|
||||
|
||||
def rebuild_server(server_name, snapshot_name):
|
||||
run("nova rebuild %(server_name)s %(snapshot_name)s --poll" % locals())
|
||||
|
||||
cmd = "nova list | grep %(server_name)s | awk '{print $6}'" % locals()
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
|
||||
stdout, stderr = proc.communicate()
|
||||
status = stdout.strip()
|
||||
if status != 'ACTIVE':
|
||||
print >> sys.stderr, "Server %(server_name)s failed to rebuild"\
|
||||
% locals()
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def test_rebuild(context):
|
||||
count, args = context
|
||||
server_name = "server%d" % count
|
||||
snapshot_name = "snap%d" % count
|
||||
cleanup = args.cleanup
|
||||
with server_built(server_name, args.image, cleanup=cleanup):
|
||||
with snapshot_taken(server_name, snapshot_name, cleanup=cleanup):
|
||||
return rebuild_server(server_name, snapshot_name)
|
||||
|
||||
|
||||
def _parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Test Nova for Race Conditions.')
|
||||
|
||||
parser.add_argument('tests', metavar='TESTS', type=str, nargs='*',
|
||||
default=['rebuild', 'migrate'],
|
||||
help='tests to run: [rebuilt|migrate]')
|
||||
|
||||
parser.add_argument('-i', '--image', help="image to build from",
|
||||
required=True)
|
||||
parser.add_argument('-n', '--num-runs', type=int, help="number of runs",
|
||||
default=1)
|
||||
parser.add_argument('-c', '--concurrency', type=int, default=5,
|
||||
help="number of concurrent processes")
|
||||
parser.add_argument('--no-cleanup', action='store_false', dest="cleanup",
|
||||
default=True)
|
||||
parser.add_argument('-d', '--dom0-ips',
|
||||
help="IP of dom0's to run cleanup script")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
dom0_cleanup_script = DOM0_CLEANUP_SCRIPT
|
||||
args = _parse_args()
|
||||
|
||||
if args.dom0_ips:
|
||||
dom0_ips = args.dom0_ips.split(',')
|
||||
else:
|
||||
dom0_ips = []
|
||||
|
||||
start_time = time.time()
|
||||
batch_size = min(args.num_runs, args.concurrency)
|
||||
pool = multiprocessing.Pool(processes=args.concurrency)
|
||||
|
||||
results = []
|
||||
for test in args.tests:
|
||||
test_func = globals().get("test_%s" % test)
|
||||
if not test_func:
|
||||
print >> sys.stderr, "test '%s' not found" % test
|
||||
sys.exit(1)
|
||||
|
||||
contexts = [(x, args) for x in range(args.num_runs)]
|
||||
|
||||
try:
|
||||
results += pool.map(test_func, contexts)
|
||||
finally:
|
||||
if args.cleanup:
|
||||
for dom0_ip in dom0_ips:
|
||||
run("ssh root@%(dom0_ip)s %(dom0_cleanup_script)s"
|
||||
% locals())
|
||||
|
||||
success = all(results)
|
||||
result = "SUCCESS" if success else "FAILED"
|
||||
|
||||
duration = time.time() - start_time
|
||||
print "%s, finished in %.2f secs" % (result, duration)
|
||||
|
||||
sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user