196a4892fc
This stress-test runs builds and migrations concurrently making it easier to uncovers certain types of race-conditions in the virt-layer code. Change-Id: I89c382b85aa6d0eb3dc957803f3ea34e3a36e9d4
173 lines
5.0 KiB
Python
173 lines
5.0 KiB
Python
"""
|
|
This script concurrently builds and migrates instances. This can be useful when
|
|
troubleshooting race-conditions in virt-layer code.
|
|
|
|
Expects:
|
|
|
|
novarc to be sourced in the environment
|
|
|
|
Helper Script for Xen Dom0:
|
|
|
|
# cat /tmp/destroy_cache_vdis
|
|
#!/bin/bash
|
|
xe vdi-list | grep "Glance Image" -C1 | grep "^uuid" | awk '{print $5}' |
|
|
xargs -n1 -I{} xe vdi-destroy uuid={}
|
|
"""
|
|
import argparse
|
|
import contextlib
|
|
import multiprocessing
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
DOM0_CLEANUP_SCRIPT = "/tmp/destroy_cache_vdis"
|
|
|
|
|
|
def run(cmd):
|
|
ret = subprocess.call(cmd, shell=True)
|
|
if ret != 0:
|
|
print >> sys.stderr, "Command exited non-zero: %s" % cmd
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def server_built(server_name, image_name, flavor=1, cleanup=True):
|
|
run("nova boot --image=%(image_name)s --flavor=%(flavor)s"
|
|
" --poll %(server_name)s" % locals())
|
|
try:
|
|
yield
|
|
finally:
|
|
if cleanup:
|
|
run("nova delete %(server_name)s" % locals())
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def snapshot_taken(server_name, snapshot_name, cleanup=True):
|
|
run("nova image-create %(server_name)s %(snapshot_name)s"
|
|
" --poll" % locals())
|
|
try:
|
|
yield
|
|
finally:
|
|
if cleanup:
|
|
run("nova image-delete %(snapshot_name)s" % locals())
|
|
|
|
|
|
def migrate_server(server_name):
|
|
run("nova migrate %(server_name)s --poll" % locals())
|
|
|
|
cmd = "nova list | grep %(server_name)s | awk '{print $6}'" % locals()
|
|
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
|
|
stdout, stderr = proc.communicate()
|
|
status = stdout.strip()
|
|
if status.upper() != 'VERIFY_RESIZE':
|
|
print >> sys.stderr, "Server %(server_name)s failed to rebuild"\
|
|
% locals()
|
|
return False
|
|
|
|
# Confirm the resize
|
|
run("nova resize-confirm %(server_name)s" % locals())
|
|
return True
|
|
|
|
|
|
def test_migrate(context):
|
|
count, args = context
|
|
server_name = "server%d" % count
|
|
cleanup = args.cleanup
|
|
with server_built(server_name, args.image, cleanup=cleanup):
|
|
# Migrate A -> B
|
|
result = migrate_server(server_name)
|
|
if not result:
|
|
return False
|
|
|
|
# Migrate B -> A
|
|
return migrate_server(server_name)
|
|
|
|
|
|
def rebuild_server(server_name, snapshot_name):
|
|
run("nova rebuild %(server_name)s %(snapshot_name)s --poll" % locals())
|
|
|
|
cmd = "nova list | grep %(server_name)s | awk '{print $6}'" % locals()
|
|
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
|
|
stdout, stderr = proc.communicate()
|
|
status = stdout.strip()
|
|
if status != 'ACTIVE':
|
|
print >> sys.stderr, "Server %(server_name)s failed to rebuild"\
|
|
% locals()
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
def test_rebuild(context):
|
|
count, args = context
|
|
server_name = "server%d" % count
|
|
snapshot_name = "snap%d" % count
|
|
cleanup = args.cleanup
|
|
with server_built(server_name, args.image, cleanup=cleanup):
|
|
with snapshot_taken(server_name, snapshot_name, cleanup=cleanup):
|
|
return rebuild_server(server_name, snapshot_name)
|
|
|
|
|
|
def _parse_args():
|
|
parser = argparse.ArgumentParser(
|
|
description='Test Nova for Race Conditions.')
|
|
|
|
parser.add_argument('tests', metavar='TESTS', type=str, nargs='*',
|
|
default=['rebuild', 'migrate'],
|
|
help='tests to run: [rebuilt|migrate]')
|
|
|
|
parser.add_argument('-i', '--image', help="image to build from",
|
|
required=True)
|
|
parser.add_argument('-n', '--num-runs', type=int, help="number of runs",
|
|
default=1)
|
|
parser.add_argument('-c', '--concurrency', type=int, default=5,
|
|
help="number of concurrent processes")
|
|
parser.add_argument('--no-cleanup', action='store_false', dest="cleanup",
|
|
default=True)
|
|
parser.add_argument('-d', '--dom0-ips',
|
|
help="IP of dom0's to run cleanup script")
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
def main():
|
|
dom0_cleanup_script = DOM0_CLEANUP_SCRIPT
|
|
args = _parse_args()
|
|
|
|
if args.dom0_ips:
|
|
dom0_ips = args.dom0_ips.split(',')
|
|
else:
|
|
dom0_ips = []
|
|
|
|
start_time = time.time()
|
|
batch_size = min(args.num_runs, args.concurrency)
|
|
pool = multiprocessing.Pool(processes=args.concurrency)
|
|
|
|
results = []
|
|
for test in args.tests:
|
|
test_func = globals().get("test_%s" % test)
|
|
if not test_func:
|
|
print >> sys.stderr, "test '%s' not found" % test
|
|
sys.exit(1)
|
|
|
|
contexts = [(x, args) for x in range(args.num_runs)]
|
|
|
|
try:
|
|
results += pool.map(test_func, contexts)
|
|
finally:
|
|
if args.cleanup:
|
|
for dom0_ip in dom0_ips:
|
|
run("ssh root@%(dom0_ip)s %(dom0_cleanup_script)s"
|
|
% locals())
|
|
|
|
success = all(results)
|
|
result = "SUCCESS" if success else "FAILED"
|
|
|
|
duration = time.time() - start_time
|
|
print "%s, finished in %.2f secs" % (result, duration)
|
|
|
|
sys.exit(0 if success else 1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|