Simplified stress tests.

Change-Id: I3cc6a14f32d81fa81adb13aed97049c9df2abbb2
This commit is contained in:
David Kranz 2013-05-01 15:55:04 -04:00
parent 96e9e785b5
commit b9d9750a6f
10 changed files with 401 additions and 1 deletions

View File

@ -411,7 +411,23 @@ StressGroup = [
help='Maximum number of instances to create during test.'),
cfg.StrOpt('controller',
default=None,
help='Controller host.')
help='Controller host.'),
# new stress options
cfg.StrOpt('target_controller',
default=None,
help='Controller host.'),
cfg.StrOpt('target_ssh_user',
default=None,
help='ssh user.'),
cfg.StrOpt('target_private_key_path',
default=None,
help='Path to private key.'),
cfg.StrOpt('target_logfiles',
default=None,
help='regexp for list of log files.'),
cfg.StrOpt('log_check_interval',
default=60,
help='time between log file error checks.')
]

47
tempest/stress/README.rst Normal file
View File

@ -0,0 +1,47 @@
Quanta Research Cambridge OpenStack Stress Test System
======================================================
Nova is a distributed, asynchronous system that is prone to race condition
bugs. These bugs will not be easily found during
functional testing but will be encountered by users in large deployments in a
way that is hard to debug. The stress test tries to cause these bugs to happen
in a more controlled environment.
Environment
------------
This particular framework assumes your working Nova cluster understands Nova
API 2.0. The stress tests can read the logs from the cluster. To enable this
you have to provide, in the [stress] section of tempest.conf, the hostname of
the controller on which 'nova-manage' is run and the private key and user
name used to ssh to the cluster. You also need to provide the
location of the log files:
target_logfiles = "regexp to all log files to be checked for errors"
target_private_key_path = "private ssh key for controller and log file nodes"
target_ssh_user = "username for controller and log file nodes"
target_controller = "hostname or ip of controller node (for nova-manage)"
log_check_interval = "time between checking logs for errors (default 60s)"
Running the sample test
-----------------------
To test installation, do the following (from the tempest/stress directory):
./run_stress.py etc/sample-test.json -d 30
This sample test tries to create a few VMs and kill a few VMs.
Additional Tools
----------------
Sometimes the tests don't finish, or there are failures. In these
cases, you may want to clean out the nova cluster. We have provided
some scripts to do this in the ``tools`` subdirectory.
You can use the following script to destroy any servers, keypairs,
floating ips, and the stress_user*/stress_tenant* users and tenants:
tempest/stress/tools/cleanup.py

View File

@ -0,0 +1,13 @@
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,13 @@
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,34 @@
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tempest.common.utils.data_utils import rand_name
def create_destroy(manager, logger):
    """Stress action: create and delete one server, over and over.

    Every iteration boots a server with a random name from the configured
    image and flavor, waits until it is ACTIVE, deletes it and waits until
    the deletion has completed.  The loop has no exit condition; it ends
    only when an exception is raised or the running process is stopped.

    :param manager: client manager; ``config.compute.image_ref``,
        ``config.compute.flavor_ref`` and ``servers_client`` are used.
    :param logger: logger that receives one INFO record per step.
    :raises AssertionError: if the create call does not answer 202 or the
        delete call does not answer 204.
    """
    image = manager.config.compute.image_ref
    flavor = manager.config.compute.flavor_ref
    servers_client = manager.servers_client
    while True:
        name = rand_name("instance")
        logger.info("creating %s", name)
        resp, server = servers_client.create_server(name, image, flavor)
        # Check the status before touching the body: an unexpected response
        # should fail on the status, not with a KeyError on 'id'.
        assert resp.status == 202, \
            "unexpected create status %s" % resp.status
        server_id = server['id']
        servers_client.wait_for_server_status(server_id, 'ACTIVE')
        logger.info("created %s", server_id)
        logger.info("deleting %s", name)
        resp, _ = servers_client.delete_server(server_id)
        assert resp.status == 204, \
            "unexpected delete status %s" % resp.status
        servers_client.wait_for_server_termination(server_id)
        logger.info("deleted %s", server_id)

60
tempest/stress/cleanup.py Normal file
View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tempest import clients
def cleanup():
    """Delete the resources a stress run may have left behind.

    Using admin credentials this removes, in order: all servers of all
    tenants, all keypairs and floating ips returned by the admin clients,
    and every user / tenant whose name starts with ``stress_user`` /
    ``stress_tenant``.

    Server, keypair and floating ip removal is best effort: a failure is
    logged as a warning and the remaining items are still processed.
    Errors while listing resources or while deleting users and tenants
    propagate to the caller.
    """
    # Local import: this module has no top-level logging import.
    import logging

    log = logging.getLogger(__name__)
    admin_manager = clients.AdminManager()

    servers_client = admin_manager.servers_client
    _, body = servers_client.list_servers({"all_tenants": True})
    servers = body['servers']
    # Request every deletion first, then wait, so terminations overlap.
    for server in servers:
        try:
            servers_client.delete_server(server['id'])
        except Exception:
            log.warning("cleanup: failed to delete server %r",
                        server, exc_info=True)
    for server in servers:
        try:
            servers_client.wait_for_server_termination(server['id'])
        except Exception:
            log.warning("cleanup: failed waiting for termination of "
                        "server %r", server, exc_info=True)

    keypairs_client = admin_manager.keypairs_client
    _, keypairs = keypairs_client.list_keypairs()
    for keypair in keypairs:
        try:
            keypairs_client.delete_keypair(keypair['name'])
        except Exception:
            log.warning("cleanup: failed to delete keypair %r",
                        keypair, exc_info=True)

    floating_ips_client = admin_manager.floating_ips_client
    _, floating_ips = floating_ips_client.list_floating_ips()
    for floating_ip in floating_ips:
        try:
            floating_ips_client.delete_floating_ip(floating_ip['id'])
        except Exception:
            log.warning("cleanup: failed to delete floating ip %r",
                        floating_ip, exc_info=True)

    identity_client = admin_manager.identity_client
    # Only accounts carrying the stress name prefixes are removed.
    _, users = identity_client.get_users()
    for user in users:
        if user['name'].startswith("stress_user"):
            identity_client.delete_user(user['id'])

    _, tenants = identity_client.list_tenants()
    for tenant in tenants:
        if tenant['name'].startswith("stress_tenant"):
            identity_client.delete_tenant(tenant['id'])

156
tempest/stress/driver.py Normal file
View File

@ -0,0 +1,156 @@
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import logging
import multiprocessing
import time
from tempest import clients
from tempest.common import ssh
from tempest.common.utils.data_utils import rand_name
from tempest import exceptions
from tempest.stress import cleanup
# Admin client manager created once at import time; the functions in this
# module read the stress configuration through it.
admin_manager = clients.AdminManager()
# Configure root logging: DEBUG and above goes to the file stress.debug.log,
# which is opened with mode "w" and therefore overwritten on each run.
logging.basicConfig(
format='%(asctime)s %(process)d %(name)-20s %(levelname)-8s %(message)s',
datefmt='%m-%d %H:%M:%S',
filename="stress.debug.log",
filemode="w",
level=logging.DEBUG,
)
# Define a handler for INFO messages or higher. StreamHandler() is created
# without a stream argument, so it writes to sys.stderr (not sys.stdout).
_console = logging.StreamHandler()
_console.setLevel(logging.INFO)
# Console format; differs from the file format only by the ':' after the name
format_str = '%(asctime)s %(process)d %(name)-20s: %(levelname)-8s %(message)s'
_formatter = logging.Formatter(format_str)
# tell the handler to use this format
_console.setFormatter(_formatter)
# Attach the handler to the 'tempest.stress' logger (not the root logger);
# this logger is also the one passed to the stress action functions.
logger = logging.getLogger('tempest.stress')
logger.addHandler(_console)
def do_ssh(command, host):
    """Run ``command`` on ``host`` over ssh and return its output.

    The ssh user and private key path are read from the stress section of
    the admin manager's configuration.

    :param command: command line to execute remotely.
    :param host: host to connect to.
    :returns: whatever the ssh client's ``exec_command`` returns, or None
        when the user or key path is not configured or the command raises
        ``SSHExecCommandFailed``.
    """
    stress_conf = admin_manager.config.stress
    username = stress_conf.target_ssh_user
    key_filename = stress_conf.target_private_key_path
    # Without both credentials there is nothing to connect with.
    if not username or not key_filename:
        return None
    ssh_client = ssh.Client(host, username, key_filename=key_filename)
    try:
        output = ssh_client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        return None
    return output
def _get_compute_nodes(controller):
    """Return the host names of the active nova-compute services.

    The list is produced by running ``nova-manage service list`` on the
    controller over ssh and keeping the rows whose state column is ``:-)``.

    :param controller: host on which nova-manage is executed.
    :returns: list of host names; empty when the command yields no output.
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller)
    if not output:
        return []
    nodes = []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    for line in output.split('\n'):
        words = line.split()
        # The state is the fifth column; shorter (blank or truncated) lines
        # are skipped instead of raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
def _error_in_logs(logfiles, nodes):
    """Look for ERROR or TRACE lines in the log files on the given nodes.

    :param logfiles: file pattern handed to ``egrep`` on each node.
    :param nodes: hosts to inspect, in order.
    :returns: the matching output of the first node that has any (it is
        also logged at ERROR level), or None when no node reports a match.
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    for node in nodes:
        errors = do_ssh(grep, node)
        if not errors:
            # No match (or no ssh result) on this node: keep checking the
            # remaining nodes instead of ending the whole scan here.
            continue
        logger.error('%s: %s' % (node, errors))
        return errors
    return None
def get_action_function(path):
    """Resolve a dotted ``package.module.function`` path to the object.

    Everything before the last dot is imported as a module; the part after
    it is looked up as an attribute of that module.

    :param path: dotted path naming a module-level attribute.
    :returns: the attribute found on the imported module.
    """
    module_name, _, attr_name = path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, attr_name)
def stress_openstack(tests, duration):
    """Workload driver. Executes action functions against a nova-cluster.

    For every test description one worker process per requested thread is
    started.  The driver then sleeps until ``duration`` has elapsed and,
    when log files are configured, greps them for errors on the compute
    nodes after each sleep.  Finally all workers are stopped and, unless
    log errors were found, the created resources are cleaned up.

    :param tests: list of dicts.  ``action`` (required) is the dotted path
        of the action function; optional keys are ``use_admin`` (default
        False), ``threads`` (default 1), ``use_isolated_tenants`` (default
        False) and ``kwargs`` (default ``{}``), which is passed to the
        action as keyword arguments.
    :param duration: run time in seconds.
    """
    logfiles = admin_manager.config.stress.target_logfiles
    log_check_interval = int(admin_manager.config.stress.log_check_interval)
    computes = []
    if logfiles:
        controller = admin_manager.config.stress.target_controller
        computes = _get_compute_nodes(controller)
        # Remove the old logs so only errors from this run are reported.
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node)
    processes = []
    for test in tests:
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for _ in range(test.get('threads', 1)):
            if test.get('use_isolated_tenants', False):
                # Give this worker its own freshly created tenant and user.
                username = rand_name("stress_user")
                tenant_name = rand_name("stress_tenant")
                password = "pass"
                identity_client = admin_manager.identity_client
                _, tenant = identity_client.create_tenant(name=tenant_name)
                identity_client.create_user(username,
                                            password,
                                            tenant['id'],
                                            "email")
                manager = clients.Manager(username=username,
                                          password=password,
                                          tenant_name=tenant_name)
            target = get_action_function(test['action'])
            p = multiprocessing.Process(target=target,
                                        args=(manager, logger),
                                        kwargs=test.get('kwargs', {}))
            processes.append(p)
            p.start()
    end_time = time.time() + duration
    had_errors = False
    while True:
        remaining = end_time - time.time()
        if remaining <= 0:
            break
        # Never sleep past the end of the run.
        time.sleep(min(remaining, log_check_interval))
        if not logfiles:
            continue
        errors = _error_in_logs(logfiles, computes)
        if errors:
            had_errors = True
            break
    for p in processes:
        p.terminate()
    # terminate() only requests the stop; wait for every worker to exit so
    # none of them is still creating resources while cleanup runs.
    for p in processes:
        p.join()
    # Resources are left in place when errors were found (presumably so the
    # failing state can be inspected).
    if not had_errors:
        logger.info("cleaning up")
        cleanup.cleanup()

View File

@ -0,0 +1,7 @@
[{"action": "tempest.stress.actions.create_destroy_server.create_destroy",
"threads": 8,
"use_admin": false,
"use_isolated_tenants": false,
"kwargs": {}
}
]

34
tempest/stress/run_stress.py Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
from tempest.stress import driver
def main(ns):
    """Load the test descriptions and hand them to the stress driver.

    :param ns: parsed command-line namespace; ``ns.tests`` is the path of
        the JSON file describing the tests and ``ns.duration`` the duration
        passed on to the driver.
    """
    # Use a context manager so the description file is closed right after
    # parsing instead of staying open for the whole run.
    with open(ns.tests, 'r') as tests_file:
        tests = json.load(tests_file)
    driver.stress_openstack(tests, ns.duration)
# Script entry: describe the command line, parse it and start the run.
parser = argparse.ArgumentParser(description='Run stress tests. ')
# Optional run time; parsed as an integer, 300 when not given.
parser.add_argument(
    '-d', '--duration',
    type=int,
    default=300,
    help="Duration of test.")
# Required positional: the file that describes the tests.
parser.add_argument(
    'tests',
    help="Name of the file with test description.")
main(parser.parse_args())

20
tempest/stress/tools/cleanup.py Executable file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env python
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tempest.stress import cleanup
# Stand-alone entry point: run the stress cleanup routine once and exit.
cleanup.cleanup()