Merge "Add rolling downtime simulation tools"

This commit is contained in:
Jenkins 2017-03-10 13:54:23 +00:00 committed by Gerrit Code Review
commit eacb01c0a0
3 changed files with 252 additions and 0 deletions

62
bowling_ball/README.rst Normal file
View File

@ -0,0 +1,62 @@
Bowling Ball - OpenStack-Ansible Rolling Downtime Simulator
###########################################################
:date: 2017-03-09
:tags: rackspace, openstack, ansible
:category: \*openstack, \*nix
About
-----
This project aims to test for issues with rolling downtime on
OpenStack-Ansible deployments. It consists of two main components:
* The ``rolling_restart.py`` script
* The ``tests`` directory
The ``rolling_restart.py`` script will stop containers from a specified group
in a rolling fashion - node 1 will stop, then start, then node 2, then
node 3 and so on. This script runs from the *deployment host*.
The ``tests`` directory contains scripts to generate traffic against the
target services. These vary per service, but attempt to apply usage to a
system that will be restarted by ``rolling_restart.py`` in order to
measure the effects. These scripts run from a *utility container*.
Usage
-----
#. Start your test script from the utility container. For example,
``keystone.py`` requests a session and a list of projects in an
infinite loop.
#. From the deployment host, run ``rolling_restart.py`` in the playbooks
directory (necessary to find the inventory script). Specify the service
you're targeting with the ``-s`` parameter.
``rolling_restart.py -s keystone_container``
You can specify a wait time in seconds between stopping and starting
individual nodes.
``rolling_restart.py -s keystone_container -w 60``
Assumptions
-----------
These tools are currently coupled to OSA, and they assume paths to files
as specified by the ``multi-node-aio`` scripts.
Container stopping and starting is done with an ansible command, and the
physical host to target is derived from the current inventory.
``rolling_restart.py`` must currently be run from the ``playbooks``
directory. This will be fixed later.
You must source ``openrc`` before running ``keystone.py``.
Why the name?
-------------
It sets 'em up and knocks 'em down.

View File

@ -0,0 +1,128 @@
#!/usr/bin/env python
# Copyright 2017, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# (c) 2017, Nolan Brubaker <nolan.brubaker@rackspace.com>
import argparse
import json
import os
import subprocess
import sys
import time
# Directory holding the OpenStack-Ansible deployment configuration files.
CONF_DIR = os.path.join('/', 'etc', 'openstack_deploy')
# JSON inventory that main() loads through read_inventory().
INVENTORY_FILE = os.path.join(CONF_DIR, 'openstack_inventory.json')
# NOTE(review): CONF_FILE and PLAYBOOK_DIR are defined but not referenced by
# any function visible in this script.
CONF_FILE = os.path.join(CONF_DIR, 'openstack_user_config.yml')
PLAYBOOK_DIR = os.path.join('/', 'opt', 'openstack_ansible', 'playbooks')
# Shell command templates; rolling_restart() fills in {container} and {host}
# with str.format() and runs the result through the shell. The trailing
# backslash continues the string literal, so no newline is embedded.
# ``-i inventory`` is a relative path, which is presumably why the script has
# to be launched from the playbooks directory.
STOP_TEMPLATE = 'ansible -i inventory -m shell -a\
"lxc-stop -n {container}" {host}'
START_TEMPLATE = 'ansible -i inventory -m shell -a\
"lxc-start -dn {container}" {host}'
def args(arg_list):
    """Parse the command-line arguments of the rolling restart simulator.

    :param arg_list: list of argument strings, typically ``sys.argv[1:]``.
    :returns: dict with the keys ``service`` (the group name given with
        ``-s``/``--service``) and ``wait`` (number of seconds; a float when
        given with ``-w``/``--wait``, otherwise the default 120).
    """
    parser = argparse.ArgumentParser(
        usage='%(prog)s',
        description='OpenStack-Ansible Rolling Update Simulator',
        epilog='Licensed "Apache 2.0"')

    parser.add_argument(
        '-s',
        '--service',
        help='Name of the service to rolling restart.',
        required=True,
        default=None,
    )

    parser.add_argument(
        '-w',
        '--wait',
        help=("Number of seconds to wait between stopping and starting. "
              "Default: 120"),
        # Without an explicit type argparse returns the raw string, which
        # time.sleep() rejects with a TypeError.
        type=float,
        default=120,
    )

    return vars(parser.parse_args(arg_list))
def read_inventory(inventory_file):
    """Load the JSON inventory at ``inventory_file`` and return its content."""
    with open(inventory_file, 'r') as handle:
        return json.load(handle)
def get_similar_groups(target_group, inventory):
    """Suggest group names for a group that could not be found.

    Returns the list of keys in ``inventory`` that contain ``target_group``
    as a substring, in the inventory's iteration order.
    """
    return [name for name in inventory.keys() if target_group in name]
def get_containers(target_group, inventory):
    """Return the sorted container names of ``target_group``.

    :param target_group: name of the inventory group to look up.
    :param inventory: parsed inventory dictionary.
    :returns: a new, sorted list built from the group's ``hosts`` entry.

    If the group is not in the inventory, an error message (with similar
    group names when any exist) is printed and the process exits with
    status 1.
    """
    group = inventory.get(target_group, None)

    if group is None:
        groups = get_similar_groups(target_group, inventory)
        print("No group {} found.".format(target_group))
        if groups:
            print("Maybe try one of these:")
            print("\n".join(groups))
        sys.exit(1)

    # sorted() builds a fresh list, so the caller's inventory is not
    # reordered as a side effect of the lookup.
    return sorted(group['hosts'])
def rolling_restart(containers, inventory, wait=120):
    """Stop and then start each container, one at a time, in the given order.

    :param containers: iterable of container names to restart.
    :param inventory: parsed inventory; the physical host of each container
        is read from ``inventory['_meta']['hostvars'][name]['physical_host']``.
    :param wait: number of seconds to wait between stopping and starting a
        container. Numeric strings are accepted.
    :raises ValueError: if ``wait`` cannot be converted to a number.
    :raises subprocess.CalledProcessError: if a stop or start command exits
        with a non-zero status.
    """
    # Convert up front: a bad value must fail before any container has been
    # stopped, otherwise the first container would be left down when
    # time.sleep() rejects the argument.
    pause = float(wait)

    for container in containers:
        host = inventory['_meta']['hostvars'][container]['physical_host']

        stop_cmd = STOP_TEMPLATE.format(container=container, host=host)
        print("Stopping {container}".format(container=container))
        subprocess.check_call(stop_cmd, shell=True)

        time.sleep(pause)

        start_cmd = START_TEMPLATE.format(container=container, host=host)
        subprocess.check_call(start_cmd, shell=True)
        print("Started {container}".format(container=container))
def main():
    """Parse the command line, then restart the requested group's containers.

    The inventory is read from INVENTORY_FILE and the wait time given on the
    command line is passed through to rolling_restart().
    """
    options = args(sys.argv[1:])
    inventory = read_inventory(INVENTORY_FILE)
    targets = get_containers(options['service'], inventory)
    rolling_restart(targets, inventory, options['wait'])


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
# Copyright 2017, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# (c) 2017, Nolan Brubaker <nolan.brubaker@rackspace.com>
import datetime
from keystoneauth1.identity import v3
from keystoneauth1 import session
from keystoneauth1.exceptions.connection import ConnectFailure
from keystoneauth1.exceptions.http import InternalServerError
from keystoneclient.v3 import client
import os
import sys
import time
# Credentials are taken from the environment; a missing variable raises
# KeyError here, before any request is made.
auth_url = os.environ['OS_AUTH_URL']
password = os.environ['OS_PASSWORD']

auth = v3.Password(auth_url=auth_url, username="admin",
                   password=password, project_name="admin",
                   user_domain_id="default", project_domain_id="default")

# Time of the first failed request of the current outage, or None while the
# last request succeeded.
disconnected = None
try:
    while True:
        try:
            # Pause for a bit so we're not generating more data than we
            # can handle
            time.sleep(1)

            start_time = datetime.datetime.now()

            # A new session and client are built on every iteration, so
            # each pass is a full request cycle against the service.
            sess = session.Session(auth=auth)
            keystone = client.Client(session=sess)
            keystone.projects.list()

            end_time = datetime.datetime.now()

            if disconnected:
                # First success after an outage: report how long it lasted.
                dis_delta = end_time - disconnected
                disconnected = None
                print("Reconnect {}s".format(dis_delta.total_seconds()))

            delta = end_time - start_time

            # The placeholder used to be "{]", which made str.format() raise
            # ValueError and ended the script on the first successful
            # request.
            print("New list: {}s.".format(delta.total_seconds()))
        except (ConnectFailure, InternalServerError):
            # Only record the start of an outage; later failures within the
            # same outage keep the original timestamp.
            if not disconnected:
                disconnected = datetime.datetime.now()
except KeyboardInterrupt:
    sys.exit()