Add YODA for undercloud and overcloud workloads

YODA is a Browbeat workload for Ironic and TripleO. It can perform and
monitor the following:

* Introspection, bulk or batch
* Cloud deployment with varying node types and numbers
* Baremetal node import timing (actually done during introspection tests)

Metrics that are gathered include:

* Time to PXE
* Time until pingable
* Success/failure rates and times
* Overcloud metadata after each deploy

Potential issues

Change-Id: I89809cc35db2cfaa39f8ede49ec853572c0e468e
@@ -1,10 +1,14 @@
 [
-{% for host in groups['controller'] %}
-{{hostvars[host]| to_nice_json}},
-{% endfor %}
-{% for host in groups['compute'] %}
-{{hostvars[host]| to_nice_json}},
-{% endfor %}
+{% if groups['controller'] is defined %}
+{% for host in groups['controller'] %}
+{{hostvars[host]| to_nice_json}},
+{% endfor %}
+{% endif %}
+{% if groups['compute'] is defined %}
+{% for host in groups['compute'] %}
+{{hostvars[host]| to_nice_json}},
+{% endfor %}
+{% endif %}
 {% for host in groups['undercloud'] %}
 {{hostvars[host]| to_nice_json}}
 {% endfor %}
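A quick sketch of why this hunk wraps the controller and compute loops in `is defined` guards (standard Jinja2 behavior; the inventory dict below is an assumption for illustration): rendering a loop over a group that is absent from the inventory raises UndefinedError, while the guarded template simply skips that section.

    from jinja2 import Environment

    env = Environment()
    old = env.from_string(
        "{% for host in groups['compute'] %}{{ host }}{% endfor %}")
    new = env.from_string(
        "{% if groups['compute'] is defined %}"
        "{% for host in groups['compute'] %}{{ host }}{% endfor %}"
        "{% endif %}")

    groups = {'undercloud': ['undercloud-0']}  # no 'compute' group defined
    print(new.render(groups=groups))  # prints an empty string
    old.render(groups=groups)         # raises jinja2.exceptions.UndefinedError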
@@ -19,6 +19,7 @@
     - perfkitbenchmarker
     - rally
     - shaker
+    - yoda
     - flavors
     - images
   environment: "{{proxy_env}}"
@@ -39,6 +39,9 @@ shaker_venv: "{{home_dir}}/shaker-venv"
 # Shaker version to Install
 shaker_version: 0.0.17

+# The default YODA venv
+yoda_venv: /home/stack/yoda-venv
+
 # PerfKitBenchmarker Settings
 perfkit_venv: "{{home_dir}}/perfkit-venv"
 perfkit_version: v1.12.0
ansible/install/roles/yoda/tasks/main.yml (new file, 18 lines)
@@ -0,0 +1,18 @@
---
#
# YODA Install
#

- name: Create yoda virtualenv
  command: virtualenv {{ yoda_venv }} creates={{ yoda_venv }}

- name: Install yoda requirements
  pip: name={{item}} virtualenv={{yoda_venv}}
  with_items:
    - openstacksdk
    - python-heatclient
    - python-tripleoclient
    - elasticsearch
    - pykwalify
    - python-dateutil
    - git+https://github.com/jkilpatr/ostag/#egg=ostag
@@ -31,6 +31,7 @@
     - browbeat/perfkitbenchmarker
     - browbeat/rally
     - browbeat/shaker
+    - browbeat/yoda
     - browbeat/flavors
     - browbeat/images
     - browbeat/browbeat-network
@@ -59,6 +60,5 @@
 - name: Run Browbeat
   hosts: undercloud
   roles:
     - browbeat/bug-check
     - browbeat/grafana-dashboard-setup
     - browbeat/browbeat-run
@@ -20,6 +20,7 @@
     - browbeat/perfkitbenchmarker
     - browbeat/rally
     - browbeat/shaker
+    - browbeat/yoda
     - browbeat/flavors
     - browbeat/images
     - browbeat/browbeat-network
@@ -4,4 +4,4 @@
   shell:
     "source {{ ansible_env.HOME }}/browbeat-venv/bin/activate; \
     cd {{ ansible_env.HOME }}/browbeat/; \
-    python browbeat.py rally > {{ ansible_env.HOME }}/browbeat/results/browbeat_run.log"
+    python browbeat.py all > {{ ansible_env.HOME }}/browbeat/results/browbeat_run.log"
@@ -40,23 +40,6 @@ grafana:
   snapshot:
     enabled: false
     snapshot_compute: false
-perfkit:
-  enabled: true
-  sleep_before: 0
-  sleep_after: 0
-  venv: /home/stack/perfkit-venv/bin/activate
-  default:
-    image: centos7
-    machine_type: m1.small
-    os_type: rhel
-    openstack_image_username: centos
-    openstack_floating_ip_pool: browbeat_public
-    openstack_network: nova_test_net_name.stdout
-  benchmarks:
-    - name: fio-centos-m1-small
-      enabled: false
-      benchmarks: fio
-      data_disk_size: 4
 rally:
   enabled: true
   sleep_before: 5
@@ -138,104 +121,3 @@ rally:
       sla_max_avg_duration: 12
       sla_max_seconds: 30
       sla_max_failure: 0
-#shaker scenarios require at least 2 compute nodes
-shaker:
-  enabled: true
-  server: localhost
-  port: 5555
-  flavor: m1.small
-  join_timeout: 600
-  sleep_before: 5
-  sleep_after: 5
-  venv: /home/stack/shaker-venv
-  dns_nameserver: 192.168.23.1
-  shaker_region: regionOne
-  scenarios:
-    - name: l2-4-1
-      enabled: true
-      density: 4
-      compute: 1
-      progression: linear
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l2.yaml
-    - name: l2-8-1
-      enabled: true
-      density: 8
-      compute: 1
-      progression: linear
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l2.yaml
-    - name: l2-4-2
-      enabled: true
-      density: 4
-      compute: 2
-      progression: linear
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l2.yaml
-    - name: l2-4-8
-      enabled: true
-      density: 8
-      compute: 2
-      progression: linear
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l2.yaml
-    - name: l3-north-south-4-1
-      enabled: true
-      placement: single_room
-      density: 4
-      compute: 1
-      progression: null
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_north_south.yaml
-    - name: l3-north-south-8-1
-      enabled: false
-      placement: single_room
-      density: 8
-      compute: 1
-      progression: null
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_north_south.yaml
-    - name: l3-north-south-4-2
-      enabled: true
-      placement: single_room
-      density: 4
-      compute: 2
-      progression: null
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_north_south.yaml
-    - name: l3-north-south-8-2
-      enabled: true
-      placement: single_room
-      density: 8
-      compute: 2
-      progression: null
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_north_south.yaml
-    - name: l3-east-west-4-1
-      enabled: true
-      density: 4
-      compute: 1
-      placement: single_room
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_east_west.yaml
-    - name: l3-east-west-8-1
-      enabled: true
-      density: 8
-      compute: 1
-      placement: single_room
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_east_west.yaml
-    - name: l3-east-west-4-2
-      enabled: true
-      density: 4
-      compute: 2
-      placement: single_room
-      time: 60
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_east_west.yaml
-    - name: l3-east-west-8-2
-      enabled: true
-      density: 8
-      compute: 2
-      time: 60
-      placement: single_room
-      file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_east_west.yaml
@@ -0,0 +1,182 @@
# Tests to be completed for the install-and-check.sh script; minimal and short workloads are performed
# to confirm functionality.
browbeat:
  results : results/
  rerun: 1
  cloud_name: {{ browbeat_cloud_name }}
elasticsearch:
  enabled: {{ elastic_enabled_template }}
  host: {{ elastic_host_template }}
  port: 9200
  regather: true
  metadata_files:
    - name: hardware-metadata
      file: metadata/hardware-metadata.json
    - name: environment-metadata
      file: metadata/environment-metadata.json
    - name: software-metadata
      file: metadata/software-metadata.json
    - name: version
      file: metadata/version.json
ansible:
  ssh_config: ansible/ssh-config
  hosts: ansible/hosts
  adjust:
    keystone_token: ansible/browbeat/adjustment-keystone-token.yml
    neutron_l3: ansible/browbeat/adjustment-l3.yml
    nova_db: ansible/browbeat/adjustment-db.yml
    workers: ansible/browbeat/adjustment-workers.yml
  grafana_snapshot: ansible/browbeat/snapshot-general-performance-dashboard.yml
  metadata: ansible/gather/site.yml
connmon:
  enabled: {{ connmon_enabled_template }}
  sudo: true
grafana:
  enabled: {{ grafana_enabled_template }}
  grafana_ip: {{ grafana_host_template }}
  grafana_port: 3000
  dashboards:
    - openstack-general-system-performance
  snapshot:
    enabled: false
    snapshot_compute: false
yoda:
  enabled: true
  instackenv: "/home/stack/instackenv.json"
  stackrc: "/home/stack/stackrc"
  venv: "/home/stack/yoda-venv/bin/activate"
  benchmarks:
    - name: introspect-{{ overcloud_size }}-10-individual-batch-2
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: 2
    - name: introspect-{{ overcloud_size }}-10-individual-batch-4
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: 4
    - name: introspect-{{ overcloud_size }}-10-individual-batch-8
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: 8
    - name: introspect-{{ overcloud_size }}-10-individual-batch-16
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: 16
    - name: introspect-{{ overcloud_size }}-10-individual-batch-32
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: 32
    - name: introspect-{{ overcloud_size }}-10-individual-batch-{{ overcloud_size }}
      type: introspection
      enabled: true
      method: individual
      times: 10
      timeout: 900
      batch_size: {{ overcloud_size }}
    - name: introspect-{{ overcloud_size }}-50-bulk
      type: introspection
      enabled: true
      method: bulk
      times: 50
      timeout: 900
    - name: No-HA-Max-Compute-{{ overcloud_size }}-full-deploy
      type: overcloud
      ntp_server: clock01.util.phx2.redhat.com
      timeout: 600
      templates:
        - ""
      enabled: true
      step: 5
      keep_stack: false
      times: 2
      cloud:
        - node: "compute"
          start_scale: 1
          end_scale: {{ overcloud_size | int - 1 }}
        - node: "control"
          start_scale: 1
          end_scale: 1
    - name: No-HA-Max-Compute-{{ overcloud_size }}-stack-update
      type: overcloud
      ntp_server: clock01.util.phx2.redhat.com
      timeout: 600
      templates:
        - ""
      instackenv: "/home/stack/instackenv.json"
      enabled: true
      step: 5
      keep_stack: true
      times: 2
      cloud:
        - node: "compute"
          start_scale: 1
          end_scale: {{ overcloud_size | int - 1 }}
        - node: "control"
          start_scale: 1
          end_scale: 1
    - name: HA-Max-Compute-{{ overcloud_size }}-full-deploy
      type: overcloud
      ntp_server: clock01.util.phx2.redhat.com
      timeout: 600
      templates:
        - ""
      enabled: true
      step: 5
      keep_stack: false
      times: 2
      cloud:
        - node: "compute"
          start_scale: 1
          end_scale: {{ overcloud_size | int - 3 }}
        - node: "control"
          start_scale: 3
          end_scale: 3
    - name: HA-Max-Compute-{{ overcloud_size }}-stack-update
      type: overcloud
      ntp_server: clock01.util.phx2.redhat.com
      timeout: 600
      templates:
        - ""
      enabled: true
      step: 5
      keep_stack: true
      times: 2
      cloud:
        - node: "compute"
          start_scale: 1
          end_scale: {{ overcloud_size | int - 3 }}
        - node: "control"
          start_scale: 3
          end_scale: 3
    - name: HA-Max-Compute-{{ overcloud_size }}-stack-update
      type: overcloud
      ntp_server: clock01.util.phx2.redhat.com
      timeout: 600
      templates:
        - ""
      enabled: true
      step: 5
      keep_stack: true
      times: 2
      cloud:
        - node: "compute"
          start_scale: 1
          end_scale: {{ overcloud_size | int - 3 }}
        - node: "control"
          start_scale: 3
          end_scale: 3
@@ -5,3 +5,5 @@ grafana_enabled_template: false
 grafana_host_template: "1.2.3.4.5"
 browbeat_config_file: "browbeat-basic.yaml.j2"
 browbeat_cloud_name: "browbeat_ci"
+overcloud_size: "{{ groups['overcloud'] | length }}"
+ntp_server: "pool.ntp.org"
@@ -369,3 +369,34 @@ shaker:
       time: 60
       placement: single_room
       file: lib/python2.7/site-packages/shaker/scenarios/openstack/dense_l3_east_west.yaml
+#yoda scenarios WILL redeploy your overcloud
+yoda:
+  enabled: false
+  instackenv: "/home/stack/instackenv.json"
+  stackrc: "/home/stack/stackrc"
+  venv: "/home/stack/yoda-venv/bin/activate"
+  benchmarks:
+    - name: scale-deploy
+      type: overcloud
+      ntp_server: pool.ntp.org
+      enabled: true
+      templates:
+        - ""
+      timeout: 600 #deploy timeout in minutes
+      step: 1
+      keep_stack: false
+      times: 3
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: 1
+        - node: "control"
+          start_scale: 1
+          end_scale: 3
+    - name: introspect-batch
+      type: introspection
+      enabled: true
+      method: individual #other option is bulk
+      times: 3
+      timeout: 900 #introspection timeout in seconds
+      batch_size: 2
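For each iteration, YODA turns an overcloud benchmark entry like `scale-deploy` above into an `openstack overcloud deploy` invocation. A condensed sketch of the command construction (simplified from deploy_overcloud() in lib/Yoda.py further down in this change; the values are illustrative):

    def build_deploy_cmd(benchmark, scales, ntp_server):
        # mirrors deploy_overcloud(): templates first, then per-node scale,
        # then timeout and NTP server, which therefore win over the templates
        cmd = "openstack overcloud deploy --templates "
        for template in benchmark['templates']:
            cmd = cmd + " " + template + " "
        for service in benchmark['cloud']:
            cmd = cmd + " --" + service['node'] + "-scale " + str(scales[service['node']])
        return cmd + " --timeout=" + str(benchmark['timeout']) + \
               " --ntp-server=" + str(ntp_server)

    bench = {'templates': [""], 'timeout': 600,
             'cloud': [{'node': 'compute'}, {'node': 'control'}]}
    print(build_deploy_cmd(bench, {'compute': 1, 'control': 1}, "pool.ntp.org"))
    # ... --compute-scale 1 --control-scale 1 --timeout=600 --ntp-server=pool.ntp.org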
@@ -15,6 +15,7 @@ from lib.Elastic import browbeat_uuid
 import lib.PerfKit
 import lib.Rally
 import lib.Shaker
+import lib.Yoda
 import lib.WorkloadBase
 import lib.Tools
 import argparse
@@ -24,7 +25,7 @@ import time
 import datetime
 import os

-_workload_opts = ['perfkit', 'rally', 'shaker']
+_workload_opts = ['perfkit', 'rally', 'shaker', 'yoda']
 _config_file = 'browbeat-config.yaml'
 debug_log_file = 'log/debug.log'
@@ -143,6 +143,76 @@ using some simple searches such as:
     shaker_uuid: 97092334-34e8-446c-87d6-6a0f361b9aa8 AND record.concurrency: 1 AND result.result_type: bandwidth
     shaker_uuid: c918a263-3b0b-409b-8cf8-22dfaeeaf33e AND record.concurrency:1 AND record.test:Bi-Directional

+Running YODA
+============
+
+YODA (Yet Openstack Deployment tool, Another) is a workload integrated into
+Browbeat for benchmarking TripleO deployment. This includes importing baremetal
+nodes, running introspection and overcloud deployments of various kinds. Note
+that YODA assumes it is on the undercloud of a TripleO instance post undercloud
+installation and introspection.
+
+Configuration
+-------------
+For examples of the configuration see `browbeat-complete.yaml` in the repo root directory.
+Additional configuration documentation can be found below for each subworkload of YODA.
+
+Overcloud
+~~~~~~~~~
+For overcloud workloads, note that the nodes dictionary is dynamic, so you don't
+have to define types you aren't using; this is done in the demonstration
+configurations for the sake of completeness. Furthermore, the node name is taken
+from the name of the field, meaning custom role names should work fine there.
+
+The step parameter decides how many nodes can be distributed between the various
+types to get from start scale to end scale; if these are the same it won't
+matter. But if they are different, up to that many nodes will be distributed to
+the different node types (in no particular order) before the next deploy is
+performed. The step rule is violated if and only if that is required to keep the
+deployment viable; for example, if the step dictates that 2 control nodes be
+deployed, it will skip to 3 even though that violates the step.
+
+YODA has basic support for custom templates and more advanced roles; configure the
+`templates:` parameter in the overcloud benchmark section with a string of
+template paths.
+
+    templates: "-e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml"
+
+Note that `--templates` is passed to the `overcloud deploy` command before this,
+then node sizes, NTP server and timeout are passed after, so your templates
+will override the defaults, but not the scale, timeout, or NTP settings from the
+YODA config. If you want to use scheduling hints for your overcloud deploy you
+will need to pip install [ostag](https://github.com/jkilpatr/ostag) and set
+`node_pinning: True` in your config file. Ostag will be used before every deploy
+to clean all tags and tag the appropriate nodes. If you set `node_pinning: False`
+tags will be cleaned before the deploy. If you need more advanced features, view
+the ostag readme for how to tag based on node properties. If you don't want YODA
+to edit your node properties, don't define `node_pinning` in your configuration.
+
+Introspection
+~~~~~~~~~~~~~
+Introspection workloads have two modes, batch and individual. The batch workload
+follows the documentation exactly: nodes are imported, then bulk introspection
+is run. Individual introspection has its own custom batch size and handles
+failures more gracefully (individual instead of group retries). Both have a
+timeout configured in seconds and record the amount of time required for each
+node to PXE as well as the number of failures.
+
+`timeout` is how long we wait for the node to come back from introspection; this
+is hardware dependent. The default 900 seconds has been shown to be the 99th
+percentile for success across at least two sets of hardware. Adjust as required.
+
+Note that `batch_size` cannot produce a batch of unintrospected nodes if none exist,
+so the last batch may be below the maximum size. When nodes in a batch fail, the
+`failure_count` is incremented and the nodes are returned to the pool, so it is
+possible that the same node will fail again in another batch. There is a safety
+mechanism that will kill YODA if a node exceeds 10 retries, as that is pretty much
+guaranteed to be a misconfiguration. For bulk introspection, all nodes are tried
+once and what you get is what you get.
+
+If you wish to change the introspection workload failure threshold of 10% you can
+set `max_fail_amnt` to any floating point value you desire.
+
+I would suggest bulk introspection for testing documented TripleO workflows and
+individual introspection to test the performance of introspection itself.
+
 Interpreting Browbeat Results
 =============================
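To make the step rule from the Overcloud section above concrete, here is a standalone sketch adapted from update_nodes_dict() in lib/Yoda.py (the benchmark values are illustrative): at most `step` nodes are added across all node types per deploy, and the 2-controller case jumps to 3 to keep the deployment viable.

    def next_scale(benchmark, nodes):
        step, added, changed = benchmark['step'], 0, False
        for service in benchmark['cloud']:
            node_type = service['node']
            if nodes[node_type] < service['end_scale']:
                add = min(service['end_scale'] - nodes[node_type], step - added)
                nodes[node_type] += add
                added += add
                changed = True
        if nodes.get('control') == 2:  # exactly 2 controllers is not deployable
            nodes['control'] = 3
        return nodes, changed

    bench = {'step': 2, 'cloud': [{'node': 'compute', 'end_scale': 4},
                                  {'node': 'control', 'end_scale': 3}]}
    print(next_scale(bench, {'compute': 1, 'control': 1}))
    # ({'compute': 3, 'control': 1}, True) -- both step slots went to compute first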
@@ -61,7 +61,7 @@ class Connmon(object):
     def connmon_graphs(self, result_dir, test_name):
         cmd = "python graphing/connmonplot.py {}/connmon/{}.csv".format(result_dir,
                                                                         test_name)
-        return self.tools.run_cmd(cmd)
+        return self.tools.run_cmd(cmd)['stdout']

     # Move connmon results
     def move_connmon_results(self, result_dir, test_name):
lib/Rally.py (10 lines changed)
@@ -66,7 +66,7 @@ class Rally(WorkloadBase.WorkloadBase):
         cmd += "rally {} task start {} --task-args \'{}\' 2>&1 | tee {}.log".format(
             plugin_string, task_file, task_args, test_name)
         from_time = time.time()
-        self.tools.run_cmd(cmd)
+        self.tools.run_cmd(cmd)['stdout']
         to_time = time.time()
         if 'sleep_after' in self.config['rally']:
             time.sleep(self.config['rally']['sleep_after'])
@@ -93,7 +93,7 @@ class Rally(WorkloadBase.WorkloadBase):
     def get_task_id(self, test_name):
         cmd = "grep \"rally task results\" {}.log | awk '{{print $4}}'".format(
             test_name)
-        return self.tools.run_cmd(cmd)
+        return self.tools.run_cmd(cmd)['stdout']

     def _get_details(self):
         self.logger.info(
@@ -111,17 +111,17 @@ class Rally(WorkloadBase.WorkloadBase):
         cmd = "source {}; ".format(self.config['rally']['venv'])
         cmd += "rally task report --task {} --out {}.html".format(
             all_task_ids, test_name)
-        return self.tools.run_cmd(cmd)
+        return self.tools.run_cmd(cmd)['stdout']

     def gen_scenario_json(self, task_id):
         cmd = "source {}; ".format(self.config['rally']['venv'])
         cmd += "rally task results {}".format(task_id)
-        return self.tools.run_cmd(cmd)
+        return self.tools.run_cmd(cmd)['stdout']

     def gen_scenario_json_file(self, task_id, test_name):
         cmd = "source {}; ".format(self.config['rally']['venv'])
         cmd += "rally task results {} > {}.json".format(task_id, test_name)
-        return self.tools.run_cmd(cmd)
+        return self.tools.run_cmd(cmd)['stdout']

     def rally_metadata(self, result, meta):
         result['rally_metadata'] = meta
@@ -39,7 +39,7 @@ class Shaker(WorkloadBase.WorkloadBase):

     def shaker_checks(self):
         cmd = "source /home/stack/overcloudrc; glance image-list | grep -w shaker-image"
-        if self.tools.run_cmd(cmd) == "":
+        if self.tools.run_cmd(cmd)['stdout'] == "":
             self.logger.error("Shaker Image is not built, try again")
             exit(1)
         else:
lib/Tools.py (46 lines changed)
@@ -13,6 +13,7 @@
 import PerfKit
 import Rally
 import Shaker
+import Yoda
 import logging
 import os
 import subprocess
@@ -29,16 +30,37 @@ class Tools(object):
         self.config = config
         return None

+    # Returns true if ping successful, false otherwise
+    def is_pingable(self, ip):
+        cmd = "ping -c1 " + ip
+        result = self.run_cmd(cmd)
+        if result['rc'] == 0:
+            return True
+        else:
+            return False
+
+    # Run command async from the python main thread, return Popen handle
+    def run_async_cmd(self, cmd):
+        FNULL = open(os.devnull, 'w')
+        self.logger.debug("Running command : %s" % cmd)
+        process = subprocess.Popen(cmd, shell=True, stdout=FNULL)
+        return process
+
     # Run command, return stdout as result
     def run_cmd(self, cmd):
         self.logger.debug("Running command : %s" % cmd)
         process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
         stdout, stderr = process.communicate()
-        if len(stderr) > 0:
-            return None
-        else:
-            return stdout.strip()
+        output_dict = {}
+        output_dict['stdout'] = stdout.strip()
+        output_dict['stderr'] = stderr.strip()
+        output_dict['rc'] = process.returncode
+        if process.returncode > 0:
+            self.logger.error("Command {} returned with error".format(cmd))
+            self.logger.error("stdout: {}".format(stdout))
+            self.logger.error("stderr: {}".format(stderr))
+        return output_dict

     # Find Command on host
     def find_cmd(self, cmd):
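The run_cmd() rewrite above changes the call contract for every workload: instead of stdout-or-None, callers get a dict and can distinguish partial output from failure. A runnable standalone copy of the new body, with an illustrative command:

    import subprocess

    def run_cmd(cmd):
        process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
        return {'stdout': stdout.strip(), 'stderr': stderr.strip(),
                'rc': process.returncode}

    result = run_cmd("echo hello; no-such-command")
    print(result['rc'])      # non-zero, e.g. 127 from the shell
    print(result['stdout'])  # 'hello' -- previously lost whenever stderr was non-empty
    print(result['stderr'])  # the shell's error text, no longer collapsed into None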
@@ -103,6 +125,9 @@ class Tools(object):
         elif provider == "shaker":
             shaker = Shaker.Shaker(self.config)
             shaker.run_shaker()
+        elif provider == "yoda":
+            yoda = Yoda.Yoda(self.config)
+            yoda.start_workloads()
         else:
             self.logger.error("Unknown workload provider: {}".format(provider))
@@ -118,6 +143,7 @@ class Tools(object):
     def gather_metadata(self):
         os.putenv("ANSIBLE_SSH_ARGS",
                   " -F {}".format(self.config['ansible']['ssh_config']))
+
         ansible_cmd = \
             'ansible-playbook -i {} {}' \
             .format(self.config['ansible']['hosts'], self.config['ansible']['metadata'])
@@ -175,3 +201,15 @@ class Tools(object):
             if workload is "perfkit":
                 # Stub for PerfKit.
                 continue
+
+    def load_stackrc(self, filepath):
+        values = {}
+        with open(filepath) as stackrc:
+            for line in stackrc:
+                pair = line.split('=')
+                if 'export' not in line and '#' not in line and '$(' not in line:
+                    values[pair[0].strip()] = pair[1].strip()
+                elif '$(' in line and 'for key' not in line:
+                    values[pair[0].strip()] = \
+                        self.run_cmd("echo " + pair[1].strip())['stdout'].strip()
+        return values
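A usage sketch for the new load_stackrc() helper, assuming a Tools instance `tools` built from a loaded Browbeat config and the classic stackrc layout where each variable is assigned and then exported on its own line (the sample contents are an assumption):

    import tempfile

    sample = ("NOVA_VERSION=1.1\n"
              "export NOVA_VERSION\n"
              "OS_AUTH_URL=http://192.0.2.1:5000/v2.0/\n"
              "export OS_AUTH_URL\n")
    with tempfile.NamedTemporaryFile('w', suffix='rc', delete=False) as f:
        f.write(sample)

    print(tools.load_stackrc(f.name))
    # {'NOVA_VERSION': '1.1', 'OS_AUTH_URL': 'http://192.0.2.1:5000/v2.0/'}
    # a `$( )` assignment would instead be resolved via `echo` through run_cmd()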
lib/Yoda.py (new file, 643 lines)
@@ -0,0 +1,643 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Yet another cloud deployment tool

import datetime
import Elastic
import Grafana
import json
import logging
import time
import Tools
import WorkloadBase
from openstack import connection
from openstack import exceptions
import os
import requests
try:
    from ostag import ostag
except ImportError:
    ostag = None
from collections import deque


class Yoda(WorkloadBase.WorkloadBase):

    def __init__(self, config):
        self.logger = logging.getLogger('browbeat.yoda')
        self.config = config
        self.tools = Tools.Tools(self.config)
        self.grafana = Grafana.Grafana(self.config)
        self.elastic = Elastic.Elastic(self.config, self.__class__.__name__.lower())
        self.error_count = 0
        self.pass_count = 0
        self.test_count = 0
        self.scenario_count = 0

    def get_stats(self):
        self.logger.info(
            "Current number of YODA tests executed: {}".format(
                self.test_count))
        self.logger.info(
            "Current number of YODA tests passed: {}".format(
                self.pass_count))
        self.logger.info(
            "Current number of YODA tests failed: {}".format(
                self.error_count))

    def update_tests(self):
        self.test_count += 1
        self.update_total_tests()

    def update_pass_tests(self):
        self.pass_count += 1
        self.update_total_pass_tests()

    def update_fail_tests(self):
        self.error_count += 1
        self.update_total_fail_tests()

    def update_scenarios(self):
        self.scenario_count += 1
        self.update_total_scenarios()

    def state_tracker_extend(self, state, state_list):
        if state is None:
            return state_list
        elif state_list is None:
            return [state]
        elif state in state_list[-1]:
            return state_list
        else:
            state_list.append(state)
            return state_list

    def node_is_cleaning(self, provision_state):
        ret = provision_state is not None
        ret = ret and 'clean' in provision_state
        ret = ret and 'fail' not in provision_state
        return ret

    def is_cleaning(self, conn):
        for node in conn.bare_metal.nodes():
            if self.node_is_cleaning(node.provision_state):
                return True
        return False

    def failed_cleaning_count(self, conn):
        count = 0
        for node in conn.bare_metal.nodes():
            if self.node_is_cleaning(node.provision_state):
                count += 1
        return count

    def wait_for_clean(self, env_setup, conn):
        wait_time = 1
        # 15 minute timeout
        timeout = (60 * 15)
        while self.is_cleaning(conn):
            # Cleans can fail, so we just try again
            if wait_time % 1000 == 0:
                self.set_ironic_node_state("manage", env_setup, conn)
                time.sleep(30)
                self.set_ironic_node_state("provide", env_setup, conn)
            time.sleep(1)
            wait_time += 1
            if wait_time > timeout:
                self.logger.error("Node Cleaning failed")
                exit(1)

    # Required to use console commands because of this bug
    # https://bugs.launchpad.net/python-openstacksdk/+bug/1668767
    def set_ironic_node_state(self, state, env_setup, conn, node_uuid=""):
        if node_uuid != "":
            nodes = [node_uuid]
        else:
            nodes = deque(map(lambda node: node.id, conn.bare_metal.nodes()))

        if state == "manage":
            cmd_base = "{} openstack baremetal node manage {}"
            for _ in range(len(nodes)):
                node = nodes.pop()
                node_obj = conn.bare_metal.get_node(node)
                if "manage" not in node_obj.provision_state:
                    nodes.append(node)
        elif state == "provide":
            cmd_base = "{} openstack baremetal node provide {}"
            for _ in range(len(nodes)):
                node = nodes.pop()
                node_obj = conn.bare_metal.get_node(node)
                prov_state = node_obj.provision_state
                if prov_state is not None and "available" not in prov_state:
                    nodes.append(node)
        elif state == "inspect":
            cmd_base = "{} openstack baremetal introspection start {}"
        elif state == "off":
            cmd_base = "{} openstack baremetal node power off {}"
            for _ in range(len(nodes)):
                node = nodes.pop()
                node_obj = conn.bare_metal.get_node(node)
                if "off" not in node_obj.power_state:
                    nodes.append(node)
        elif state == "on":
            cmd_base = "{} openstack baremetal node power on {}"
            for _ in range(len(nodes)):
                node = nodes.pop()
                node_obj = conn.bare_metal.get_node(node)
                if "on" not in node_obj.power_state:
                    nodes.append(node)
        elif state == "delete":
            cmd_base = "{} openstack baremetal node delete {}"
        else:
            self.logger.error("set_ironic_node_state() called with invalid state")
            exit(1)

        for node in nodes:
            cmd = cmd_base.format(env_setup, node)
            self.tools.run_async_cmd(cmd)
            time.sleep(.5)

    # Gathers metrics on the instack env import
    def import_instackenv(self, filepath, env_setup, conn):
        results = {}
        filepath = os.path.abspath(os.path.expandvars(filepath))
        cmd = "{} openstack overcloud node import {}".format(env_setup, filepath)
        start_time = datetime.datetime.utcnow()

        out = self.tools.run_cmd(cmd)

        nodes = conn.bare_metal.nodes()
        for node in nodes:
            while 'enroll' in node.provision_state:
                node = conn.bare_metal.get_node(node)
                time.sleep(1)

        end_time = datetime.datetime.utcnow()
        results['import_time'] = (end_time - start_time).total_seconds()

        if out['stderr'] == '' or 'Error' not in out['stderr']:
            results['import_status'] = "success"
        else:
            results['import_status'] = "failure"
            self.logger.error("Instackenv import returned 1, printing stderr")
            self.logger.error(out['stderr'])

        return results

    # Introspection with exactly the documented workflow
    def introspection_bulk(self, timeout, env_setup, conn):
        results = {}
        nodes = deque(map(lambda node: node.id, conn.bare_metal.nodes()))
        cmd = "{} openstack overcloud node introspect --all-manageable".format(env_setup)
        results['nodes'] = {}

        for node in conn.bare_metal.nodes(details=True):
            results['nodes'][node.id] = {}
            results['nodes'][node.id]["last_error"] = node.last_error
            results['nodes'][node.id]["driver"] = node.driver
            results['nodes'][node.id]["driver_info"] = node.driver_info
            results['nodes'][node.id]["properties"] = node.properties
            results['nodes'][node.id]["failures"] = 0
            results['nodes'][node.id]["state_list"] = None

        self.tools.run_async_cmd(cmd)

        out = self.watch_introspecting_nodes(nodes, timeout, conn, results)

        failed = out[0]
        results['raw'] = out[1]
        results["failure_count"] = len(failed)
        return results

    def watch_introspecting_nodes(self, nodes, timeout, conn, results):
        start_time = datetime.datetime.utcnow()
        times = []
        timeout = datetime.timedelta(seconds=timeout)

        while len(nodes):
            node = nodes.pop()
            # rate limit
            time.sleep(1)
            node_obj = conn.bare_metal.get_node(node)
            if node_obj is None:
                self.logger.error("Can't find node " + node +
                                  " which existed at the start of introspection; \
                                  did you delete it manually?")
                continue

            # == works here for string comparison because they are in fact
            # the same object if not changed
            stored_properties = str(results['nodes'][node_obj.id]["properties"])
            node_properties = str(node_obj.properties)
            changed = not stored_properties == node_properties

            powered_off = 'off' in node_obj.power_state
            not_cleaning = 'clean' not in node_obj.provision_state
            if changed and powered_off and not_cleaning:

                results['nodes'][node_obj.id]["properties"] = node_obj.properties

                results['nodes'][node_obj.id]["state_list"] = \
                    self.state_tracker_extend(node_obj.provision_state,
                                              results['nodes'][node_obj.id]["state_list"])

                times.append((datetime.datetime.utcnow() - start_time).total_seconds())

            elif (datetime.datetime.utcnow() - start_time) > timeout:
                for node in nodes:
                    node_obj = conn.bare_metal.get_node(node)

                    results['nodes'][node_obj.id]['failures'] += 1
                    if results['nodes'][node_obj.id]['failures'] > 10:
                        self.logger.error("Node "
                                          + node_obj.id
                                          + " has failed more than 10 introspections")
                        self.logger.error("This probably means it's misconfigured, exiting")
                        exit(1)

                break
            else:
                results['nodes'][node_obj.id]["state_list"] = \
                    self.state_tracker_extend(node_obj.provision_state,
                                              results['nodes'][node_obj.id]["state_list"])
                nodes.appendleft(node)

        return (nodes, times)

    # Introspection with robust failure handling
    def introspection_individual(self, batch_size, timeout, env_setup, conn):
        nodes = deque(map(lambda node: node.id, conn.bare_metal.nodes()))
        failure_count = 0
        batch = deque()
        results = {}
        results['raw'] = []
        results['nodes'] = {}

        for node in conn.bare_metal.nodes(details=True):
            results['nodes'][node.id] = {}
            results['nodes'][node.id]["last_error"] = node.last_error
            results['nodes'][node.id]["driver"] = node.driver
            results['nodes'][node.id]["driver_info"] = node.driver_info
            results['nodes'][node.id]["properties"] = node.properties
            results['nodes'][node.id]["failures"] = 0
            results['nodes'][node.id]["state_list"] = None

        while len(nodes):
            node = nodes.pop()
            self.set_ironic_node_state("inspect", env_setup, conn, node)
            batch.append(node)
            if len(batch) >= batch_size or (len(nodes) == 0 and len(batch) != 0):
                out = self.watch_introspecting_nodes(batch, timeout, conn, results)
                failed = out[0]
                results['raw'].extend(out[1])
                failure_count = failure_count + len(failed)
                nodes.extend(failed)
                batch.clear()

        results["failure_count"] = failure_count
        return results

    def delete_stack(self, conn):
        wait_time = 0
        # 30 minute timeout
        timeout = (60 * 30)
        while conn.orchestration.find_stack("overcloud") is not None:
            # Deletes can fail, so we just try again
            if wait_time % 2000 == 0:
                conn.orchestration.delete_stack("overcloud")
            time.sleep(5)
            wait_time += 5
            if wait_time > timeout:
                self.logger.error("Overcloud stack delete failed")
                exit(1)

    def setup_nodes_dict(self, benchmark):
        nodes = {}
        for service in benchmark['cloud']:
            nodes[service['node']] = service['start_scale']
            nodes["previous_" + service['node']] = -1
        return nodes

    def update_nodes_dict(self, benchmark, nodes, changed):
        # update settings for next round, note if changes are made
        step = benchmark['step']
        nodes_added = 0
        for service in benchmark['cloud']:
            node_type = service['node']
            end_scale = service['end_scale']
            nodes["previous_" + node_type] = nodes[node_type]
            if nodes[node_type] < end_scale:
                difference = end_scale - nodes[node_type]
                allowed_difference = step - nodes_added
                add = min(difference, allowed_difference)
                nodes[node_type] += add
                nodes_added += add
                changed = True

        # edge cases, note we must round up otherwise we get
        # stuck forever if step is 1, this also means we must
        # violate the step rules to both ensure a valid deployment
        # and progression
        if 'control' in nodes and nodes['control'] == 2:
            nodes['control'] = 3
        if 'ceph' in nodes and nodes['ceph'] > 0 and nodes['ceph'] < 3:
            nodes['ceph'] = 3

        return (nodes, changed)

    def deploy_overcloud(self, start_time, results, ntp_server, conn, env_setup, benchmark):

        if type(ntp_server) != str:
            self.logger.error("Please configure an NTP server!")
            exit(1)

        cmd = env_setup + "openstack overcloud deploy --templates "
        for template in benchmark['templates']:
            cmd = cmd + " " + template + " "
        for service in benchmark['cloud']:
            cmd = cmd + " --" + service['node'] + "-scale " + str(results[service['node']])
        cmd = cmd + " --timeout=" + str(benchmark['timeout']) + " --ntp-server=" + str(ntp_server)

        self.logger.debug("Openstack deployment command is " + cmd)
        results["overcloud_deploy_command"] = cmd
        deploy_process = self.tools.run_async_cmd(cmd)
        results['cleaning_failures'] = self.failed_cleaning_count(conn)
        results['nodes'] = {}

        while deploy_process.poll() is None:
            time.sleep(5)
            try:
                for node in conn.compute.servers():
                    time.sleep(1)

                    # look for new instances to add to our metadata
                    if node.name not in results['nodes']:
                        results['nodes'][node.name] = {}
                        create_time = datetime.datetime.strptime(node.created_at,
                                                                 "%Y-%m-%dT%H:%M:%SZ")
                        results['nodes'][node.name]['created_at'] = \
                            (create_time - start_time).total_seconds()
                        results['nodes'][node.name]['scheduler_hints'] = \
                            node.scheduler_hints
                        results['nodes'][node.name]['state_list'] = None

                    # try and figure out which baremetal node this
                    # instance is scheduled on
                    if 'bm_node' not in results['nodes'][node.name]:
                        try:
                            bm_node = next(conn.bare_metal.nodes(details=True,
                                                                 instance_id=node.id))
                            results['nodes'][node.name]['bm_node'] = \
                                bm_node.id
                            results['nodes'][node.name]['bm_node_properties'] = \
                                bm_node.properties
                            results['nodes'][node.name]['bm_node_driver'] = \
                                bm_node.driver
                            results['nodes'][node.name]['bm_last_error'] = \
                                bm_node.last_error
                        except StopIteration:
                            continue

                    update_time = datetime.datetime.strptime(node.updated_at,
                                                             "%Y-%m-%dT%H:%M:%SZ")
                    results['nodes'][node.name]['last_updated_at'] = \
                        (update_time - start_time).total_seconds()
                    results['nodes'][node.name]['final_status'] = node.status
                    bm_node = next(conn.bare_metal.nodes(details=True,
                                                         instance_id=node.id))
                    state_list = results['nodes'][node.name]['state_list']
                    state_list = \
                        self.state_tracker_extend(bm_node.provision_state,
                                                  state_list)

                    rentry = results['nodes'][node.name]
                    # Populate this field so it gets indexed every time
                    # even if nodes are never pingable
                    rentry['ping_time'] = -1
                    condition = 'private' in node.addresses
                    condition = condition and 'pingable_at' not in rentry
                    ping = self.tools.is_pingable(node.addresses['private'])
                    condition = condition and ping
                    if condition:
                        ping_time = datetime.datetime.utcnow()
                        rentry['ping_time'] = (ping_time - start_time).total_seconds()

            except exceptions.HttpException:
                self.logger.error("OpenStack bare_metal API is returning NULL")
                self.logger.error("This sometimes happens during stack creates")
        return results

    def elastic_insert(self, results, run, start_time, benchmark, results_dir):
        scenario_name = benchmark['name']
        results['action'] = scenario_name.strip()
        results['browbeat_rerun'] = run
        results['timestamp'] = str(start_time).replace(" ", "T")
        results['grafana_url'] = self.grafana.grafana_urls()
        results['scenario'] = benchmark['name']
        results['scenario_config'] = benchmark

        # Create list of objects for Elastic insertion rather than
        # dict of dicts. Insert key to not lose name data
        nodes_data = []
        for key in results['nodes']:
            results['nodes'][key]['name'] = key
            nodes_data.append(results['nodes'][key])
        results['nodes'] = nodes_data

        results = self.elastic.combine_metadata(results)
        if not self.elastic.index_result(results, scenario_name, results_dir):
            self.update_index_failures()

    def dump_scenario_json(self, results_dir, json, time):
        with open(results_dir + "/" + str(time).strip() + ".json", 'w') as outfile:
            outfile.write(json)

    def setup_scenario(self, benchmark_name, dir_ts):
        results_dir = self.tools.create_results_dir(self.config['browbeat']['results'],
                                                    dir_ts,
                                                    benchmark_name,
                                                    benchmark_name)

        if type(results_dir) is bool:
            self.logger.error("Malformed Config, benchmark names must be unique!")
            exit(1)

        self.logger.debug("Created result directory: {}".format(results_dir))
        workload = self.__class__.__name__
        self.workload_logger(results_dir, workload)
        return results_dir

    def introspection_workload(self, benchmark, run, results_dir, env_setup, conn):
        self.delete_stack(conn)
        self.wait_for_clean(env_setup, conn)
        test_start = datetime.datetime.utcnow()

        self.wait_for_clean(env_setup, conn)
        self.set_ironic_node_state("delete", env_setup, conn)
        while len(list(conn.bare_metal.nodes())) > 0:
            time.sleep(5)
        import_results = self.import_instackenv(benchmark['instackenv'], env_setup, conn)
        self.set_ironic_node_state("manage", env_setup, conn)
        self.set_ironic_node_state("off", env_setup, conn)

        if benchmark['method'] == "individual":
            introspection_results = self.introspection_individual(benchmark['batch_size'],
                                                                  benchmark['timeout'],
                                                                  env_setup, conn)
        elif benchmark['method'] == "bulk":
            introspection_results = self.introspection_bulk(benchmark['timeout'], env_setup, conn)
        else:
            self.logger.error("Malformed YODA configuration for " + benchmark['name'])
            exit(1)

        self.get_stats()

        # Combines dicts but mutates introspection_results rather than
        # returning a new value
        import_results.update(introspection_results)
        results = import_results

        results['total_nodes'] = len(list(map(lambda node: node.id, conn.bare_metal.nodes())))
        # If maximum failure percentage is not set, we set it to 10%
        if 'max_fail_amnt' not in benchmark:
            benchmark['max_fail_amnt'] = .10
        if results['failure_count'] >= results['total_nodes'] * benchmark['max_fail_amnt']:
            self.update_fail_tests()
        else:
            self.update_pass_tests()
        self.update_tests()

        self.dump_scenario_json(results_dir, json.dumps(results), test_start)
        if self.config['elasticsearch']['enabled']:
            self.elastic_insert(results, run, test_start, benchmark, results_dir)

    def overcloud_workload(self, benchmark, run, results_dir, env_setup, conn):
        if conn.orchestration.find_stack("overcloud") is None:
            self.set_ironic_node_state("provide", env_setup, conn)
            self.wait_for_clean(env_setup, conn)

        keep_stack = benchmark['keep_stack']
        results = self.setup_nodes_dict(benchmark)
        changed = True
        while changed:

            changed = False

            # Can't scale from HA to non HA or back
            control_change = results['control'] != results['previous_control']
            if keep_stack and not control_change:
                results['method'] = "update"
            else:
                self.delete_stack(conn)
                self.wait_for_clean(env_setup, conn)
                results['method'] = "new"

            start_time = datetime.datetime.utcnow()
            if 'node_pinning' in benchmark:
                if ostag is None:
                    self.logger.error("ostag is not installed please run")
                    self.logger.error(" pip install git+https://github.com/jkilpatr/ostag")
                    self.logger.error("Pinning not used in this test!")
                elif benchmark['node_pinning']:
                    ostag.clear_tags(conn)
                    for node in benchmark['cloud']:
                        ostag.mark_nodes("", node['node'], conn, False, "", node['end_scale'])
                else:
                    ostag.clear_tags(conn)

            results = self.deploy_overcloud(start_time, results,
                                            benchmark['ntp_server'],
                                            conn, env_setup,
                                            benchmark)

            results['total_time'] = (datetime.datetime.utcnow() - start_time).total_seconds()

            results['result'] = str(conn.orchestration.find_stack("overcloud").status)
            results['result_reason'] = str(conn.orchestration.find_stack("overcloud").status_reason)
            results['total_nodes'] = len(list(map(lambda node: node.id, conn.bare_metal.nodes())))
            if "COMPLETE" in results['result']:
                self.update_pass_tests()
            else:
                self.update_fail_tests()
            self.update_tests()

            self.get_stats()
            self.tools.gather_metadata()
            self.dump_scenario_json(results_dir, json.dumps(results), start_time)
            if self.config['elasticsearch']['enabled']:
                self.elastic_insert(results, run, start_time, benchmark, results_dir)

            out = self.update_nodes_dict(benchmark, results, changed)
            results = out[0]
            changed = out[1]

    def start_workloads(self):
        """Iterates through all yoda scenarios in browbeat yaml config file"""
        self.logger.info("Starting YODA workloads")
        es_ts = datetime.datetime.utcnow()
        dir_ts = es_ts.strftime("%Y%m%d-%H%M%S")
        self.logger.debug("Time Stamp (Prefix): {}".format(dir_ts))

        stackrc = self.config.get('yoda')['stackrc']
        venv = self.config.get('yoda')['venv']
        env_setup = "source {}; source {};".format(stackrc, venv)

        auth_vars = self.tools.load_stackrc(stackrc)
        if 'OS_AUTH_URL' not in auth_vars:
            self.logger.error("Please make sure your stackrc is configured correctly")
            exit(1)

        auth_args = {
            'auth_url': auth_vars['OS_AUTH_URL'],
            'project_name': 'admin',
            'username': auth_vars['OS_USERNAME'],
            'password': auth_vars['OS_PASSWORD'],
            'verify': False
        }
        requests.packages.urllib3.disable_warnings()
        conn = connection.Connection(**auth_args)

        instackenv = self.config.get('yoda')['instackenv']
        benchmarks = self.config.get('yoda')['benchmarks']
        if (benchmarks is not None and len(benchmarks) > 0):
            for benchmark in benchmarks:
                if benchmark['enabled']:

                    results_dir = self.setup_scenario(benchmark['name'], dir_ts)
                    times = benchmark['times']
                    if 'instackenv' not in benchmark:
                        benchmark['instackenv'] = instackenv
                    for rerun in range(self.config['browbeat']['rerun']):
                        for run in range(times):
                            self.update_tests()
                            if benchmark['type'] == "overcloud":
                                self.overcloud_workload(benchmark,
                                                        run,
                                                        results_dir,
                                                        env_setup,
                                                        conn)
                            elif benchmark['type'] == "introspection":
                                self.introspection_workload(benchmark,
                                                            run,
                                                            results_dir,
                                                            env_setup,
                                                            conn)
                            else:
                                self.logger.error("Could not identify YODA workload!")
                                exit(1)
                            self.update_scenarios()

                else:
                    self.logger.info(
                        "Skipping {} benchmarks enabled: false".format(benchmark['name']))
        else:
            self.logger.error("Config file contains no yoda benchmarks.")
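To illustrate how state_tracker_extend() in the new module accumulates a node's provision-state history, here it is treated as a free function with illustrative Ironic provision states: it ignores None samples and collapses consecutive repeats.

    def state_tracker_extend(state, state_list):
        if state is None:
            return state_list
        elif state_list is None:
            return [state]
        elif state in state_list[-1]:
            return state_list
        else:
            state_list.append(state)
            return state_list

    states = None
    for observed in [None, 'manageable', 'inspecting', 'inspecting', 'manageable']:
        states = state_tracker_extend(observed, states)
    print(states)  # ['manageable', 'inspecting', 'manageable']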
@@ -281,3 +281,86 @@ mapping:
           file:
             type: str
             required: True
+  yoda:
+    required: False
+    type: map
+    allowempty: True
+    mapping:
+      enabled:
+        type: bool
+        required: True
+      instackenv:
+        type: str
+        required: True
+      stackrc:
+        type: str
+        required: True
+      venv:
+        type: str
+        required: True
+      benchmarks:
+        type: seq
+        required: True
+        sequence:
+          - type: map
+            mapping:
+              name:
+                type: str
+                required: True
+              type:
+                type: str
+                required: True
+              enabled:
+                required: True
+                type: bool
+              ntp_server:
+                type: str
+                required: False
+              templates:
+                type: seq
+                required: False
+                sequence:
+                  - type: str
+              instackenv:
+                type: str
+                required: false
+              times:
+                type: int
+                required: True
+              step:
+                type: int
+                required: False
+              method:
+                type: str
+                required: False
+              timeout:
+                type: int
+                required: True
+              max_fail_amnt:
+                type: float
+                required: False
+              batch_size:
+                type: int
+                required: False
+              keep_stack:
+                type: bool
+                required: False
+              node_pinning:
+                type: bool
+                required: False
+              cloud:
+                type: seq
+                sequence:
+                  - type: map
+                    allowempty: True
+                    mapping:
+                      node:
+                        type: str
+                        required: True
+                      start_scale:
+                        type: int
+                        required: True
+                      end_scale:
+                        type: int
+                        required: True
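The schema addition above is what pykwalify checks a Browbeat config against; a minimal sketch of exercising it directly (the file paths are assumptions for this example):

    from pykwalify.core import Core

    core = Core(source_file="browbeat-config.yaml",
                schema_files=["lib/validate.yaml"])
    core.validate(raise_exception=True)  # raises SchemaError if the yoda block is invalid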
@@ -3,3 +3,5 @@ elasticsearch
 python-dateutil==2.4.2
 python-openstackclient==3.11.0
 pykwalify
+elasticsearch
+openstacksdk