From 5b21d92ef397ad756a2d6b750cb1de13483a5fcd Mon Sep 17 00:00:00 2001
From: jkilpatr <jkilpatr@redhat.com>
Date: Thu, 15 Jun 2017 16:03:39 -0400
Subject: [PATCH] Yoda CI job config files

This commit splits the Browbeat CI out by workload by passing in
different playbooks. The main addition is a dedicated playbook for Yoda:
there is no reason to deploy an overcloud before testing Yoda, and doing
so adds another couple of hours to the test. We also add an integration
test playbook, but it does not seriously diverge from the normal testing
playbook at the moment.

Change-Id: Ic83412cac668cbf7d2fb6d8d86c2c86eb354c9dd
---
 ...baremetal-virt-undercloud-int-browbeat.yml |  24 +++
 ...aremetal-virt-undercloud-yoda-browbeat.yml |  88 +++++++++++
 .../oooq/roles/oooq-metadata/tasks/main.yml   |   5 +
 .../oooq/roles/oooq-metadata/vars/main.yml    |   1 +
 .../templates/browbeat-yoda-ci.yaml.j2        | 147 ++++++++++++++++++
 .../oooq/roles/template-configs/vars/main.yml |   2 +-
 ci-scripts/tripleo/microbrow.sh               |   1 -
 lib/Yoda.py                                   |  48 ++++--
 8 files changed, 299 insertions(+), 17 deletions(-)
 create mode 100644 ansible/oooq/baremetal-virt-undercloud-int-browbeat.yml
 create mode 100644 ansible/oooq/baremetal-virt-undercloud-yoda-browbeat.yml
 create mode 100644 ansible/oooq/roles/template-configs/templates/browbeat-yoda-ci.yaml.j2

diff --git a/ansible/oooq/baremetal-virt-undercloud-int-browbeat.yml b/ansible/oooq/baremetal-virt-undercloud-int-browbeat.yml
new file mode 100644
index 000000000..7a6b0affd
--- /dev/null
+++ b/ansible/oooq/baremetal-virt-undercloud-int-browbeat.yml
@@ -0,0 +1,24 @@
+---
+# Browbeat integration test
+
+- include: baremetal-prep-virthost.yml
+
+- include: configure-browbeat.yml
+
+- include: undercloud-collectd.yml
+
+- include: baremetal-quickstart-extras.yml
+
+- include: install-browbeat.yml
+
+- include: disable-ssh-dns.yml
+
+- include: overcloud-collectd.yml
+
+- include: gather-metadata.yml
+
+- name: Run Browbeat
+  hosts: undercloud
+  roles:
+    - browbeat/browbeat-run
+    - browbeat/browbeat-classify
diff --git a/ansible/oooq/baremetal-virt-undercloud-yoda-browbeat.yml b/ansible/oooq/baremetal-virt-undercloud-yoda-browbeat.yml
new file mode 100644
index 000000000..761243ed1
--- /dev/null
+++ b/ansible/oooq/baremetal-virt-undercloud-yoda-browbeat.yml
@@ -0,0 +1,88 @@
+---
+- include: baremetal-prep-virthost.yml
+
+- name: Validate IPMI and instackenv.json
+  hosts: undercloud
+  gather_facts: yes
+  roles:
+    - validate-ipmi
+
+- name: Deploy the undercloud
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - undercloud-deploy
+
+- name: Copy over config files
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - overcloud-prep-config
+
+- name: Prepare the baremetal overcloud
+  hosts: undercloud
+  gather_facts: yes
+  roles:
+    - baremetal-prep-overcloud
+
+- name: Prepare overcloud containers
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - { role: overcloud-prep-containers, when: containerized_overcloud|bool }
+
+- name: Prepare the overcloud images for deployment
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - overcloud-prep-images
+
+- name: Prepare overcloud flavors
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - overcloud-prep-flavors
+
+- name: Prepare the undercloud networks for the overcloud deployment
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - overcloud-prep-network
+
+- name: Prepare the SSL Configuration for the overcloud deployment
+  hosts: undercloud
+  gather_facts: no
+  roles:
+    - { role: overcloud-ssl, when: ssl_overcloud|bool }
+
+- name: Configure Browbeat
+  hosts: undercloud
+  roles:
+    - browbeat/pre-install-setup
+    - browbeat/oooq-metadata
+
+- name: Setup Undercloud CollectD
+  hosts: undercloud
+  vars:
+    config_type: "{{group_names[0]}}"
+  roles:
+    - browbeat/common
+    - browbeat/epel
+    - browbeat/collectd-openstack
+
+- name: Install Browbeat
+  hosts: undercloud
+  vars:
+    results_in_httpd: false
+    statsd_host: "{{ graphite_host }}"
+  roles:
+    - browbeat/common
+    - browbeat/browbeat
+    - browbeat/yoda
+    - browbeat/template-configs
+    - browbeat/statsd-ironic
+
+- name: Run Browbeat
+  hosts: undercloud
+  roles:
+    - browbeat/browbeat-run
diff --git a/ansible/oooq/roles/oooq-metadata/tasks/main.yml b/ansible/oooq/roles/oooq-metadata/tasks/main.yml
index 33eb7a630..591b82b3e 100644
--- a/ansible/oooq/roles/oooq-metadata/tasks/main.yml
+++ b/ansible/oooq/roles/oooq-metadata/tasks/main.yml
@@ -10,6 +10,7 @@
   shell: docker ps | wc -l
   register: docker_ps
   delegate_to: overcloud-controller-0
+  when: "'overcloud' in group_names"
   ignore_errors: True

 - name: Set var for container deployment
@@ -24,6 +25,10 @@
   when: docker_ps.stdout|int < 2
   ignore_errors: True

+- name: Count nodes in instackenv.json
+  shell: "grep pm_addr {{instackenv}} | wc -l"
+  register: num_nodes
+
 - name: Make sure the results directory exists
   file: "path={{ ansible_env.HOME }}/browbeat/metadata state=directory"

diff --git a/ansible/oooq/roles/oooq-metadata/vars/main.yml b/ansible/oooq/roles/oooq-metadata/vars/main.yml
index 7e58fa274..4a2a238be 100644
--- a/ansible/oooq/roles/oooq-metadata/vars/main.yml
+++ b/ansible/oooq/roles/oooq-metadata/vars/main.yml
@@ -1,3 +1,4 @@
 dlrn_hash: "Not a pipeline build"
 rhos_puddle: "Not a pipeline build"
 logs_link: "https://thirdparty.logs.rdoproject.org/jenkins-{{ lookup('env','JOB_NAME') }}-{{ lookup('env','BUILD_NUMBER') }}/"
+instackenv: "/home/stack/instackenv.json"
diff --git a/ansible/oooq/roles/template-configs/templates/browbeat-yoda-ci.yaml.j2 b/ansible/oooq/roles/template-configs/templates/browbeat-yoda-ci.yaml.j2
new file mode 100644
index 000000000..4e1456ec4
--- /dev/null
+++ b/ansible/oooq/roles/template-configs/templates/browbeat-yoda-ci.yaml.j2
@@ -0,0 +1,147 @@
+# Tests to be completed for the install-and-check.sh script; minimal and short workloads are performed
+# to confirm functionality.
+browbeat:
+  results : results/
+  rerun: 1
+  cloud_name: {{ browbeat_cloud_name }}
+elasticsearch:
+  enabled: {{ elastic_enabled }}
+  host: {{ elastic_host }}
+  port: 9200
+  regather: false
+  metadata_files:
+    - name: hardware-metadata
+      file: metadata/hardware-metadata.json
+    - name: environment-metadata
+      file: metadata/environment-metadata.json
+    - name: software-metadata
+      file: metadata/software-metadata.json
+    - name: version
+      file: metadata/version.json
+ansible:
+  ssh_config: ansible/ssh-config
+  hosts: ansible/hosts
+  adjust:
+    keystone_token: ansible/browbeat/adjustment-keystone-token.yml
+    neutron_l3: ansible/browbeat/adjustment-l3.yml
+    nova_db: ansible/browbeat/adjustment-db.yml
+    workers: ansible/browbeat/adjustment-workers.yml
+  grafana_snapshot: ansible/browbeat/snapshot-general-performance-dashboard.yml
+  metadata: ansible/gather/site.yml
+connmon:
+  enabled: {{ connmon_enabled }}
+  sudo: true
+grafana:
+  enabled: {{ grafana_enabled }}
+  grafana_ip: {{ grafana_host }}
+  grafana_port: 3000
+  dashboards:
+    - openstack-general-system-performance
+  snapshot:
+    enabled: false
+    snapshot_compute: false
+yoda:
+  enabled: true
+  instackenv: "/home/stack/instackenv.json"
+  stackrc: "/home/stack/stackrc"
+  venv: "/home/stack/yoda-venv/bin/activate"
+  benchmarks:
+    - name: introspect-{{ overcloud_size }}-batch-5
+      type: introspection
+      enabled: true
+      method: individual
+      times: 10
+      timeout: 1800
+      batch_size: 5
+    - name: introspect-{{ overcloud_size }}-bulk
+      type: introspection
+      enabled: true
+      method: bulk
+      times: 10
+      timeout: 1800
+    - name: No-HA-Max-Compute-{{ overcloud_size }}-full-deploy
+      type: overcloud
+      ntp_server: pool.ntp.org
+      timeout: 600
+      templates:
+        - ""
+      enabled: true
+      step: 5
+      keep_stack: false
+      times: 2
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: {{ overcloud_size | int - 1 }}
+        - node: "control"
+          start_scale: 1
+          end_scale: 1
+    - name: No-HA-Max-Compute-{{ overcloud_size }}-stack-update
+      type: overcloud
+      ntp_server: pool.ntp.org
+      timeout: 600
+      templates:
+        - ""
+      instackenv: "/home/stack/instackenv.json"
+      enabled: true
+      step: 5
+      keep_stack: true
+      times: 2
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: {{ overcloud_size | int - 1 }}
+        - node: "control"
+          start_scale: 1
+          end_scale: 1
+    - name: HA-Max-Compute-{{ overcloud_size }}-full-deploy
+      type: overcloud
+      ntp_server: pool.ntp.org
+      timeout: 600
+      templates:
+        - ""
+      enabled: true
+      step: 5
+      keep_stack: false
+      times: 1
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: {{ overcloud_size | int - 3 }}
+        - node: "control"
+          start_scale: 3
+          end_scale: 3
+    - name: HA-Max-Compute-{{ overcloud_size }}-stack-update
+      type: overcloud
+      ntp_server: pool.ntp.org
+      timeout: 600
+      templates:
+        - ""
+      enabled: true
+      step: 5
+      keep_stack: true
+      times: 1
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: {{ overcloud_size | int - 3 }}
+        - node: "control"
+          start_scale: 3
+          end_scale: 3
+    - name: HA-Max-Compute-{{ overcloud_size }}-stack-update
+      type: overcloud
+      ntp_server: pool.ntp.org
+      timeout: 600
+      templates:
+        - ""
+      enabled: true
+      step: 5
+      keep_stack: true
+      times: 1
+      cloud:
+        - node: "compute"
+          start_scale: 1
+          end_scale: {{ overcloud_size | int - 3 }}
+        - node: "control"
+          start_scale: 3
+          end_scale: 3
diff --git a/ansible/oooq/roles/template-configs/vars/main.yml b/ansible/oooq/roles/template-configs/vars/main.yml
index 73929f4bc..b9fcd6010 100644
--- a/ansible/oooq/roles/template-configs/vars/main.yml
+++ b/ansible/oooq/roles/template-configs/vars/main.yml
@@ -5,5 +5,5 @@ grafana_enabled: false
 grafana_host: "1.2.3.4.5"
 browbeat_config_file: "browbeat-basic.yaml.j2"
 browbeat_cloud_name: "browbeat_ci"
-overcloud_size: "{{ groups['overcloud'] | length }}"
+overcloud_size: "{{num_nodes.stdout|int}}"
 ntp_server: "pool.ntp.org"
diff --git a/ci-scripts/tripleo/microbrow.sh b/ci-scripts/tripleo/microbrow.sh
index 8fa015b1e..51ab66af9 100755
--- a/ci-scripts/tripleo/microbrow.sh
+++ b/ci-scripts/tripleo/microbrow.sh
@@ -10,7 +10,6 @@ export OPT_DEBUG_ANSIBLE=0
 export HW_ENV_DIR=$WORKSPACE/tripleo-environments/hardware_environments/$HW_ENV
 export NETWORK_ISOLATION=no_vlan
 export REQS=quickstart-extras-requirements.txt
-export PLAYBOOK=baremetal-virt-undercloud-tripleo-browbeat.yml
 export VARS="elastic_enabled=true \
 --extra-vars grafana_enabled=true \
 --extra-vars elastic_host=$ELASTIC_HOST \
diff --git a/lib/Yoda.py b/lib/Yoda.py
index 75b7913ed..0954fe6c5 100644
--- a/lib/Yoda.py
+++ b/lib/Yoda.py
@@ -227,8 +227,13 @@ class Yoda(WorkloadBase.WorkloadBase):
         while len(nodes):
             node = nodes.pop()
             # rate limit
-            time.sleep(1)
-            node_obj = conn.bare_metal.get_node(node)
+            time.sleep(10)
+            try:
+                node_obj = conn.bare_metal.get_node(node)
+            except exceptions.SDKException:
+                self.logger.error("Ironic endpoint is down, retrying in 10 seconds")
+                time.sleep(10)
+                continue
             if node_obj is None:
                 self.logger.error("Can't find node " + node +
                                   " Which existed at the start of introspection \
@@ -311,15 +316,21 @@ class Yoda(WorkloadBase.WorkloadBase):
         wait_time = 0
         # 30 minute timeout
         timeout = (60 * 30)
-        while conn.orchestration.find_stack("overcloud") is not None:
-            # Deletes can fail, so we just try again
-            if wait_time % 2000 == 0:
-                conn.orchestration.delete_stack("overcloud")
-            time.sleep(5)
-            wait_time += 5
-            if wait_time > timeout:
-                self.logger.error("Overcloud stack delete failed")
-                exit(1)
+        try:
+            while conn.orchestration.find_stack("overcloud") is not None:
+                # Deletes can fail, so we just try again
+                if wait_time % 2000 == 0:
+                    conn.orchestration.delete_stack("overcloud")
+                time.sleep(10)
+                wait_time += 10
+                if wait_time > timeout:
+                    self.logger.error("Overcloud stack delete failed")
+                    exit(1)
+        except exceptions.SDKException:
+            # Recursion is probably the wrong way to handle this
+            self.logger.error("Heat failure during overcloud delete, retrying")
+            time.sleep(10)
+            self.delete_stack(conn)

     def setup_nodes_dict(self, benchmark):
         nodes = {}
@@ -561,9 +572,17 @@ class Yoda(WorkloadBase.WorkloadBase):
                                        benchmark)
             results['total_time'] = (datetime.datetime.utcnow() -
                                      start_time).total_seconds()
+            try:
+                stack_status = conn.orchestration.find_stack("overcloud")
+            except exceptions.SDKException:
+                self.logger.error("Heat endpoint failed to respond, waiting 10 seconds")
+                time.sleep(10)
+                continue
+            if stack_status is None:
+                continue
+            results['result'] = str(stack_status.status)
+            results['result_reason'] = str(stack_status.status_reason)

-            results['result'] = str(conn.orchestration.find_stack("overcloud").status)
-            results['result_reason'] = str(conn.orchestration.find_stack("overcloud").status_reason)
             results['total_nodes'] = len(list(map(lambda node: node.id, conn.bare_metal.nodes())))
             if "COMPLETE" in results['result']:
                 self.update_pass_tests()
@@ -589,8 +608,8 @@ class Yoda(WorkloadBase.WorkloadBase):
         self.logger.debug("Time Stamp (Prefix): {}".format(dir_ts))

         stackrc = self.config.get('yoda')['stackrc']
-        venv = self.config.get('yoda')['venv']
-        env_setup = "source {}; source {};".format(stackrc,venv)
+        # venv = self.config.get('yoda')['venv']
+        env_setup = "source {};".format(stackrc)

         auth_vars = self.tools.load_stackrc(stackrc)
         if 'OS_AUTH_URL' not in auth_vars:
@@ -619,7 +638,6 @@ class Yoda(WorkloadBase.WorkloadBase):
         benchmark['instackenv'] = instackenv
         for rerun in range(self.config['browbeat']['rerun']):
             for run in range(times):
-                self.update_tests()
                 if benchmark['type'] == "overcloud":
                     self.overcloud_workload(benchmark,
                                             run,
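
Reviewer note (not part of the patch): the lib/Yoda.py changes above wrap the openstacksdk Heat and Ironic calls in try/except on exceptions.SDKException and retry after a 10 second sleep; the overcloud delete path currently retries by calling delete_stack() recursively, which the inline comment itself flags as questionable. As a point of comparison only, here is a minimal sketch of the same poll-and-retry idea written as a bounded loop. It reuses the calls the patch already makes (orchestration.find_stack / delete_stack, exceptions.SDKException), but the function name, retry budget, and re-issue interval are illustrative assumptions, not part of this change.

import time

from openstack import exceptions


def wait_for_overcloud_delete(conn, max_heat_failures=30, timeout=60 * 30):
    # Poll Heat until the "overcloud" stack is gone, re-issuing the delete
    # periodically because deletes can fail. Returns True on success, False
    # on timeout; raises if Heat stays unreachable past the failure budget.
    wait_time = 0
    heat_failures = 0
    while wait_time < timeout:
        try:
            if conn.orchestration.find_stack("overcloud") is None:
                return True
            if wait_time % 600 == 0:
                conn.orchestration.delete_stack("overcloud")
        except exceptions.SDKException:
            heat_failures += 1
            if heat_failures > max_heat_failures:
                raise
        time.sleep(10)
        wait_time += 10
    return False

Whether a bounded failure budget or the recursive retry is preferable is a reviewer call; the sketch is only meant to make the control flow easier to discuss.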
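Similarly, for reference, the scaling math encoded in browbeat-yoda-ci.yaml.j2: overcloud_size now comes from counting pm_addr entries in instackenv.json (the new task in oooq-metadata) rather than from the Ansible overcloud group, and each overcloud benchmark scales compute nodes up to that size minus the controllers it reserves (1 for the non-HA jobs, 3 for the HA jobs). The snippet below only restates that arithmetic in stand-alone form; the file path and the 8-node example are hypothetical, and it assumes the usual TripleO instackenv.json layout with one pm_addr per node.

import json


def count_instackenv_nodes(path="/home/stack/instackenv.json"):
    # Same count as `grep pm_addr instackenv.json | wc -l`, assuming the
    # usual TripleO layout: {"nodes": [{"pm_addr": ...}, ...]}
    with open(path) as handle:
        return len(json.load(handle)["nodes"])


def max_compute_scale(overcloud_size, ha=False):
    # Mirrors the template's end_scale expressions:
    #   non-HA: {{ overcloud_size | int - 1 }}  (1 controller)
    #   HA:     {{ overcloud_size | int - 3 }}  (3 controllers)
    controllers = 3 if ha else 1
    return overcloud_size - controllers


if __name__ == "__main__":
    size = 8  # hypothetical instackenv.json describing 8 baremetal nodes
    print(max_compute_scale(size))           # 7 computes alongside 1 controller
    print(max_compute_scale(size, ha=True))  # 5 computes alongside 3 controllers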