Merge role overcloud-scale

Attila Darazs 2016-11-21 11:50:23 +01:00
commit dfc253e1e7
18 changed files with 740 additions and 0 deletions

View File

@ -0,0 +1,64 @@
---
control_memory: 6144
compute_memory: 6144
ceph_memory: 8192
undercloud_memory: 8192
undercloud_vcpu: 2
overcloud_nodes:
  - name: control_0
    flavor: control
  - name: compute_0
    flavor: compute
  - name: ceph-storage_0
    flavor: ceph
  - name: ceph-storage_1
    flavor: ceph
tempest: false
pingtest: true
deploy_timeout: 60
# General deployment info
libvirt_args: "--libvirt-type qemu"
# Pick flavors from flavor_map when one is provided, otherwise fall back to the oooq_* defaults
flavor_args: >-
  --control-flavor {{flavor_map.control
  if flavor_map is defined and 'control' in flavor_map else 'oooq_control'}}
  --compute-flavor {{flavor_map.compute
  if flavor_map is defined and 'compute' in flavor_map else 'oooq_compute'}}
  --ceph-storage-flavor {{flavor_map.ceph
  if flavor_map is defined and 'ceph' in flavor_map else 'oooq_ceph'}}
timeout_args: "--timeout {{ deploy_timeout }}"
extra_args: "--ceph-storage-scale 1 --neutron-network-type vxlan --neutron-tunnel-types vxlan -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml -e /usr/share/openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml -e ~/network-environment.yaml --ntp-server pool.ntp.org"
# Pulled this out so we can hand these configs to the openstack overcloud node delete command
scale_extra_configs: "-e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml -e /usr/share/openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml -e ~/network-environment.yaml"
scale_extra_args: "--{{ node_to_scale_deployment_arg }}-scale {{ final_scale_value }} --neutron-network-type vxlan --neutron-tunnel-types vxlan {{ scale_extra_configs }} --ntp-server pool.ntp.org"
# Scale deployment info
node_to_scale: ceph # Type of node to scale
node_to_scale_deployment_arg: ceph-storage # argument needed to scale node
initial_scale_value: 1 # Initial number of nodes to deploy
final_scale_value: 2 # Total number of nodes of this type after scaling
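# With the values above, scale_extra_args renders to roughly:
#   --ceph-storage-scale 2 --neutron-network-type vxlan --neutron-tunnel-types vxlan
#   -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml
#   -e /usr/share/openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml
#   -e ~/network-environment.yaml --ntp-server pool.ntp.org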
# Initial deployment arguments
deploy_args: >-
  {{ libvirt_args }}
  {{ flavor_args }}
  {{ timeout_args }}
  {{ extra_args }}
# Scale deployment arguments
scale_args: >-
  {{ libvirt_args }}
  {{ flavor_args }}
  {{ timeout_args }}
  {{ scale_extra_args }}
# Sample call
# ./deploy.sh -v --playbook scale_nodes --config-file config/scale/scale_ceph.yml

View File

@ -0,0 +1,88 @@
---
control_memory: 16384
control_vcpu: 4
overcloud_nodes:
  - name: control_0
    flavor: control
  - name: compute_0
    flavor: compute
  - name: compute_1
    flavor: compute
run_tempest: false
tempest_config: false
test_ping: true
enable_pacemaker: true
network_isolation: true
deploy_timeout: 60
# General deployment info
libvirt_args: "--libvirt-type qemu"
flavor_args: >-
  --control-flavor {{flavor_map.control
  if flavor_map is defined and 'control' in flavor_map else 'oooq_control'}}
  --compute-flavor {{flavor_map.compute
  if flavor_map is defined and 'compute' in flavor_map else 'oooq_compute'}}
timeout_args: "--timeout {{ deploy_timeout }}"
extra_args: "--compute-scale 1 --neutron-network-type vxlan --neutron-tunnel-types vxlan -e {{ overcloud_templates_path }}/environments/network-isolation.yaml -e {{ overcloud_templates_path }}/environments/net-single-nic-with-vlans.yaml -e ~/network-environment.yaml --ntp-server pool.ntp.org"
# Pulled this out so we can hand these configs to the openstack overcloud node delete command
scale_extra_configs: "-e {{ overcloud_templates_path }}/environments/network-isolation.yaml -e {{ overcloud_templates_path }}/environments/net-single-nic-with-vlans.yaml -e /home/stack/network-environment.yaml"
scale_extra_args: "--{{ node_to_scale }}-scale {{ final_scale_value }} --neutron-network-type vxlan --neutron-tunnel-types vxlan {{ scale_extra_configs }} --ntp-server pool.ntp.org"
# Scale deployment info
node_to_scale: compute # Type of node to scale
initial_scale_value: 1 # Initial number of nodes to deploy
final_scale_value: 2 # Total number of nodes of this type after scaling
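# With the values above (and overcloud_templates_path at its default),
# scale_extra_args renders to roughly:
#   --compute-scale 2 --neutron-network-type vxlan --neutron-tunnel-types vxlan
#   -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml
#   -e /usr/share/openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml
#   -e /home/stack/network-environment.yaml --ntp-server pool.ntp.org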
# Scale deployment arguments
scale_args: >-
  {{ libvirt_args }}
  {{ flavor_args }}
  {{ timeout_args }}
  {{ pacemaker_args }}
  {{ scale_extra_args }}
# options below direct automatic doc generation by tripleo-collect-logs
artcl_gen_docs: true
artcl_create_docs_payload:
  included_deployment_scripts:
    - undercloud-install
    - overcloud-custom-tht-script
    - overcloud-prep-flavors
    - overcloud-prep-images
    - overcloud-prep-network
    - overcloud-deploy
    - overcloud-deploy-post
    - overcloud-validate
    - scale-deployment
    - delete-node
  included_static_docs:
    - env-setup-virt
  table_of_contents:
    - env-setup-virt
    - undercloud-install
    - overcloud-custom-tht-script
    - overcloud-prep-flavors
    - overcloud-prep-images
    - overcloud-prep-network
    - overcloud-deploy
    - overcloud-deploy-post
    - overcloud-validate
    - scale-deployment
    - delete-node
    - overcloud-validate
# Path to tripleo-heat-templates
overcloud_templates_path: /usr/share/openstack-tripleo-heat-templates
# Sample call
# ./deploy.sh -v --playbook scale_nodes --config-file config/scale/scale_compute.yml

playbooks/scale_nodes.yml Normal file
View File

@ -0,0 +1,28 @@
---
################
# Deploy Nodes #
################
# From tripleo-quickstart/playbooks
- include: quickstart-extras.yml
###############
# Scale Nodes #
###############
# Scale nodes w/o delete
- name: Scale overcloud nodes
  hosts: undercloud
  roles:
    - { role: tripleo-overcloud-scale, artosn_scale_nodes: true, artosn_delete_original_node: false }

# Delete the original node of the type that was scaled -- ensure the overcloud validates after reducing scale
- name: Delete original node of type scaled
  hosts: undercloud
  roles:
    - { role: tripleo-overcloud-scale, artosn_scale_nodes: false, artosn_delete_original_node: true }

# NOTE(hrybacki): inventory regeneration and overcloud validation must be completed in a second playbook. The
# deleted node is removed from the hosts file. However, it still exists in memory and will cause the
# 'ansible-role-tripleo-inventory: regenerate ssh config' task to fail when attempting to access non-existent host vars.

View File

@ -0,0 +1,21 @@
---
# NOTE(hrybacki): inventory regeneration and overcloud validation must be completed in a second playbook. The
# deleted node is removed from the hosts file. However, it still exists in memory and will cause the
# 'ansible-role-tripleo-inventory: regenerate ssh config' task to fail when attempting to access non-existent host vars.

# Re-inventory the overcloud
- name: Inventory the overcloud
  hosts: undercloud
  gather_facts: yes
  vars:
    inventory: all
  roles:
    - tripleo-inventory

# Validate the overcloud
- name: Validate the overcloud post-delete-node
  hosts: undercloud
  gather_facts: no
  roles:
    - tripleo-overcloud-validate

View File

@ -0,0 +1,99 @@
ansible-role-tripleo-overcloud-scale-nodes
==========================================

An Ansible role for scaling and deleting nodes from an overcloud.
Requirements
------------
This role assumes it will be executed against a host on which a Liberty or Mitaka undercloud and overcloud have already been deployed.
**Note:** The ansible-role-tripleo-overcloud-validate role must be accessible.
Role Variables
--------------
The role reads the variables below. Defaults live in defaults/main.yml and can be overridden via role parameters or from the global scope (e.g. hostvars, group vars).

**Note:** Make sure to include all environment files and options from your [initial Overcloud creation](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux_OpenStack_Platform/7/html/Director_Installation_and_Usage/sect-Scaling_the_Overcloud.html#sect-Adding_Compute_or_Ceph_Storage_Nodes). This includes the same scale parameters for non-Compute nodes.

- artosn_scale_nodes: <true> -- boolean; scale the nodes when true
- artosn_delete_original_node: <false> -- boolean; delete the original node of the scaled type when true
- artosn_working_dir: <'/home/stack'> -- working directory for the role; assumes the stackrc file is present at this location
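These defaults can be overridden from the quickstart config file passed to deploy.sh via --config-file (a minimal sketch; the values shown are illustrative):

    artosn_working_dir: /home/stack
    artosn_scale_nodes: true
    artosn_delete_original_node: true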
Dependencies
------------
1. [ansible-role-tripleo-overcloud-validate](https://github.com/redhat-openstack/ansible-role-tripleo-overcloud-validate)
Example Playbook
----------------
1. Sample playbook to call the role

        - name: Scale overcloud nodes
          hosts: undercloud
          roles:
            - ansible-role-tripleo-overcloud-scale-nodes
2. Sample config file to scale from one compute node to two compute nodes on the overcloud

        control_memory: 6144
        compute_memory: 6144
        undercloud_memory: 8192
        undercloud_vcpu: 2
        overcloud_nodes:
          - name: control_0
            flavor: control
          - name: compute_0
            flavor: compute
          - name: compute_1
            flavor: compute
          - name: compute_2
            flavor: compute
        tempest: false
        pingtest: true
        deploy_timeout: 60

        # General deployment info
        libvirt_args: "--libvirt-type qemu"
        flavor_args: >-
          --control-flavor {{flavor_map.control
          if flavor_map is defined and 'control' in flavor_map else 'oooq_control'}}
          --compute-flavor {{flavor_map.compute
          if flavor_map is defined and 'compute' in flavor_map else 'oooq_compute'}}
          --ceph-storage-flavor {{flavor_map.ceph
          if flavor_map is defined and 'ceph' in flavor_map else 'oooq_ceph'}}
        timeout_args: "--timeout {{ deploy_timeout }}"

        # Pulled this out so we can hand these configs to the openstack overcloud node delete command
        scale_extra_configs: "-e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml -e /usr/share/openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml -e ~/network-environment.yaml"
        scale_extra_args: "--{{ node_to_scale }}-scale {{ final_scale_value }} --neutron-network-type vxlan --neutron-tunnel-types vxlan {{ scale_extra_configs }} --ntp-server pool.ntp.org"

        # Scale deployment info
        node_to_scale: compute # Type of node to scale
        initial_scale_value: 1 # Initial number of nodes to deploy
        final_scale_value: 2 # Total number of nodes of this type after scaling

        # Scale deployment arguments
        scale_args: >-
          {{ libvirt_args }}
          {{ flavor_args }}
          {{ timeout_args }}
          {{ scale_extra_args }}
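3. Sample playbook invoking the role twice, first scaling and then deleting the original node (a sketch based on playbooks/scale_nodes.yml from this commit; the short role name there assumes the role is installed as `tripleo-overcloud-scale`):

        - name: Scale overcloud nodes
          hosts: undercloud
          roles:
            - { role: tripleo-overcloud-scale, artosn_scale_nodes: true, artosn_delete_original_node: false }

        - name: Delete original node of type scaled
          hosts: undercloud
          roles:
            - { role: tripleo-overcloud-scale, artosn_scale_nodes: false, artosn_delete_original_node: true }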
License
-------
Apache
Author Information
------------------
RDO-CI Team

View File

@ -0,0 +1,6 @@
---
# defaults file for ansible-role-tripleo-overcloud-scale-nodes
artosn_scale_nodes: true
artosn_delete_original_node: false
artosn_working_dir: /home/stack

View File

@ -0,0 +1,2 @@
---
# handlers file for ansible-role-tripleo-overcloud-scale-nodes

View File

@ -0,0 +1,173 @@
galaxy_info:
  author: your name
  description:
  company: your company (optional)
  # If the issue tracker for your role is not on github, uncomment the
  # next line and provide a value
  # issue_tracker_url: http://example.com/issue/tracker
  # Some suggested licenses:
  # - BSD (default)
  # - MIT
  # - GPLv2
  # - GPLv3
  # - Apache
  # - CC-BY
  license: license (GPLv2, CC-BY, etc)
  min_ansible_version: 1.2
# Optionally specify the branch Galaxy will use when accessing the GitHub
# repo for this role. During role install, if no tags are available,
# Galaxy will use this branch. During import Galaxy will access files on
# this branch. If travis integration is configured, only notifications for this
# branch will be accepted. Otherwise, in all cases, the repo's default branch
# (usually master) will be used.
#github_branch:
#
# Below are all platforms currently available. Just uncomment
# the ones that apply to your role. If you don't see your
# platform on this list, let us know and we'll get it added!
#
#platforms:
#- name: EL
# versions:
# - all
# - 5
# - 6
# - 7
#- name: GenericUNIX
# versions:
# - all
# - any
#- name: Solaris
# versions:
# - all
# - 10
# - 11.0
# - 11.1
# - 11.2
# - 11.3
#- name: Fedora
# versions:
# - all
# - 16
# - 17
# - 18
# - 19
# - 20
# - 21
# - 22
# - 23
#- name: opensuse
# versions:
# - all
# - 12.1
# - 12.2
# - 12.3
# - 13.1
# - 13.2
#- name: IOS
# versions:
# - all
# - any
#- name: SmartOS
# versions:
# - all
# - any
#- name: eos
# versions:
# - all
# - Any
#- name: Windows
# versions:
# - all
# - 2012R2
#- name: Amazon
# versions:
# - all
# - 2013.03
# - 2013.09
#- name: GenericBSD
# versions:
# - all
# - any
#- name: Junos
# versions:
# - all
# - any
#- name: FreeBSD
# versions:
# - all
# - 10.0
# - 10.1
# - 10.2
# - 8.0
# - 8.1
# - 8.2
# - 8.3
# - 8.4
# - 9.0
# - 9.1
# - 9.1
# - 9.2
# - 9.3
#- name: Ubuntu
# versions:
# - all
# - lucid
# - maverick
# - natty
# - oneiric
# - precise
# - quantal
# - raring
# - saucy
# - trusty
# - utopic
# - vivid
# - wily
# - xenial
#- name: SLES
# versions:
# - all
# - 10SP3
# - 10SP4
# - 11
# - 11SP1
# - 11SP2
# - 11SP3
#- name: GenericLinux
# versions:
# - all
# - any
#- name: NXOS
# versions:
# - all
# - any
#- name: Debian
# versions:
# - all
# - etch
# - jessie
# - lenny
# - sid
# - squeeze
# - stretch
# - wheezy
galaxy_tags: []
# List tags for your role here, one per line. A tag is
# a keyword that describes and categorizes the role.
# Users find roles by searching for tags. Be sure to
# remove the '[]' above if you add tags to this list.
#
# NOTE: A tag is limited to a single word comprised of
# alphanumeric characters. Maximum 20 tags per role.
dependencies: []
# List your role dependencies here, one per line.
# Be sure to remove the '[]' above if you add dependencies
# to this list.

View File

@ -0,0 +1,63 @@
---
# Delete the scaled node
- name: Check the overcloud heat stack-list state
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    heat stack-list
  register: heat_stack_list_result

- name: Verify the overcloud is in a complete state
  fail: msg='Overcloud heat stack is not in a complete state'
  when: heat_stack_list_result.stdout.find('COMPLETE') == -1

- name: Register uuid of original "{{ node_to_scale }}" node
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    nova list | grep -m 1 "{{ node_to_scale }}" | sed -e 's/|//g' | awk '{print $1}'
  register: node_id_to_delete

- name: Register the name of the original "{{ node_to_scale }}" node
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    nova list | grep -m 1 "{{ node_to_scale }}" | sed -e 's/|//g' | awk '{print $2}'
  register: node_name_to_delete

- name: Display node name to be deleted
  debug: msg={{ node_name_to_delete.stdout }}

- name: Copy delete node script to undercloud
  template:
    src: delete-node.j2
    dest: "{{ artosn_working_dir }}/delete-node.sh"
    mode: 0755

- name: Delete node by id
  shell: >
    cat "{{ artosn_working_dir }}"/delete-node.sh;
    "{{ artosn_working_dir }}"/delete-node.sh &> delete_node_scale_console.log;

# Verify the delete was successful
- name: Poll heat stack-list to determine when node delete is complete
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    heat stack-list
  register: heat_stack_list_result
  until: heat_stack_list_result.stdout.find("COMPLETE") != -1
  retries: 20
  delay: 90

- name: Determine the post scale node count
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    nova list | grep "{{ node_to_scale }}" | cut -f2- -d':' | wc -l
  register: post_scale_node_count

- name: Remove deleted hosts from the host file
  shell: >
    sed -i '/{{ node_name_to_delete.stdout }}/d' {{ local_working_dir }}/hosts
  delegate_to: localhost

- name: Check that post delete node count is correct
  fail: msg="Overcloud nova list does not show expected number of {{ node_to_scale }} services"
  when: post_scale_node_count.stdout|int != initial_scale_value|int

View File

@ -0,0 +1,18 @@
---
# tasks file for ansible-role-tripleo-overcloud-scale-nodes
- include: pre-scale.yml
  when: artosn_scale_nodes
  tags:
    - pre-overcloud-scale-nodes

- include: scale-nodes.yml
  when: artosn_scale_nodes
  tags:
    - overcloud-scale-nodes

# Optionally delete the original node of type scaled
- include: delete-original-node.yml
  when: artosn_delete_original_node
  tags:
    - post-overcloud-scale-nodes-delete
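# Each include can also be run on its own via its tag, e.g. (hypothetical
# invocation; adjust the playbook path and inventory to your setup):
#   ansible-playbook -i hosts playbooks/scale_nodes.yml --tags overcloud-scale-nodes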

View File

@ -0,0 +1,60 @@
---
# Prepare deployment args
- name: Enable pacemaker
  set_fact:
    pacemaker_args: >-
      -e {{ overcloud_templates_path }}/environments/puppet-pacemaker.yaml
  when: enable_pacemaker

# Parse --control-scale out of scale_args, falling back to '1' when the flag is absent
- name: Extract the number of controllers to be deployed
  set_fact: number_of_controllers="{{ scale_args | regex_replace('^.*--control-scale +([0-9]+).*$', '\\1') | regex_replace('^[^ 0-9]*.*$', '1') }}"

- name: Disable L3 HA
  set_fact:
    pacemaker_args: >-
      {{ pacemaker_args }}
      -e $HOME/neutronl3ha.yaml
  when: enable_pacemaker and number_of_controllers|int < 3

# Prep for scaling overcloud
- name: Determine initial number of node(s) that will be scaled
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    nova list | grep "{{ node_to_scale }}" | cut -f2- -d':' | wc -l
  register: initial_node_count

- name: Register uuid of original "{{ node_to_scale }}" node
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    nova list | grep -m 1 "{{ node_to_scale }}" | sed -e 's/|//g' | awk '{print $1}'
  register: node_id_to_delete

- name: Register the name of the original "{{ node_to_scale }}" node
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    nova list | grep -m 1 "{{ node_to_scale }}" | sed -e 's/|//g' | awk '{print $2}'
  register: node_name_to_delete

- name: Register pre-scale nova list
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    nova list
  register: pre_scale_nova_list

- name: Display pre-scale nova list
  debug: msg={{ pre_scale_nova_list.stdout_lines }}
  when: pre_scale_nova_list is defined

- name: Copy scale deployment template to undercloud
  template:
    src: scale-deployment.j2
    dest: "{{ artosn_working_dir }}/scale-deployment.sh"
    mode: 0755

- name: Copy neutron l3 ha heat template
  template:
    src: "neutronl3ha.yaml.j2"
    dest: "{{ artosn_working_dir }}/neutronl3ha.yaml"
    mode: 0755
  when: enable_pacemaker and number_of_controllers|int < 3

View File

@ -0,0 +1,37 @@
---
# Do the scale
- name: Call scale deployment script
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    "{{ artosn_working_dir }}"/scale-deployment.sh &> overcloud_deployment_scale_console.log;

- name: Poll heat stack-list to determine when node scale is complete
  shell: >
    source "{{ artosn_working_dir }}"/stackrc;
    heat stack-list
  register: heat_stack_list_result
  until: heat_stack_list_result.stdout.find("COMPLETE") != -1
  retries: 20
  delay: 90

- name: Register post-scale nova list
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    nova list
  register: post_scale_nova_list

- name: Display post-scale nova list
  debug: msg={{ post_scale_nova_list.stdout_lines }}
  when: post_scale_nova_list is defined

# Verify the scale
- name: Determine the post scale node count
  shell: >
    source "{{ artosn_working_dir }}/stackrc";
    nova list | grep "{{ node_to_scale }}" | cut -f2- -d':' | wc -l
  register: post_scale_node_count

- name: Check that post scale node count is correct
  fail: msg="Overcloud nova list does not show expected number of {{ node_to_scale }} services"
  when: post_scale_node_count.stdout|int != final_scale_value|int

View File

@ -0,0 +1,18 @@
#! /bin/bash
### --start_docs
## Delete the original {{ node_to_scale }} node
## ============================================
## * Source in the undercloud credentials
## ::
source ./stackrc
## * Delete the {{ node_to_scale }} node from the original deployment
## ::
openstack overcloud node delete --debug --stack overcloud --templates {{ overcloud_templates_path }} {{ scale_extra_configs }} {{ node_id_to_delete.stdout }}
### ---stop_docs

View File

@ -0,0 +1,6 @@
# Note: we need to disable the L3 HA for Neutron if we want to use pacemaker
# and only 1 controller.
# This particular use case is used with the upgrade CI workflow
parameter_defaults:
  NeutronL3HA: false

View File

@ -0,0 +1,49 @@
#!/bin/bash
set -eux
### --start_docs
## Scale the overcloud
## ===================
## * Source in undercloud credentials.
## ::
source {{ artosn_working_dir }}/stackrc
### --stop_docs
# Wait until there are hypervisors available.
while true; do
    count=$(openstack hypervisor stats show -c count -f value)
    if [ $count -gt 0 ]; then
        break
    fi
    # Pause between polls so the loop does not hammer the API
    sleep 10
done
deploy_status=0
### --start_docs
## * Scale the overcloud
## ::
openstack overcloud deploy --templates {{ overcloud_templates_path }} {{ scale_args }} \
${DEPLOY_ENV_YAML:+-e $DEPLOY_ENV_YAML} || deploy_status=1
### --stop_docs
# We don't always get a useful error code from the openstack deploy command,
# so check `heat stack-list` for a CREATE_FAILED status.
if heat stack-list | grep -q 'CREATE_FAILED'; then
    deploy_status=1
    for failed in $(heat resource-list \
        --nested-depth 5 overcloud | grep FAILED |
        grep 'StructuredDeployment ' | cut -d '|' -f3)
    do heat deployment-show $failed > failed_deployment_$failed.log
    done
fi
exit $deploy_status

View File

@ -0,0 +1 @@
localhost

View File

@ -0,0 +1,5 @@
---
- hosts: localhost
  remote_user: root
  roles:
    - ansible-role-tripleo-overcloud-scale-nodes

View File

@ -0,0 +1,2 @@
---
# vars file for ansible-role-tripleo-overcloud-scale-nodes