Raoul Scarazzini 20bbd0f456 Fix the way iptables rules are managed
Today we use iptables-save to store the firewall configuration (we
enable INPUT traffic for pacemaker remote on all the hosts), but this
method is not reliable since Neutron adds and removes rules by itself,
and by using iptables-save we end up storing those rules as well.
This patch treats the single rule in both the apply and undo playbooks
so that we don't touch anything that Neutron might manage.

Change-Id: I6293d7a065f8e531de6218f272a8b08844b3eb42
2018-05-22 16:42:27 +02:00

---
- name: Apply STONITH for compute nodes
  include_role:
    name: stonith-config
  vars:
    stonith_devices: "computes"
  when:
    - stonith_devices in ["all","computes"]

- name: Disable openstack-nova-compute on compute
  service:
    name: openstack-nova-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release not in [ 'pike', 'rhos-12' ]

- name: Disable neutron-openvswitch-agent on compute
  service:
    name: neutron-openvswitch-agent
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Disable openstack-ceilometer-compute on compute
  service:
    name: openstack-ceilometer-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Disable libvirtd on compute
  become: yes
  service:
    name: libvirtd
    state: stopped
    enabled: no
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
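
# pacemaker_remote authenticates against the cluster with a shared key in
# /etc/pacemaker/authkey: generate it locally, distribute it to every
# overcloud node, then remove the local copy.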
- name: Generate authkey for remote pacemaker
  shell: |
    dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
  delegate_to: localhost

- name: Make sure pacemaker config dir exists
  become: yes
  file:
    path: /etc/pacemaker
    state: directory
    mode: 0750
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Copy authkey on all the overcloud nodes
  become: yes
  copy:
    src: /tmp/authkey
    dest: /etc/pacemaker/authkey
    mode: 0640
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Remove authkey from local dir
  file:
    path: /tmp/authkey
    state: absent
  delegate_to: localhost
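
# pacemaker_remote listens on TCP port 3121. Open the port at runtime and
# persist it as a single line in /etc/sysconfig/iptables instead of dumping
# the whole ruleset with iptables-save, so Neutron-managed rules are left
# alone (see the commit message).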
- name: Enable iptables traffic for pacemaker_remote
  become: yes
  shell: |
    iptables -A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Make iptables pacemaker_remote rule permanent
  become: yes
  lineinfile:
    path: /etc/sysconfig/iptables
    line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
    insertafter: ":OUTPUT ACCEPT"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Start pacemaker remote service on compute nodes
  become: yes
  service:
    name: pacemaker_remote
    enabled: yes
    state: started
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"

- name: Get the name of the stack
  shell: |
    source {{ working_dir }}/stackrc
    openstack stack list -f value -c 'Stack Name'
  register: stack_name

- name: Check if a v3 overcloud's rc file exists
  stat:
    path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
  register: v3_rc_file_stat

- name: Get the contents of the overcloud's rc file v3
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc.v3"
  when: v3_rc_file_stat.stat.exists

- name: Get the contents of the overcloud's rc file
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc"
  when: not v3_rc_file_stat.stat.exists
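
# A <stack>rc.v3 file means the overcloud exposes Keystone v3 credentials
# (user and project domain names); the tasks below pull the OS_* values out
# of whichever rc file was selected.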
- block:
    - name: Get OS_USERNAME from overcloudrc
      shell: |
        grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
      register: "OS_USERNAME"

    - name: Get OS_PASSWORD from overcloudrc
      shell: |
        grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
      register: "OS_PASSWORD"

    - name: Get OS_AUTH_URL from overcloudrc
      shell: |
        grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
      register: "OS_AUTH_URL"

    - name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
      shell: |
        grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
      register: "OS_TENANT_NAME"

    - name: Get OS_USER_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
      register: "OS_USER_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists

    - name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
      register: "OS_PROJECT_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists

- name: Define variable for pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: ""

- name: Define variable for pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: ""
    pcs_fence_compute_no_shared_storage_opts: ""

- name: Set pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
  when: v3_rc_file_stat.stat.exists

- name: Set pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
    pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
  when: not instance_ha_shared_storage|bool
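
# Everything in the following block runs as root on the first controller,
# with the overcloud credentials extracted above exported through the
# block-level 'environment'.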
- block:
    - name: Create resource nova-evacuate
      shell: |
        pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force

    - name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
      shell: |
        pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute

    - name: Create pacemaker constraints to start VIP resources before nova-evacuate
      shell: |
        for i in $(pcs status | grep IP | awk '{ print $1 }')
        do pcs constraint order start $i then nova-evacuate
        done

    - name: Create pacemaker constraints to start openstack services before nova-evacuate
      shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
      with_items:
        - openstack-glance-api-clone
        - neutron-metadata-agent-clone
        - openstack-nova-conductor-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Disable keystone resource
      shell: "pcs resource disable openstack-keystone --wait=900"
      when: release in [ 'liberty', 'rhos-8' ]

    # Keystone resource was replaced by openstack-core resource in RHOS9
    - name: Disable openstack-core resource
      shell: "pcs resource disable openstack-core --wait=900"
      when: release in [ 'mitaka', 'rhos-9' ]

    - name: Set controller pacemaker property on controllers
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
      with_items: "{{ groups['controller'] }}"

    - name: Get stonith devices
      shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
      register: stonithdevs

    - name: Setup stonith devices
      shell: |
        for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
          found=0
          if [ -n "{{ stonithdevs.stdout }}" ]; then
            for x in {{ stonithdevs.stdout }}; do
              if [ "$x" == "$i" ]; then
                found=1
              fi
            done
          fi
          if [ $found = 0 ]; then
            pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
          fi
        done
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create compute pacemaker resources and constraints
      shell: |
        pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain=localdomain op start timeout=300 --clone interleave=true --disabled --force
        pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
        pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
        pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
      when: release not in [ 'pike', 'rhos-12' ]
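
    # nova-compute-checkevacuate (ocf:openstack:nova-compute-wait) above holds
    # the nova-compute clone back on a recovering node until any evacuation
    # triggered for it has completed.
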
    - name: Create compute pacemaker resources and constraints
      shell: |
        pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
        pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
        pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
        pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
        pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
        pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
        pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
        pcs constraint order start libvirtd-compute-clone then nova-compute-clone
        pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
      shell: |
        pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
        pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
        pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Check if ipmi exists for all compute nodes
      shell: |
        pcs stonith show ipmilan-{{ hostvars[item]['ansible_hostname'] }}
      with_items: "{{ groups['compute'] }}"

    - name: Set requires to fencing as default for all resources (Pike/RHOS-12)
      shell: "pcs resource defaults requires=fencing"
      when: release in [ 'pike', 'rhos-12' ]

    - name: Create fence-nova pacemaker resource (no shared storage)
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain=localdomain record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create fence-nova pacemaker resource (Pike/RHOS-12)
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain=localdomain record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
      when: release in [ 'pike', 'rhos-12' ]

    - name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
      shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"

    - name: Create pacemaker constraint for fence-nova to start after galera
      shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create nova-compute order constraint on fence-nova
      shell: "pcs constraint order start fence-nova then nova-compute-clone"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Set cluster recheck interval to 1 minute
      shell: "pcs property set cluster-recheck-interval=1min"

    - name: Create pacemaker remote resource on compute nodes
      shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
      with_items: "{{ groups['compute'] }}"

    - name: Set osprole for compute nodes
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
      with_items: "{{ groups['compute'] }}"

    - name: Add pacemaker stonith devices of compute nodes to level 1
      shell: "pcs stonith level add 1 {{ hostvars[item]['ansible_hostname'] }} ipmilan-{{ hostvars[item]['ansible_hostname'] }},fence-nova"
      with_items: "{{ groups['compute'] }}"
    - name: Enable keystone resource
      shell: "pcs resource enable openstack-keystone"
      when: release in [ 'liberty', 'rhos-8' ]

    - name: Enable openstack-core resource
      shell: "pcs resource enable openstack-core"
      when: release in [ 'mitaka', 'rhos-9' ]

    - name: Wait for httpd service to be started
      shell: "systemctl show httpd --property=ActiveState"
      register: httpd_status_result
      until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
      retries: 30
      delay: 10
      when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Enable compute nodes resources (nova)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - nova-compute-checkevacuate
        - nova-compute
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
      shell: |
        pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
        pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
        pcs resource enable compute-unfence-trigger
      when: release in [ 'pike', 'rhos-12' ]
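
    # The Dummy compute-unfence-trigger clone above exists only so that its
    # requires="unfencing" start option forces a compute node to be unfenced
    # before resources may run on it.
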
    - name: Enable compute nodes resources (others)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - neutron-openvswitch-agent-compute
        - libvirtd-compute
        - ceilometer-compute
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
  environment:
    OS_USERNAME: "{{ OS_USERNAME.stdout }}"
    OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
    OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
    OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
    # The domain tasks are skipped on non-v3 clouds and their registers then
    # have no .stdout, so default to an empty string instead of failing.
    OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout | default('') }}"
    OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout | default('') }}"
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Cleanup (if any) failed resources
  shell: |
    for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq)
    do
      pcs resource cleanup $resource
    done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Wait for (if any) failed resources to recover
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
  register: failed_resources
  # stdout is a string, so retry until it is empty (comparing it to the
  # list [] was always true and made the wait a no-op)
  until: failed_resources.stdout == ''
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"