Raoul Scarazzini 20bbd0f456 Fix the way iptables rules are managed
Today we use iptables-save to store the firewall configuration (we
enable INPUT traffic for pacemaker remote on all the hosts), but this
method is not reliable since Neutron adds and removes rules by itself,
and by using iptables-save we end up storing those rules as well.
This patch treats the single rule in both the apply and undo playbooks
so that we don't touch anything that Neutron might manage.

Change-Id: I6293d7a065f8e531de6218f272a8b08844b3eb42
2018-05-22 16:42:27 +02:00

---
- name: Apply STONITH for compute nodes
  include_role:
    name: stonith-config
  vars:
    stonith_devices: "computes"
  when:
    - stonith_devices in ["all","computes"]

- name: Disable openstack-nova-compute on compute
  service:
    name: openstack-nova-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release not in [ 'pike', 'rhos-12' ]

- name: Disable neutron-openvswitch-agent on compute
  service:
    name: neutron-openvswitch-agent
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Disable openstack-ceilometer-compute on compute
  service:
    name: openstack-ceilometer-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Disable libvirtd on compute
  become: yes
  service:
    name: libvirtd
    state: stopped
    enabled: no
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
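
# pacemaker_remote authenticates against the cluster with a shared key in
# /etc/pacemaker/authkey: generate it locally, distribute it to every
# overcloud node, then remove the local copy.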
- name: Generate authkey for remote pacemaker
  shell: |
    dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
  delegate_to: localhost

- name: Make sure pacemaker config dir exists
  become: yes
  file:
    path: /etc/pacemaker
    state: directory
    mode: 0750
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Copy authkey on all the overcloud nodes
  become: yes
  copy:
    src: /tmp/authkey
    dest: /etc/pacemaker/authkey
    mode: 0640
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Remove authkey from local dir
  file:
    path: /tmp/authkey
    state: absent
  delegate_to: localhost
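
# pacemaker_remote listens on TCP port 3121. Open the port at runtime and
# persist it as a single line in /etc/sysconfig/iptables instead of dumping
# the whole ruleset with iptables-save, so Neutron-managed rules are left
# alone (see the commit message).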
- name: Enable iptables traffic for pacemaker_remote
  become: yes
  shell: |
    iptables -A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Make iptables pacemaker_remote rule permanent
  become: yes
  lineinfile:
    path: /etc/sysconfig/iptables
    line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
    insertafter: ":OUTPUT ACCEPT"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Start pacemaker remote service on compute nodes
  become: yes
  service:
    name: pacemaker_remote
    enabled: yes
    state: started
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"

- name: Get the name of the stack
  shell: |
    source {{ working_dir }}/stackrc
    openstack stack list -f value -c 'Stack Name'
  register: stack_name

- name: Check if a v3 overcloud's rc file exists
  stat:
    path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
  register: v3_rc_file_stat

- name: Get the contents of the overcloud's rc file v3
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc.v3"
  when: v3_rc_file_stat.stat.exists

- name: Get the contents of the overcloud's rc file
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc"
  when: not v3_rc_file_stat.stat.exists
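
# A <stack>rc.v3 file means the overcloud exposes Keystone v3 credentials
# (user and project domain names); the tasks below pull the OS_* values out
# of whichever rc file was selected.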
- block:
    - name: Get OS_USERNAME from overcloudrc
      shell: |
        grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
      register: "OS_USERNAME"

    - name: Get OS_PASSWORD from overcloudrc
      shell: |
        grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
      register: "OS_PASSWORD"

    - name: Get OS_AUTH_URL from overcloudrc
      shell: |
        grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
      register: "OS_AUTH_URL"

    - name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
      shell: |
        grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
      register: "OS_TENANT_NAME"

    - name: Get OS_USER_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
      register: "OS_USER_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists

    - name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
      register: "OS_PROJECT_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists

- name: Define variable for pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: ""

- name: Define variable for pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: ""
    pcs_fence_compute_no_shared_storage_opts: ""

- name: Set pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
  when: v3_rc_file_stat.stat.exists

- name: Set pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
    pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
  when: not instance_ha_shared_storage|bool
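
# Everything in the following block runs as root on the first controller,
# with the overcloud credentials extracted above exported through the
# block-level 'environment'.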
- block:
    - name: Create resource nova-evacuate
      shell: |
        pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force

    - name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
      shell: |
        pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute

    - name: Create pacemaker constraints to start VIP resources before nova-evacuate
      shell: |
        for i in $(pcs status | grep IP | awk '{ print $1 }')
        do pcs constraint order start $i then nova-evacuate
        done

    - name: Create pacemaker constraints to start openstack services before nova-evacuate
      shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
      with_items:
        - openstack-glance-api-clone
        - neutron-metadata-agent-clone
        - openstack-nova-conductor-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Disable keystone resource
      shell: "pcs resource disable openstack-keystone --wait=900"
      when: release in [ 'liberty', 'rhos-8' ]

    # Keystone resource was replaced by openstack-core resource in RHOS9
    - name: Disable openstack-core resource
      shell: "pcs resource disable openstack-core --wait=900"
      when: release in [ 'mitaka', 'rhos-9' ]

    - name: Set controller pacemaker property on controllers
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
      with_items: "{{ groups['controller'] }}"

    - name: Get stonith devices
      shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
      register: stonithdevs

    - name: Setup stonith devices
      shell: |
        for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
          found=0
          if [ -n "{{ stonithdevs.stdout }}" ]; then
            for x in {{ stonithdevs.stdout }}; do
              if [ "$x" == "$i" ]; then
                found=1
              fi
            done
          fi
          if [ $found = 0 ]; then
            pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
          fi
        done
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create compute pacemaker resources and constraints
      shell: |
        pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain=localdomain op start timeout=300 --clone interleave=true --disabled --force
        pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
        pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
        pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
      when: release not in [ 'pike', 'rhos-12' ]
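
    # nova-compute-checkevacuate (ocf:openstack:nova-compute-wait) above holds
    # the nova-compute clone back on a recovering node until any evacuation
    # triggered for it has completed.
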
    - name: Create compute pacemaker resources and constraints
      shell: |
        pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
        pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
        pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
        pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
        pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
        pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
        pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
        pcs constraint order start libvirtd-compute-clone then nova-compute-clone
        pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
      shell: |
        pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
        pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
        pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Check if ipmi exists for all compute nodes
      shell: |
        pcs stonith show ipmilan-{{ hostvars[item]['ansible_hostname'] }}
      with_items: "{{ groups['compute'] }}"

    - name: Set requires to fencing as default for all resources (Pike/RHOS-12)
      shell: "pcs resource defaults requires=fencing"
      when: release in [ 'pike', 'rhos-12' ]

    - name: Create fence-nova pacemaker resource (no shared storage)
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain=localdomain record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create fence-nova pacemaker resource (Pike/RHOS-12)
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain=localdomain record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
      when: release in [ 'pike', 'rhos-12' ]

    - name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
      shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"

    - name: Create pacemaker constraint for fence-nova to start after galera
      shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create nova-compute order constraint on fence-nova
      shell: "pcs constraint order start fence-nova then nova-compute-clone"
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Set cluster recheck interval to 1 minute
      shell: "pcs property set cluster-recheck-interval=1min"

    - name: Create pacemaker remote resource on compute nodes
      shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
      with_items: "{{ groups['compute'] }}"

    - name: Set osprole for compute nodes
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
      with_items: "{{ groups['compute'] }}"

    - name: Add pacemaker stonith devices of compute nodes to level 1
      shell: "pcs stonith level add 1 {{ hostvars[item]['ansible_hostname'] }} ipmilan-{{ hostvars[item]['ansible_hostname'] }},fence-nova"
      with_items: "{{ groups['compute'] }}"
    - name: Enable keystone resource
      shell: "pcs resource enable openstack-keystone"
      when: release in [ 'liberty', 'rhos-8' ]

    - name: Enable openstack-core resource
      shell: "pcs resource enable openstack-core"
      when: release in [ 'mitaka', 'rhos-9' ]

    - name: Wait for httpd service to be started
      shell: "systemctl show httpd --property=ActiveState"
      register: httpd_status_result
      until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
      retries: 30
      delay: 10
      when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

    - name: Enable compute nodes resources (nova)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - nova-compute-checkevacuate
        - nova-compute
      when: release not in [ 'pike', 'rhos-12' ]

    - name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
      shell: |
        pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
        pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
        pcs resource enable compute-unfence-trigger
      when: release in [ 'pike', 'rhos-12' ]
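
    # The Dummy compute-unfence-trigger clone above exists only so that its
    # requires="unfencing" start option forces a compute node to be unfenced
    # before resources may run on it.
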
    - name: Enable compute nodes resources (others)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - neutron-openvswitch-agent-compute
        - libvirtd-compute
        - ceilometer-compute
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
  environment:
    OS_USERNAME: "{{ OS_USERNAME.stdout }}"
    OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
    OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
    OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
    # The domain tasks are skipped on non-v3 clouds and their registers then
    # have no .stdout, so default to an empty string instead of failing.
    OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout | default('') }}"
    OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout | default('') }}"
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Cleanup (if any) failed resources
  shell: |
    for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq)
    do
      pcs resource cleanup $resource
    done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Wait for (if any) failed resources to recover
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
  register: failed_resources
  # stdout is a string, so retry until it is empty (comparing it to the
  # list [] was always true and made the wait a no-op)
  until: failed_resources.stdout == ''
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"