Add Neutron L3 agent failover check during upgrade

This change adds support for validating that a router created on
the overcloud before upgrade remains active on the same node between
the various steps of the upgrade process and does not fail over.

Change-Id: I1273367fb07bda5cdacb538081a87f9d3e5ece3c
(cherry picked from commit 71aaf58e6f)
This commit is contained in:
Marius Cornea 2018-04-06 15:51:37 -04:00
parent 9c3c90ca2f
commit aa0a0983b5
12 changed files with 80 additions and 6 deletions

View File

@ -129,6 +129,7 @@ l3_agent_connectivity_check_start_script: "{{ working_dir }}/l3_agent_start_ping
l3_agent_connectivity_check_stop_script: "{{ working_dir }}/l3_agent_stop_ping.sh"
l3_agent_connectivity_check_start_template: l3_agent_start_ping.sh.j2
l3_agent_connectivity_check_stop_template: l3_agent_stop_ping.sh.j2
# check that the l3 agent hosting a neutron router does not failover during upgrade
l3_agent_failover_check: false
# migrate instances between compute nodes during upgrade
compute_evacuate: false

View File

@ -170,5 +170,10 @@
- name: Set overcloud ssh user name
set_fact:
overcloud_ssh_user: "{{ install.overcloud.ssh.user }}"
# Turn on l3_agent_failover_check when the
# install.upgrade.l3agent.failover.check setting is truthy.
- name: Set upgrade l3 agent failover check
set_fact:
l3_agent_failover_check: true
when: install.upgrade.l3agent.failover.check
roles:
- tripleo-upgrade

View File

@ -98,6 +98,12 @@ subparsers:
help: |
Launch workload after major composable upgrade step
default: false
# Opt-in boolean switch for the l3 agent failover check; off by default.
# NOTE(review): presumably read by the playbook as
# install.upgrade.l3agent.failover.check - confirm against the plugin loader.
upgrade-l3agent-failover-check:
type: Bool
help: |
Check l3 agent does not failover during upgrade.
Existing neutron router is required.
default: false
- title: TripleO Update
options:
overcloud-update:

View File

@ -0,0 +1,13 @@
---
# Render the l3 agent failover check helper scripts into working_dir.
# Both templates are rendered only when l3_agent_failover_check is enabled.
- block:
    - name: create l3 agent failover check pre script
      template:
        src: "l3_agent_failover_pre.sh.j2"
        dest: "{{ working_dir }}/l3_agent_failover_pre.sh"
        # quoted so the mode is never subject to YAML integer parsing
        mode: "0775"

    - name: create l3 agent failover check post scripts
      template:
        src: "l3_agent_failover_post.sh.j2"
        dest: "{{ working_dir }}/l3_agent_failover_post.sh"
        # quoted so the mode is never subject to YAML integer parsing
        mode: "0775"
  when: l3_agent_failover_check|bool

View File

@ -0,0 +1,6 @@
---
# Run the rendered failover post-check with the overcloud credentials loaded.
# The task is skipped unless l3_agent_failover_check is enabled.
- name: run l3 agent failover post script
  shell: |
    source {{ overcloud_rc }}
    {{ working_dir }}/l3_agent_failover_post.sh
  args:
    # `source` is a bash builtin, while the shell module defaults to /bin/sh
    executable: /bin/bash
  when: l3_agent_failover_check|bool

View File

@ -0,0 +1,6 @@
---
# Run the rendered failover pre-check with the overcloud credentials loaded.
# The task is skipped unless l3_agent_failover_check is enabled.
- name: run l3 agent failover pre script
  shell: |
    source {{ overcloud_rc }}
    {{ working_dir }}/l3_agent_failover_pre.sh
  args:
    # `source` is a bash builtin, while the shell module defaults to /bin/sh
    executable: /bin/bash
  when: l3_agent_failover_check|bool

View File

@ -90,3 +90,5 @@
- include: ../common/ssh_config_skip_host.yml
- include: ../common/create_l3_agent_connectivity_check_script.yml
- include: ../common/create_l3_agent_failover_check_script.yml

View File

@ -23,7 +23,10 @@
when: ffu_undercloud_upgrade|bool
- block:
- include: ../common/l3_agent_connectivity_check_start_script.yml
- include: "{{ item }}"
with_items:
- '../common/l3_agent_connectivity_check_start_script.yml'
- '../common/l3_agent_failover_check_pre_script.yml'
tags: ffu_update_stack_outputs
- name: apply pre ffu update stack outputs workarounds
@ -34,7 +37,10 @@
- include: ffu_update_stack_outputs.yaml
tags: ffu_update_stack_outputs
- include: ../common/l3_agent_connectivity_check_stop_script.yml
- include: "{{ item }}"
with_items:
- '../common/l3_agent_connectivity_check_stop_script.yml'
- '../common/l3_agent_failover_check_post_script.yml'
tags: ffu_update_stack_outputs
- name: apply pre ffu download config workarounds
@ -106,7 +112,10 @@
- include: ffu_upgrade_ceph.yaml
tags: ffu_upgrade_ceph
- include: ../common/l3_agent_connectivity_check_stop_script.yml
- include: "{{ item }}"
with_items:
- '../common/l3_agent_connectivity_check_stop_script.yml'
- '../common/l3_agent_failover_check_post_script.yml'
tags: ffu_upgrade_ceph
when: ceph_ansible|succeeded

View File

@ -46,5 +46,7 @@
command: "{{working_dir}}/ffu_deploy_steps_playbook_script.sh"
tags: ffu_deploy_steps_playbook
- include: ../common/l3_agent_connectivity_check_stop_script.yml
- include: "{{ item }}"
with_items:
- '../common/l3_agent_connectivity_check_stop_script.yml'
- '../common/l3_agent_failover_check_post_script.yml'

View File

@ -51,7 +51,10 @@
command: "{{working_dir}}//ffu_deploy_steps_playbook_script_non_Compute.sh"
tags: ffu_upgrade_non_compute
- include: ../common/l3_agent_connectivity_check_stop_script.yml
- include: "{{ item }}"
with_items:
- '../common/l3_agent_connectivity_check_stop_script.yml'
- '../common/l3_agent_failover_check_post_script.yml'
tags: ffu_upgrade_non_compute
- include: ../common/l3_agent_connectivity_check_start_script.yml

View File

@ -0,0 +1,14 @@
#!/bin/bash
#
# Script which validates that the active agent hosting a neutron router
# has not failed over during upgrade. Used with l3_agent_failover_check.
#
# Sources the state file holding ROUTER_ID and ROUTER_HOST_PRE, looks up the
# host whose ha_state is currently "active" for that router and exits 1 when
# it differs from ROUTER_HOST_PRE.
STATE_FILE="{{ working_dir }}/router_active_agent.sh"

# Without the recorded state both sides of the comparison would be empty and
# the check would pass without validating anything.
if [ ! -r "$STATE_FILE" ]; then
    echo "State file $STATE_FILE not found; run the l3 agent failover pre script first." >&2
    exit 1
fi
source "$STATE_FILE"

# jq prints the literal string "null" when it selected no router.
if [ -z "$ROUTER_ID" ] || [ "$ROUTER_ID" = "null" ]; then
    echo "No router ID recorded in $STATE_FILE; cannot check for failover." >&2
    exit 1
fi

ROUTER_HOST_POST=$(neutron l3-agent-list-hosting-router "${ROUTER_ID}" -f json | jq -r -c '.[]|select(.ha_state=="active")|.host')
if [ "$ROUTER_HOST_PRE" != "$ROUTER_HOST_POST" ]; then
    echo "Neutron router failover detected."
    echo "Router $ROUTER_ID previously hosted by $ROUTER_HOST_PRE and now hosted by $ROUTER_HOST_POST"
    exit 1
fi

View File

@ -0,0 +1,7 @@
#!/bin/bash
#
# Script which checks the active agent hosting a neutron router
#
# Picks the first router whose State is "UP", looks up the host whose
# ha_state is "active" for it, and records both as shell exports in the
# state file so a later check can compare against them.
STATE_FILE="{{ working_dir }}/router_active_agent.sh"

FIRST_ROUTER=$(openstack router list -f json | jq -r -c '[.[]|select(.State=="UP")][0].ID')
# jq prints the literal string "null" when the filtered list is empty.
if [ -z "$FIRST_ROUTER" ] || [ "$FIRST_ROUTER" = "null" ]; then
    echo "No neutron router in state UP found; the l3 agent failover check requires an existing router." >&2
    exit 1
fi

ROUTER_HOST_PRE=$(neutron l3-agent-list-hosting-router "${FIRST_ROUTER}" -f json | jq -r -c '.[]|select(.ha_state=="active")|.host')
if [ -z "$ROUTER_HOST_PRE" ]; then
    echo "Warning: no active l3 agent reported for router ${FIRST_ROUTER}." >&2
fi

# printf with the q conversion shell-quotes the values, so the state file
# stays valid to source even if a value is empty or contains whitespace.
printf 'export ROUTER_ID=%q\n' "$FIRST_ROUTER" > "$STATE_FILE"
printf 'export ROUTER_HOST_PRE=%q\n' "$ROUTER_HOST_PRE" >> "$STATE_FILE"