From a2533dd8eef38bb4604298faf4f316c5b3f03e0e Mon Sep 17 00:00:00 2001
From: Lukas Bezdicka
Date: Wed, 1 Dec 2021 14:01:59 +0100
Subject: [PATCH] [train-only][ffwd] Resolve OVNDB data loss during upgrade

Because new writes to the OVN DB are possible during the Ctrl0 system
upgrade, copy the data from the last master to Ctrl0 during the
transfer data step. At the same time, resolve a race in ovn-controller
shutdown where ovn-controller stops but keeps hanging.

The exit request sent to ovn-controller now carries --timeout=30 on
both the ovn-appctl and the ovs-appctl path, and the transfer step
fails with an explicit message when no source host could be selected
instead of tripping over an undefined variable.

Resolves: rhbz#1942449
Change-Id: I209f1d4ed11ca23c163eb15a9f43b6e5a1238cad
---
Note: this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The whitespace of the context lines is
reconstructed and must be checked against the target tree before
applying. The blob ids on the "index" lines predate the edits described
in the last paragraph of the commit message.

 .../ovn/ovn-controller-container-puppet.yaml |  5 ++-
 deployment/ovn/ovn-dbs-pacemaker-puppet.yaml | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/deployment/ovn/ovn-controller-container-puppet.yaml b/deployment/ovn/ovn-controller-container-puppet.yaml
index 164be48749..eb1f15fb13 100644
--- a/deployment/ovn/ovn-controller-container-puppet.yaml
+++ b/deployment/ovn/ovn-controller-container-puppet.yaml
@@ -398,9 +398,10 @@ outputs:
         - name: Disable autorestart on ovn_controller container
           command: docker update --restart=no ovn_controller
           when: ovn_controller_running.rc == 0
-        - name: Tell ovn_controller to clean up and stop
+        - name: Tell ovn_controller to clean up and stop but don't fail on it
+          failed_when: false
           shell: |
-            docker exec -u root ovn_controller bash -c "if [ -f /usr/bin/ovn-appctl ] ; then ovn-appctl -t ovn-controller exit ; else ovs-appctl -t ovn-controller exit ; fi"
+            docker exec -u root ovn_controller bash -c "if [ -f /usr/bin/ovn-appctl ] ; then ovn-appctl --timeout=30 -t ovn-controller exit ; else ovs-appctl --timeout=30 -t ovn-controller exit ; fi"
           when: ovn_controller_running.rc == 0
         # nova_hybrid_state
         - name: Gather missing facts
diff --git a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
index 3da8a9cfec..cfa7693ec3 100644
--- a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
+++ b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
@@ -80,6 +80,9 @@ parameters:
     type: string
     description: Specifies the default CA cert to use if TLS is used for
                  services in the internal network.
+  OVNDBSUpgradeTransfer:
+    type: boolean
+    default: true
   OVNDBSPacemakerTimeout:
     description: timeout for monitor of ovn dbs resource in seconds
     type: number
@@ -385,6 +388,47 @@ outputs:
           vars:
             tripleo_ha_wrapper_minor_update: true
 
+      external_upgrade_tasks:
+        - vars:
+            ovn_upgrade_transfer: {get_param: OVNDBSUpgradeTransfer}
+          when:
+            - step|int == 2
+            - ovn_upgrade_transfer
+          tags:
+            - never
+            - system_upgrade_transfer_data
+          block:
+            - name: Check which ovndb was contacting master
+              become: true
+              delegate_to: "{{item}}"
+              with_items:
+                - "{{hostvars[groups['overcloud'][0]]['ovn_dbs_short_node_names'][1]}}"
+                - "{{hostvars[groups['overcloud'][0]]['ovn_dbs_short_node_names'][2]}}"
+              shell: |
+                grep $( hiera -c /etc/puppet/hiera.yaml ovn_dbs_vip ) /var/lib/openvswitch/ovn/ovnnb-active.conf
+              failed_when: false
+              register: ovnnb_active
+            # A candidate is selected when grep did not find the VIP in its
+            # ovnnb-active.conf (rc != 0); if several qualify, the last one wins.
+            - name: register transfer host
+              set_fact:
+                ovndbs_source_host: "{{item.item}}"
+              when: item.rc != 0
+              with_items: "{{ovnnb_active.results}}"
+            # Without this the role below aborts on an undefined variable.
+            - name: Fail when no ovn-dbs transfer source host was selected
+              fail:
+                msg: "Could not select ovndbs_source_host; the ovn_dbs_vip check returned rc 0 on every candidate node."
+              when: ovndbs_source_host is not defined
+            - name: Transfer ovn data
+              include_role:
+                name: tripleo-transfer
+              vars:
+                tripleo_transfer_src_dir: /var/lib/openvswitch/ovn
+                tripleo_transfer_src_host: "{{ovndbs_source_host}}"
+                tripleo_transfer_dest_dir: /var/lib/openvswitch/ovn
+                tripleo_transfer_dest_host: "{{hostvars[groups['overcloud'][0]]['ovn_dbs_short_bootstrap_node_name']}}"
+                tripleo_transfer_flag_file: /var/lib/tripleo/transfer-flags/var-lib-openvswitch
       upgrade_tasks:
         - name: Prepare switch of ovn-dbs image name
           when: