From dd45ce71a9709ff50c218836f66d225123bacfc1 Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Wed, 20 Oct 2021 09:01:29 +0200 Subject: [PATCH] Reauthenticate remotes after an upgrade to Train When upgrading from Queens to Train and changing major operating system as well, we currently take great care in puppet tripleo to let any remotes still running on CentOS 7 keep working while the control plane has moved to the new CentOS 8 and hence new pcmk/pcs versions. Since pcs has changed how things authenticate with remotes, we need to make sure that after an upgrade we reauthenticate any remotes with pcs. Otherwise any pcs operation involving a remote (scaleup) will fail with an error like the following: 2021-10-14 18:57:16,332 p=133710 u=mistral n=ansible | fatal: [clopkhd1]: FAILED! => {"ansible_job_id": "823537932379.262448", "attempts": 126, "changed": true, "cmd": "set -o pipefail; puppet apply --modulepath=/etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules --detailed-exitcodes --summarize --color=false /var/lib/tripleo-config/puppet_step_config.pp 2>&1 | logger -s -t puppet-user", "delta": "0:06:42.041400", "end": "2021-10-14 18:57:13.293758", "failed_when_result": true, "finished": 1, "msg": "non-zero return code", " ... ... 
Error: pcs create failed: Error: Hosts 'cmp1', 'cmp10', 'cmp11', 'cmp12', 'cmp14', 'cmp15', 'cmp16', 'cmp17', 'cmp18', 'cmp19', 'cmp2', 'cmp3', 'cmp4', 'cmp5', 'cmp6', 'cmp7', 'cmp8', 'cmp9' are not known to pcs, try to authenticate the hosts using 'pcs host auth cmp1 cmp10 cmp11 cmp12 cmp14 cmp15 cmp16 cmp17 cmp18 cmp19 cmp2 cmp3 cmp4 cmp5 cmp6 cmp7 cmp8 cmp9' command, use --skip-offline to override\n<13>Oct 14 18:56:54 puppet-user: Error: /Stage[main]/Tripleo::Profile::Base::Pacemaker/Pacemaker::Resource::Remote[cmp13]/Pcmk_remote[cmp13]/ensure: change from 'absent' to 'present' failed: pcs create failed: Error: Hosts 'cmp1', 'cmp10', 'cmp11', 'cmp12', 'cmp14', 'cmp15', 'cmp16', 'cmp17', 'cmp18', 'cmp19', 'cmp2', 'cmp3', 'cmp4', 'cmp5', 'cmp6', 'cmp7', 'cmp8', 'cmp9' are not known to pcs, try to authenticate the hosts using 'pcs host auth cmp1 cmp10 cmp11 cmp12 cmp14 cmp15 cmp16 cmp17 cmp18 cmp19 cmp2 cmp3 cmp4 cmp5 cmp6 cmp7 cmp8 cmp9' command, use --skip-offline to override\n<13>Oct 14 18:56:54 puppet-user: Notice: /Stage[main]/Tripleo::Profile::Base::Pacemaker/Exec[exec-wait-for-cmp13]: Dependency Pcmk_remote[cmp13] has failures: true This is because the upgraded remotes have not been reauthenticated to pcs (which basically means that the remote computes are not present in /var/lib/pcsd/known-hosts). 
With this change we observe the following during an FFU: 2021-10-29 21:12:20 | TASK [Try and reauthenticate the remote via pcsd from the core cluster] ******** 2021-10-29 21:12:20 | Friday 29 October 2021 21:12:14 +0000 (0:00:00.760) 0:00:15.098 ******** 2021-10-29 21:12:20 | changed: [compute-0 -> 192.168.24.20] => {"changed": true, "cmd": "pcs host auth \"compute-0\" -u hacluster -p $(hiera -c /etc/puppet/hiera.yaml hacluster_pwd)", "delta": "0:00:02.180656", "end": "2021-10-29 21:12:17.656863", "rc": 0, "start": "2021-10-29 21:12:15.476207", "stderr": "", "stderr_lines": [], "stdout": "compute-0: Authorized", "stdout_lines": ["compute-0: Authorized"]} And afterwards we see the node in /var/lib/pcsd/known-hosts (which we did not before): [root@controller-0 ~]# grep compute /var/lib/pcsd/known-hosts "compute-0": { "addr": "compute-0", Closes-Bug: #1949255 Change-Id: Ib105fedd014a46260cd3f2fa3e2e59ed0ffb730d --- deployment/pacemaker/pacemaker-remote-baremetal-puppet.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deployment/pacemaker/pacemaker-remote-baremetal-puppet.yaml b/deployment/pacemaker/pacemaker-remote-baremetal-puppet.yaml index 206563f57b..737f846906 100644 --- a/deployment/pacemaker/pacemaker-remote-baremetal-puppet.yaml +++ b/deployment/pacemaker/pacemaker-remote-baremetal-puppet.yaml @@ -180,3 +180,8 @@ outputs: loop: - pacemaker_remote_short_node_names_override - pacemaker_remote_node_ips_override + - name: Try and reauthenticate the remote via pcsd from the core cluster + when: step|int == 1 + delegate_to: "{{ groups['pacemaker'] | first }}" + shell: | + pcs host auth "{{ ansible_facts['hostname']|lower }}" -u hacluster -p $(hiera -c /etc/puppet/hiera.yaml hacluster_pwd)