Fix timeouts during replacement of /etc/hosts

During the restore operation, there is a window of time in which the
/etc/hosts file does not exist and, when the playbooks are executed
remotely, commands hit the 12s timeout. This commit fixes that by
ensuring that /etc/hosts is always present with at least the bare minimum
entries, and also adds a rescue operation that recovers the previous
/etc/hosts file in case any task fails.

TEST PLAN
PASS remote play of the restore playbook
PASS local play of the restore playbook
PASS bootstrap newly installed system

Closes-Bug: #1986693
Signed-off-by: Thiago Brito <thiago.brito@windriver.com>
Change-Id: Iff8e56478339f660ec66e2d4f7cd8ad000b4d306
This commit is contained in:
Thiago Brito
2022-08-16 15:25:41 -03:00
parent a80d03e5af
commit aa75882544

View File

@@ -10,83 +10,100 @@
# Check host connectivity, change password if provided # Check host connectivity, change password if provided
- name: Remove existing /etc/hosts - name: Refresh local DNS
file: block:
path: /etc/hosts
state: absent
- block: - name: Moving existing /etc/hosts to backup and creating a new one with bare minimum entries
- name: Populate /etc/hosts copy:
lineinfile: dest: /etc/hosts
path: /etc/hosts backup: yes
line: "{{ item }}" # If this is not done, sudo operations might timeout. See bug #1986693
create: yes content: |
with_items: {{ localhost_name_ip_mapping }}
- "{{ localhost_name_ip_mapping }}" {{ derived_network_params.controller_0_address }} controller-0
- "{{ controller_floating_address }}\tcontroller" {{ derived_network_params.controller_1_address }} controller-1
# May not need this entry register: etc_hosts
- "{{ controller_0_cluster_host }}\tcontroller-0-infra"
- "{{ controller_pxeboot_floating_address }}\tpxecontroller"
- "{{ external_oam_floating_address }}\toamcontroller"
- "{{ derived_network_params.nfs_management_address_1 }}\tcontroller-platform-nfs"
- "{{ derived_network_params.controller_1_address }}\tcontroller-1"
- "{{ derived_network_params.controller_0_address }}\tcontroller-0"
# May not need this entry
- "{{ controller_1_cluster_host }}\tcontroller-1-infra"
- "{{ derived_network_params.nfs_management_address_2 }}\tcontroller-nfs"
- block: - block:
- name: Set central registry for subcloud - name: Populate /etc/hosts
set_fact: lineinfile:
# For virtual subcloud (StarlingX running in OpenStack Nova VM - QEMU/KVM), path: /etc/hosts
# central-cloud's local registry has to be exposed on the MGMT interface line: "{{ item }}"
# instead of the OAM interface as there is no physical OAM interface on create: yes
# subcloud to access central registry via OAM interface, so set with_items:
# "registry.central" to system controller's MGMT IP on subcloud to allow - "{{ controller_floating_address }}\tcontroller"
# subcloud pull images from central registry # May not need this entry
registry_central_address: - "{{ controller_0_cluster_host }}\tcontroller-0-infra"
"{{ system_controller_floating_address - "{{ controller_pxeboot_floating_address }}\tpxecontroller"
if (virtual_system is defined and virtual_system|bool) - "{{ external_oam_floating_address }}\toamcontroller"
else system_controller_oam_floating_address}}" - "{{ derived_network_params.nfs_management_address_1 }}\tcontroller-platform-nfs"
# May not need this entry
- "{{ controller_1_cluster_host }}\tcontroller-1-infra"
- "{{ derived_network_params.nfs_management_address_2 }}\tcontroller-nfs"
- name: Update /etc/hosts for subcloud - block:
lineinfile: - name: Set central registry for subcloud
path: /etc/hosts set_fact:
line: "{{ registry_central_address }}\tregistry.central" # For virtual subcloud (StarlingX running in OpenStack Nova VM - QEMU/KVM),
insertbefore: EOF # central-cloud's local registry has to be exposed on the MGMT interface
when: distributed_cloud_role == 'subcloud' # instead of the OAM interface as there is no physical OAM interface on
# subcloud to access central registry via OAM interface, so set
# "registry.central" to system controller's MGMT IP on subcloud to allow
# subcloud pull images from central registry
registry_central_address:
"{{ system_controller_floating_address
if (virtual_system is defined and virtual_system|bool)
else system_controller_oam_floating_address}}"
- name: Save hosts file to permanent location - name: Update /etc/hosts for subcloud
copy: lineinfile:
src: /etc/hosts path: /etc/hosts
dest: "{{ config_permdir }}" line: "{{ registry_central_address }}\tregistry.central"
remote_src: yes insertbefore: EOF
when: distributed_cloud_role == 'subcloud'
when: mode == 'bootstrap' - name: Save hosts file to permanent location
copy:
src: /etc/hosts
dest: "{{ config_permdir }}"
remote_src: yes
- block: when: mode == 'bootstrap'
- name: Restore /etc/hosts file
command: tar -C /etc -xpf {{ restore_data_file }} --transform='s,.*/,,' etc/hosts
args:
warn: false
- name: Restore hosts in config permdir - block:
command: >- - name: Restore /etc/hosts file
tar -C {{ config_permdir }} -xpf {{ restore_data_file }} --transform='s,.*/,,' command: tar -C /etc --overwrite -xpf {{ restore_data_file }} --transform='s,.*/,,' etc/hosts
{{ archive_config_permdir }}/hosts args:
args: warn: false
warn: false
- name: Temporary add central OAM as registry.central in /etc/hosts for subcloud restore - name: Restore hosts in config permdir
lineinfile: command: >-
path: /etc/hosts tar -C {{ config_permdir }} -xpf {{ restore_data_file }} --transform='s,.*/,,'
line: "{{ system_controller_oam_floating_address }}\tregistry.central" {{ archive_config_permdir }}/hosts
insertbefore: EOF args:
when: distributed_cloud_role == 'subcloud' warn: false
# Wei: I noticed that /etc/hosts and /opt/platform/config/<version>/hosts are always different - name: Temporary add central OAM as registry.central in /etc/hosts for subcloud restore
lineinfile:
path: /etc/hosts
line: "{{ system_controller_oam_floating_address }}\tregistry.central"
insertbefore: EOF
when: distributed_cloud_role == 'subcloud'
# Wei: I noticed that /etc/hosts and /opt/platform/config/<version>/hosts are always different
when: mode == 'restore'
rescue:
# Rescue step: put the pre-existing /etc/hosts back in place.
# "etc_hosts" is the result registered by the earlier copy task that was run
# with "backup: yes". The backup path must be passed as a Jinja expression;
# a bare "etc_hosts.backup_file" would be taken as a literal file name.
- name: Restore backed up /etc/hosts on failure
  copy:
    dest: /etc/hosts
    src: "{{ etc_hosts.backup_file }}"
    remote_src: true
  # NOTE(review): backup_file is presumably only reported when a backup was
  # actually made; skip the restore otherwise so the rescue itself does not
  # abort on an undefined variable - confirm against the copy module docs.
  when: etc_hosts is defined and etc_hosts.backup_file is defined
# A rescue section marks the failed block as handled, so fail explicitly to
# stop the playbook. The fail module reports a readable message and does not
# depend on the location of the "false" binary on the target host.
- name: Force a failure of playbook on /etc/hosts update failure
  fail:
    msg: "Failed to update /etc/hosts. Aborting the playbook."
when: mode == 'restore'
- name: Update name service caching server - name: Update name service caching server
command: nscd -i hosts command: nscd -i hosts