---
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# SUB-TASKS DESCRIPTION:
#   These tasks perform the following activities:
#     - verify if the installed load matches the backup load
#     - install patches if the system was patched
#     - reboot the controller if it is required by the patching
#
- block:
  - name: Create {{ restore_in_progress_flag }} flag file
    file:
      path: "{{ restore_in_progress_flag }}"
      state: touch

  # For remote play the backup tarball will be transferred to /scratch
  - block:
    # Check if the backup tarball already exists. If it is the second run
    # after the reboot, there is no need to transfer the backup tarball again.
    - name: Check if {{ backup_filename }} has been uploaded already
      stat:
        path: "/scratch/{{ backup_filename }}"
      register: check_backup_tarball

    - block:
      # TODO(wzhou): Consider breaking the backup tarball into multiple small
      #   tarfiles. During restore, upload each small tarfile one at a time to
      #   restore a subfunction.
      #
      # Because the Ansible copy module uses the ansible_remote_tmp directory
      # as a staging area to transfer files, the default ansible_remote_tmp,
      # which is set in /tmp (1GB), may be too small for the backup tarball.
      # We therefore require the user to set ansible_remote_tmp to a new
      # directory in /home/sysadmin via the -e option on the command line.
      # For example:
      #   -e "ansible_remote_tmp=/home/sysadmin/ansible-restore"
      - name: Transfer backup tarball to /scratch on controller-0
        copy:
          src: "{{ initial_backup_dir }}/{{ backup_filename }}"
          dest: /scratch
          owner: root
          group: root
          mode: 0644

      # As an alternative to Ansible copy, the synchronize module may be used
      # to transfer large files. But synchronize is broken in Ansible 2.8:
      # https://github.com/ansible/ansible/issues/56629
      # - name: Transfer backup tarball to /scratch on controller-0
      #   synchronize:
      #     src: "{{ initial_backup_dir }}/{{ backup_filename }}"
      #     dest: "/scratch/{{ backup_filename }}"
      when: not check_backup_tarball.stat.exists

    - name: Set target_backup_dir to /scratch
      set_fact:
        target_backup_dir: /scratch
    when: inventory_hostname != "localhost"

  - name: For local play set target_backup_dir to initial_backup_dir
    set_fact:
      target_backup_dir: "{{ initial_backup_dir }}"
    when: inventory_hostname == "localhost"

  - name: Set fact for patching staging dir
    set_fact:
      patching_staging_dir: /scratch/patching

  - name: Create staging directory {{ patching_staging_dir }} for patch files
    file:
      path: "{{ patching_staging_dir }}"
      state: directory

  - block:
    - name: Get the checksum of the build.info file of the installed load
      stat:
        path: /etc/build.info
        get_checksum: yes
      register: installed_buildinfo_check

    - name: Retrieve build.info file from backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }}
        --transform='s,.*/,,' etc/build.info
      args:
        warn: false

    - name: Get the checksum of the build.info file from the backup
      stat:
        path: "{{ patching_staging_dir }}/build.info"
        get_checksum: yes
      register: backup_buildinfo_check

    - name: Fail if load version of backup does not match the version of the installed load
      fail:
        msg: "Load version of backup does not match the version of the installed load."
      when: installed_buildinfo_check.stat.checksum != backup_buildinfo_check.stat.checksum

    - name: Retrieve platform.conf file from the backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }}
        --transform='s,.*/,,' etc/platform/platform.conf
      args:
        warn: false

    - name: Get subfunction from the backup
      shell: grep -F 'subfunction' {{ patching_staging_dir }}/platform.conf
      register: backup_subfunc

    - name: Get subfunction set from backup platform.conf
      set_fact:
        backup_subfunc_set: "{{ backup_subfunc.stdout_lines[0].split('=')[1].split(',') }}"

    - name: Get subfunction from the installed load
      shell: grep -F 'subfunction' /etc/platform/platform.conf
      register: installed_subfunc

    - name: Get subfunction set from installed platform.conf
      set_fact:
        installed_subfunc_set: "{{ installed_subfunc.stdout_lines[0].split('=')[1].split(',') }}"

    - name: Check the difference between the two subfunction sets
      set_fact:
        diff_set: "{{ backup_subfunc_set | symmetric_difference(installed_subfunc_set) }}"

    - name: Fail if subfunction of backup does not match the subfunction of the installed load
      fail:
        msg: "Subfunction mismatch - backup: {{ backup_subfunc_set }}, installed: {{ installed_subfunc_set }}"
      when: diff_set != []

    # Patching is potentially a multi-phase step. If the controller is impacted
    # by patches from the backup, it must be rebooted before continuing the
    # restore. If the restore_patching_complete_flag file exists, this is the
    # second run after the reboot and the restore-and-apply-patching block
    # will be skipped.
    - name: Check if {{ restore_patching_complete_flag }} file exists
      stat:
        path: "{{ restore_patching_complete_flag }}"
      register: check_patching_complete

    # Restore and apply patching
    - block:
      - name: Strip the leading slash in dirname and assign it to a new variable
        set_fact:
          short_patching_permdir: "{{ patching_permdir | regex_replace('^\\/', '') }}"
          short_patching_repo_permdir: "{{ patching_repo_permdir | regex_replace('^\\/', '') }}"

      - name: Delete {{ patching_permdir }} dir if it exists
        file:
          path: "{{ patching_permdir }}"
          state: absent

      - name: Restore patching
        command: >-
          tar -C /opt -xpf {{ target_backup_dir }}/{{ backup_filename }}
          --strip-components=1 {{ short_patching_permdir }}
        args:
          warn: false

      - name: Delete {{ patching_repo_permdir }} dir if it exists
        file:
          path: "{{ patching_repo_permdir }}"
          state: absent

      - name: Restore patching repo
        command: >-
          tar -C /www/pages -xpf {{ target_backup_dir }}/{{ backup_filename }}
          --strip-components=2 {{ short_patching_repo_permdir }}
        args:
          warn: false

      - name: Apply patches
        command: sw-patch install-local
        args:
          warn: false

      - name: Create {{ restore_patching_complete_flag }} file
        file:
          path: "{{ restore_patching_complete_flag }}"
          state: touch

      # Check if the controller was impacted by patches
      - name: Check if {{ node_is_patched_flag }} file exists
        stat:
          path: "{{ node_is_patched_flag }}"
        register: check_node_is_patched

      # The controller was not impacted by patches, so a reboot is not required.
      # However, we need to restart the patch controller and agent, since we
      # set up the repo and patch store outside their control.
      - block:
        - name: Restart the patch controller and agent
          systemd:
            name: "{{ item }}"
            state: restarted
          with_items:
            - sw-patch-controller-daemon
            - sw-patch-agent
        when: not check_node_is_patched.stat.exists

      # The controller was impacted by patches. A reboot is required.
      - block:
        - name: Inform user that this controller will be rebooted
          debug:
            msg: >-
              This controller has been patched. A reboot will start.
              After the reboot is completed, please re-run the playbook to
              restore the platform again.

        - name: Remove the {{ restore_in_progress_flag }} file
          file:
            path: "{{ restore_in_progress_flag }}"
            state: absent

        - name: Remove staging directory {{ patching_staging_dir }} for patch files
          file:
            path: "{{ patching_staging_dir }}"
            state: absent

        # For better control of the restore, we don't invoke the Ansible
        # reboot module to reboot the node. We require the user to re-run
        # the playbook to restore the platform after the reboot is completed.
        # TODO(wzhou): Support patching without a re-run of the restore_platform
        #   playbook, by either invoking the Ansible reboot module or defining
        #   the reboot as an async task.
        - name: Reboot the controller
          shell: sleep 5 && reboot
          failed_when: false

        - name: >-
            Define a variable to indicate that the play was ended due to
            required controller reboot
          set_fact:
            required_reboot: true

        - name: Trigger the play to end and do cleanup
          fail:
            msg: Trigger the play to end and do cleanup.
        when: check_node_is_patched.stat.exists
      when: not check_patching_complete.stat.exists

    # The restore_patching_complete_flag file is removed in the following two scenarios:
    #   1. This is the first run with no patches to apply.
    #   2. This is the second run after the node reboot due to patching.
    - name: Clear {{ restore_patching_complete_flag }} flag file
      file:
        path: "{{ restore_patching_complete_flag }}"
        state: absent

    - name: Remove staging directory {{ patching_staging_dir }} for patch files
      file:
        path: "{{ patching_staging_dir }}"
        state: absent

  rescue:
  - block:
    - name: Remove the {{ restore_in_progress_flag }} file
      file:
        path: "{{ restore_in_progress_flag }}"
        state: absent

    - name: Remove staging directory {{ patching_staging_dir }} for patch files
      file:
        path: "{{ patching_staging_dir }}"
        state: absent

    - name: Fail the platform restore
      fail:
        msg: Restore platform failed!
    when: required_reboot is not defined

  # This is inside the bootstrap playbook. Invoking end_play will only end
  # bootstrap; the restore_platform playbook will continue to play, which is
  # not what we want.
  - name: Terminate the platform restore
    fail:
      msg: >-
        The restore is terminated due to required controller node reboot.
        Please re-run the playbook to restore the platform after the reboot
        is completed.
    when: required_reboot

  become: yes
  become_user: root
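
# Example invocation (for reference only; nothing below is executed by these
# tasks). This is a sketch of how the enclosing restore playbook might be run
# so that the variables referenced above are supplied. The playbook name,
# inventory, and backup filename are illustrative placeholders, not values
# defined by this role; the ansible_remote_tmp override is the one described
# in the transfer task above.
#
#   ansible-playbook restore_platform.yml -i <inventory> --limit <controller-0> \
#     -e "initial_backup_dir=/home/sysadmin" \
#     -e "backup_filename=<hostname>_platform_backup_<timestamp>.tgz" \
#     -e "ansible_remote_tmp=/home/sysadmin/ansible-restore"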