---
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# SUB-TASKS DESCRIPTION:
# These tasks perform the following activities:
#   - verify that the installed load matches the backup load
#   - install patches if the system was patched
#   - reboot the controller if the patching requires it
#
- block:
  - name: Create {{ restore_in_progress_flag }} flag file
    file:
      path: "{{ restore_in_progress_flag }}"
      state: touch

  # For remote play the backup tarball will be transferred to /scratch.
  - block:
    # Check if the backup tarball already exists. If this is the second run
    # after the reboot, there is no need to transfer the backup tarball again.
    - name: Check if {{ backup_filename }} has been uploaded already
      stat:
        path: "/scratch/{{ backup_filename }}"
      register: check_backup_tarball

    - block:
      # TODO(wzhou): Consider breaking the backup tarball into multiple
      # small tarfiles. During restore, upload each small tarfile one at a
      # time to restore a subfunction.

      # Because the Ansible copy module uses the ansible_remote_tmp
      # directory as a staging area to transfer files, the default
      # ansible_remote_tmp, which is set in /tmp (1GB), may be too small
      # for the backup tarball. We therefore require the user to set
      # ansible_remote_tmp to a new directory in /home/sysadmin via the -e
      # option on the command line. For example:
      #   -e "ansible_remote_tmp=/home/sysadmin/ansible-restore"

      - name: Transfer backup tarball to /scratch on controller-0
        copy:
          src: "{{ initial_backup_dir }}/{{ backup_filename }}"
          dest: /scratch
          owner: root
          group: root
          mode: 0644

      # As an alternative to the Ansible copy module, the synchronize
      # module may be used to transfer large files, but synchronize is
      # broken in Ansible 2.8 (https://github.com/ansible/ansible/issues/56629):
      # - name: Transfer backup tarball to /scratch on controller-0
      #   synchronize:
      #     src: "{{ initial_backup_dir }}/{{ backup_filename }}"
      #     dest: "/scratch/{{ backup_filename }}"

      when: not check_backup_tarball.stat.exists

    - name: Set target_backup_dir to /scratch
      set_fact:
        target_backup_dir: /scratch

    when: inventory_hostname != "localhost"

  - name: For local play set target_backup_dir to initial_backup_dir
    set_fact:
      target_backup_dir: "{{ initial_backup_dir }}"
    when: inventory_hostname == "localhost"

  - name: Set fact for patching staging dir
    set_fact:
      patching_staging_dir: /scratch/patching

  - name: Create staging directory {{ patching_staging_dir }} for patch files
    file:
      path: "{{ patching_staging_dir }}"
      state: directory

  - block:
    - name: Get the checksum of the build.info file of the installed load
      stat:
        path: /etc/build.info
        get_checksum: yes
      register: installed_buildinfo_check

    - name: Retrieve build.info file from the backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }}
        --transform='s,.*/,,' etc/build.info
      args:
        warn: false
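    # Note: the sed-style --transform='s,.*/,,' strips all leading
    # directory components from the extracted member, so etc/build.info
    # lands directly at {{ patching_staging_dir }}/build.info for the
    # checksum comparison below.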

    - name: Get the checksum of the build.info file from the backup
      stat:
        path: "{{ patching_staging_dir }}/build.info"
        get_checksum: yes
      register: backup_buildinfo_check

    - name: Fail if load version of backup does not match the version of the installed load
      fail:
        msg: "Load version of backup does not match the version of the installed load."
      when: installed_buildinfo_check.stat.checksum != backup_buildinfo_check.stat.checksum

    - name: Retrieve platform.conf file from the backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }}
        --transform='s,.*/,,' etc/platform/platform.conf
      args:
        warn: false

    - name: Get subfunction from the backup
      shell: grep -F 'subfunction' {{ patching_staging_dir }}/platform.conf
      register: backup_subfunc

    - name: Get subfunction set from backup platform.conf
      set_fact:
        backup_subfunc_set: "{{ backup_subfunc.stdout_lines[0].split('=')[1].split(',') }}"
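    # For illustration: a platform.conf line such as
    #   subfunction=controller,worker
    # yields backup_subfunc_set = ['controller', 'worker'].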

    - name: Get subfunction from the installed load
      shell: grep -F 'subfunction' /etc/platform/platform.conf
      register: installed_subfunc

    - name: Get subfunction set from installed platform.conf
      set_fact:
        installed_subfunc_set: "{{ installed_subfunc.stdout_lines[0].split('=')[1].split(',') }}"

    - name: Check the difference between the two subfunction sets
      set_fact:
        diff_set: "{{ backup_subfunc_set | symmetric_difference(installed_subfunc_set) }}"

    - name: Fail if subfunction of backup does not match the subfunction of the installed load
      fail:
        msg: "Subfunction mismatch - backup: {{ backup_subfunc_set }}, installed: {{ installed_subfunc_set }}"
      when: diff_set != []
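    # For example, a backup taken on a combined controller/worker node
    # (['controller', 'worker']) restored onto a controller-only load
    # (['controller']) gives diff_set = ['worker'], which fails the task
    # above as intended.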

    # Patching is potentially a multi-phase step. If the controller is
    # impacted by patches from the backup, it must be rebooted before the
    # restore can continue. If the restore_patching_complete_flag file
    # exists, this is the second run after the reboot, and the
    # restore-and-apply-patching block below is skipped.
    - name: Check if {{ restore_patching_complete_flag }} file exists
      stat:
        path: "{{ restore_patching_complete_flag }}"
      register: check_patching_complete

    # Restore and apply patching
    - block:
      - name: Strip the leading slash in dirname and assign it to a new variable
        set_fact:
          short_patching_permdir: "{{ patching_permdir | regex_replace('^\\/', '') }}"
          short_patching_repo_permdir: "{{ patching_repo_permdir | regex_replace('^\\/', '') }}"
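      # For example, with a hypothetical patching_permdir of /opt/patching,
      # short_patching_permdir becomes opt/patching, which matches the
      # member paths stored inside the tarball for the extractions below.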

      - name: Delete {{ patching_permdir }} dir if it exists
        file:
          path: "{{ patching_permdir }}"
          state: absent

      - name: Restore patching
        command: >-
          tar -C /opt -xpf {{ target_backup_dir }}/{{ backup_filename }}
          --strip-components=1 {{ short_patching_permdir }}
        args:
          warn: false

      - name: Delete {{ patching_repo_permdir }} dir if it exists
        file:
          path: "{{ patching_repo_permdir }}"
          state: absent

      - name: Restore patching repo
        command: >-
          tar -C /www/pages -xpf {{ target_backup_dir }}/{{ backup_filename }}
          --strip-components=2 {{ short_patching_repo_permdir }}
        args:
          warn: false

      - name: Apply patches
        command: sw-patch install-local
        args:
          warn: false
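      # sw-patch install-local installs the patches from the restored repo
      # onto this node. If any of them impact the controller, the patching
      # software raises the node_is_patched flag that is checked below to
      # decide whether a reboot is required.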

      - name: Create {{ restore_patching_complete_flag }} file
        file:
          path: "{{ restore_patching_complete_flag }}"
          state: touch

      # Check if the controller was impacted by patches
      - name: Check if {{ node_is_patched_flag }} file exists
        stat:
          path: "{{ node_is_patched_flag }}"
        register: check_node_is_patched

      # The controller was not impacted by patches, so a reboot is not
      # required. However, the patch controller and agent must be
      # restarted, since the repo and patch store were set up outside
      # their control.
      - block:
        - name: Restart the patch controller and agent
          systemd:
            name: "{{ item }}"
            state: restarted
          with_items:
            - sw-patch-controller-daemon
            - sw-patch-agent

        when: not check_node_is_patched.stat.exists

      # The controller was impacted by patches. A reboot is required.
      - block:
        - name: Inform user that this controller will be rebooted
          debug:
            msg: >-
              This controller has been patched. A reboot will start.
              After the reboot is completed, please re-run the playbook to
              restore the platform again.

        - name: Remove the {{ restore_in_progress_flag }} file
          file:
            path: "{{ restore_in_progress_flag }}"
            state: absent

        - name: Remove staging directory {{ patching_staging_dir }} for patch files
          file:
            path: "{{ patching_staging_dir }}"
            state: absent

        # For better control of the restore, we do not invoke the Ansible
        # reboot module to reboot the node. Instead, we require the user to
        # re-run the playbook to restore the platform after the reboot has
        # completed.
        # TODO(wzhou): Support patching without a re-run of the
        # restore_platform playbook, by either invoking the Ansible reboot
        # module or defining the reboot as an async task.
        - name: Reboot the controller
          shell: sleep 5 && reboot
          failed_when: false
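        # A possible alternative, per the TODO above: an untested sketch
        # using the stock Ansible reboot module (available since Ansible
        # 2.7):
        # - name: Reboot the controller
        #   reboot:
        #     reboot_timeout: 1800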

        - name: >-
            Define a variable to indicate that the play was ended due to
            required controller reboot
          set_fact:
            required_reboot: true

        - name: Trigger the play to end and do cleanup
          fail:
            msg: Trigger the play to end and do cleanup.

        when: check_node_is_patched.stat.exists

      when: not check_patching_complete.stat.exists

    # The restore_patching_complete_flag file is removed in the following
    # two scenarios:
    #   1. This is the first run and there are no patches to apply.
    #   2. This is the second run, after the node rebooted due to patching.
    - name: Clear {{ restore_patching_complete_flag }} flag file
      file:
        path: "{{ restore_patching_complete_flag }}"
        state: absent

    - name: Remove staging directory {{ patching_staging_dir }} for patch files
      file:
        path: "{{ patching_staging_dir }}"
        state: absent

  rescue:
  - block:
    - name: Remove the {{ restore_in_progress_flag }} file
      file:
        path: "{{ restore_in_progress_flag }}"
        state: absent

    - name: Remove staging directory {{ patching_staging_dir }} for patch files
      file:
        path: "{{ patching_staging_dir }}"
        state: absent

    - name: Fail the platform restore
      fail:
        msg: Restore platform failed!

    when: required_reboot is not defined
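  # If required_reboot is defined, the failure that brought us into this
  # rescue was the deliberate "Trigger the play to end and do cleanup"
  # task, so the cleanup block above is skipped and only the termination
  # message below is shown.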

  # This runs inside the bootstrap playbook, so invoking end_play would
  # only end the bootstrap; the restore_platform playbook would continue
  # to play, which is not what we want.
  - name: Terminate the platform restore
    fail:
      msg: >-
        The restore is terminated due to a required controller node reboot.
        Please re-run the playbook to restore the platform after the reboot
        is completed.
    when: required_reboot

  become: yes
  become_user: root