ansible-playbooks/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/load_patching_tasks.yml

---
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# SUB-TASKS DESCRIPTION:
# These tasks perform the following activities:
# - verify if the installed load matches the backup load
# - install patches if the system was patched
# - reboot the controller if it is required by the patching
#
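#
# Illustrative usage note (the include and condition below are hypothetical,
# shown only to make the calling context concrete): a platform restore flow
# would typically pull these tasks into the prepare-env role with something
# like:
#   - include_tasks: load_patching_tasks.yml
#     when: restore_mode_is_active
#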
- block:
  - name: Create {{ restore_in_progress_flag }} flag file
    file:
      path: "{{ restore_in_progress_flag }}"
      state: touch

  # For remote play, the backup tarball will be transferred to /scratch
  - block:
    # Check if the backup tarball already exists. If it is the second run
    # after the reboot, there is no need to transfer the backup tarball again.
    - name: Check if {{ backup_filename }} has been uploaded already
      stat:
        path: "/scratch/{{ backup_filename }}"
      register: check_backup_tarball

    - block:
      # TODO(wzhou): Consider breaking the backup tarball into multiple small
      # tarfiles. During restore, upload each small tarfile one at a time to
      # restore a subfunction.
      #
      # Because the Ansible copy module uses the ansible_remote_tmp directory
      # as a staging area to transfer files, the default ansible_remote_tmp,
      # which is set in /tmp (1GB), may be too small for the backup tarball.
      # We therefore require the user to set ansible_remote_tmp to a new
      # directory in /home/sysadmin via the -e option on the command line.
      # For example:
      #   -e "ansible_remote_tmp=/home/sysadmin/ansible-restore"
- name: Transfer backup tarball to /scratch on controller-0
copy:
src: "{{ initial_backup_dir }}/{{ backup_filename }}"
dest: /scratch
owner: root
group: root
mode: 0644
# As an alternative to Ansible copy, synchronize module may be
# used to transfer large files. But synchronize is broken in Ansible 2.8
# https://github.com/ansible/ansible/issues/56629.
# - name: Transfer backup tarball to /scratch on controller-0
# synchronize:
# src: "{{ initial_backup_dir }}/{{ backup_filename }}"
# dest: "/scratch/{{ backup_filename }}"
when: not check_backup_tarball.stat.exists

    - name: Set target_backup_dir to /scratch
      set_fact:
        target_backup_dir: /scratch
    when: inventory_hostname != "localhost"

  - name: For local play set target_backup_dir to initial_backup_dir
    set_fact:
      target_backup_dir: "{{ initial_backup_dir }}"
    when: inventory_hostname == "localhost"

  - name: Set fact for patching staging dir
    set_fact:
      patching_staging_dir: /scratch/patching

  - name: Create staging directory {{ patching_staging_dir }} for patch files
    file:
      path: "{{ patching_staging_dir }}"
      state: directory

  - block:
    - name: Get the checksum of the build.info file of the installed load
      stat:
        path: /etc/build.info
        get_checksum: yes
      register: installed_buildinfo_check
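
    # Note on the tar option below: --transform='s,.*/,,' rewrites each
    # extracted member name by stripping everything up to the last '/',
    # so etc/build.info lands directly in the staging directory rather
    # than under an etc/ subdirectory.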
    - name: Retrieve build.info file from backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }} --transform='s,.*/,,'
        etc/build.info
      args:
        warn: false

    - name: Get the checksum of the build.info file from the backup
      stat:
        path: "{{ patching_staging_dir }}/build.info"
        get_checksum: yes
      register: backup_buildinfo_check

    - name: Fail if load version of backup does not match the version of the installed load
      fail:
        msg: "Load version of backup does not match the version of the installed load."
      when: installed_buildinfo_check.stat.checksum != backup_buildinfo_check.stat.checksum

    - name: Retrieve platform.conf file from the backup
      command: >-
        tar -C {{ patching_staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }} --transform='s,.*/,,'
        etc/platform/platform.conf
      args:
        warn: false
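
    # Illustrative example (values are hypothetical): a platform.conf entry
    # such as "subfunction=controller,worker" is split below into the set
    # ['controller', 'worker']; the backup and installed sets must contain
    # exactly the same items for the restore to proceed.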
    - name: Get subfunction from the backup
      shell: grep -F 'subfunction' {{ patching_staging_dir }}/platform.conf
      register: backup_subfunc

    - name: Get subfunction set from backup platform.conf
      set_fact:
        backup_subfunc_set: "{{ backup_subfunc.stdout_lines[0].split('=')[1].split(',') }}"

    - name: Get subfunction from the installed load
      shell: grep -F 'subfunction' /etc/platform/platform.conf
      register: installed_subfunc

    - name: Get subfunction set from installed platform.conf
      set_fact:
        installed_subfunc_set: "{{ installed_subfunc.stdout_lines[0].split('=')[1].split(',') }}"

    - name: Check the difference between the two subfunction sets
      set_fact:
        diff_set: "{{ backup_subfunc_set | symmetric_difference(installed_subfunc_set) }}"

    - name: Fail if subfunction of backup does not match the subfunction of the installed load
      fail:
        msg: "Subfunction mismatch - backup: {{ backup_subfunc_set }}, installed: {{ installed_subfunc_set }}"
      when: diff_set != []

    # Patching is potentially a multi-phase step. If the controller is impacted
    # by patches from the backup, it must be rebooted before continuing the
    # restore. If the restore_patching_complete_flag file exists, this is the
    # second run after the reboot, and the restore-and-apply patching block
    # will be skipped.
    - name: Check if {{ restore_patching_complete_flag }} file exists
      stat:
        path: "{{ restore_patching_complete_flag }}"
      register: check_patching_complete

    # Restore and apply patching
    - block:
      - name: Strip the leading slash in dirname and assign it to a new variable
        set_fact:
          short_patching_permdir: "{{ patching_permdir | regex_replace('^\\/', '') }}"
          short_patching_repo_permdir: "{{ patching_repo_permdir | regex_replace('^\\/', '') }}"
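
      # For example (illustrative value): a patching_permdir of /opt/patching
      # becomes opt/patching, which matches how member paths are stored in the
      # backup tarball (tar strips the leading '/' when archiving).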
      - name: Delete {{ patching_permdir }} dir if it exists
        file:
          path: "{{ patching_permdir }}"
          state: absent

      - name: Restore patching
        command: >-
          tar -C /opt -xpf {{ target_backup_dir }}/{{ backup_filename }} --strip-components=1
          {{ short_patching_permdir }}
        args:
          warn: false

      - name: Delete {{ patching_repo_permdir }} dir if it exists
        file:
          path: "{{ patching_repo_permdir }}"
          state: absent

      - name: Restore patching repo
        command: >-
          tar -C /www/pages -xpf {{ target_backup_dir }}/{{ backup_filename }} --strip-components=2
          {{ short_patching_repo_permdir }}
        args:
          warn: false
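
      # sw-patch install-local installs the patches from the restored repo
      # directly on this node, without going through the normal patch
      # orchestration workflow.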
      - name: Apply patches
        command: sw-patch install-local
        args:
          warn: false

      - name: Create {{ restore_patching_complete_flag }} file
        file:
          path: "{{ restore_patching_complete_flag }}"
          state: touch

      # Check if the controller was impacted by patches
      - name: Check if {{ node_is_patched_flag }} file exists
        stat:
          path: "{{ node_is_patched_flag }}"
        register: check_node_is_patched

      # The controller was not impacted by patches, so a reboot is not required.
      # However, we need to restart the patch controller and agent, since we
      # set up the repo and patch store outside their control.
      - block:
        - name: Restart the patch controller and agent
          systemd:
            name: "{{ item }}"
            state: restarted
          with_items:
            - sw-patch-controller-daemon
            - sw-patch-agent
        when: not check_node_is_patched.stat.exists

      # The controller was impacted by patches. A reboot is required.
      - block:
        - name: Inform user that this controller will be rebooted
          debug:
            msg: >-
              This controller has been patched. A reboot will start.
              After the reboot is completed, please re-run the playbook to
              restore the platform again.

        - name: Remove the {{ restore_in_progress_flag }} file
          file:
            path: "{{ restore_in_progress_flag }}"
            state: absent

        - name: Remove staging directory {{ patching_staging_dir }} for patch files
          file:
            path: "{{ patching_staging_dir }}"
            state: absent

        # For better control of the restore, we don't invoke the Ansible reboot
        # module to reboot the node. Instead, we require the user to re-run the
        # playbook to restore the platform after the reboot has completed.
        # TODO(wzhou): Support patching without a re-run of the restore_platform
        # playbook, by either invoking the Ansible reboot module or defining the
        # reboot as an async task.
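        #
        # Note: "sleep 5" briefly delays the reboot, and failed_when: false
        # ensures that a non-zero exit status from this command does not mark
        # the play as failed while the node is going down.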
        - name: Reboot the controller
          shell: sleep 5 && reboot
          failed_when: false

        - name: >-
            Define a variable to indicate that the play was ended due to required controller reboot
          set_fact:
            required_reboot: true

        - name: Trigger the play to end and do cleanup
          fail:
            msg: Trigger the play to end and do cleanup.
        when: check_node_is_patched.stat.exists
      when: not check_patching_complete.stat.exists

    # The restore_patching_complete_flag file is removed in the following two scenarios:
    # 1. This is the first run with no patches to apply.
    # 2. This is the second run after the node reboot due to patching.
    - name: Clear {{ restore_patching_complete_flag }} flag file
      file:
        path: "{{ restore_patching_complete_flag }}"
        state: absent

    - name: Remove staging directory {{ patching_staging_dir }} for patch files
      file:
        path: "{{ patching_staging_dir }}"
        state: absent

    rescue:
    - block:
      - name: Remove the {{ restore_in_progress_flag }} file
        file:
          path: "{{ restore_in_progress_flag }}"
          state: absent

      - name: Remove staging directory {{ patching_staging_dir }} for patch files
        file:
          path: "{{ patching_staging_dir }}"
          state: absent

      - name: Fail the platform restore
        fail:
          msg: Restore platform failed!
      when: required_reboot is not defined

    # This is inside the bootstrap playbook, so invoking end_play would only end
    # bootstrap; the restore_platform playbook would then continue to play, which
    # is not what we want.
    - name: Terminate the platform restore
      fail:
        msg: >-
          The restore is terminated due to a required controller node reboot. Please
          re-run the playbook to restore the platform after the reboot is completed.
      when: required_reboot

  become: yes
  become_user: root