424 lines
14 KiB
YAML
424 lines
14 KiB
YAML
---
|
|
#
|
|
# Copyright (c) 2019 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# ROLE DESCRIPTION:
|
|
# This role is to restore the remaining data in the backup tarball
|
|
# during platform restore.
|
|
#
|
|
|
|
# The hieradata files below were produced after the persist-config role ran.
# They are regenerated when sysinv is restarted once the postgres db has been
# restored, so the freshly generated copies are removed here.
- name: Remove newly generated hieradata data
  file:
    state: absent
    path: "{{ item }}"
  loop:
    - "{{ puppet_permdir }}/hieradata/{{ controller_floating_address|ipmath(1) }}.yaml"
    - "{{ puppet_permdir }}/hieradata/system.yaml"
    - "{{ puppet_permdir }}/hieradata/secure_system.yaml"
|
|
|
|
# Ansible quirk: the regex_replace filter is ignored when it is applied to
# variables inside the command module, so the paths without their leading '/'
# are computed up front as facts.
- name: Remove leading '/' from dir name
  set_fact:
    short_config_permdir: "{{ config_permdir | regex_replace('^\\/', '') }}"
    short_platform_conf_path: "{{ platform_conf_path | regex_replace('^\\/', '') }}"

# Only platform.conf is pulled from the archive; the --transform expression
# removes the directory part so the file lands directly in the staging dir.
- name: Extract platform.conf from the backup tarball
  args:
    warn: false
  command: >-
    tar -C {{ staging_dir }} -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --transform='s,.*/,,' {{ short_platform_conf_path }}/platform.conf
|
|
|
|
# Grab the INSTALL_UUID line from the platform.conf currently on disk ...
- name: Search for the new INSTALL_UUID in /etc/platform/platform.conf
  register: result
  shell: grep INSTALL_UUID {{ platform_conf_path }}/platform.conf

# ... and write that line over the INSTALL_UUID entry of the staged copy that
# came out of the backup.
- name: Replace INSTALL_UUID with the new one
  lineinfile:
    line: "{{ result.stdout }}"
    regexp: 'INSTALL_UUID'
    dest: "{{ staging_dir }}/platform.conf"
|
|
|
|
# Drop every line of the staged platform.conf that matches one of the
# host-specific keys listed below.
- name: Strip out entries that are host specific
  lineinfile:
    state: absent
    regexp: "{{ item }}"
    dest: "{{ staging_dir }}/platform.conf"
  loop:
    - '^oam_interface='
    - '^cluster_host_interface='
    - '^UUID='
|
|
|
|
# A missing management_interface entry is tolerated: the grep result is only
# registered and the follow-up task runs solely when grep found a match.
- name: Search for the management_interface in /etc/platform/platform.conf
  register: result
  failed_when: false
  shell: grep management_interface {{ platform_conf_path }}/platform.conf

- name: Replace management_interface with the new one
  when: result.rc == 0
  lineinfile:
    line: "{{ result.stdout }}"
    regexp: '^management_interface='
    dest: "{{ staging_dir }}/platform.conf"

# Move the edited staging copy over the live platform.conf.
- name: Replace platform config file
  command: mv -f {{ staging_dir }}/platform.conf {{ platform_conf_path }}/platform.conf
|
|
|
|
# Restore resolv.conf and dnsmasq.
# In all three extractions --transform strips the directory part of the member
# name, so the files are written directly into the directory given with -C.
- name: Extract resolv.conf from backup tarball
  args:
    warn: false
  command: >-
    tar -C /etc -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --overwrite --transform='s,.*/,,' etc/resolv.conf

- name: Restore resolv.conf in config permdir (/opt/platform/config/...)
  args:
    warn: false
  command: >-
    tar -C {{ config_permdir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }} --overwrite
    --transform='s,.*/,,' '{{ short_config_permdir }}/resolv.conf'

- name: Restore dnsmaq in config permdir (/opt/platform/config/...)
  args:
    warn: false
  command: >-
    tar -C {{ config_permdir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }} --overwrite
    --transform='s,.*/,,' '{{ short_config_permdir }}/dnsmasq*'
|
|
|
|
# tar member names carry no leading '/', so a stripped copy of the pxe config
# directory path is prepared for use as the member pattern prefix.
- name: Remove leading '/' from directory name
  set_fact:
    short_pxe_config_permdir: "{{ pxe_config_permdir | regex_replace('^\\/', '') }}"

# Extract the members matching '*-*-*' under the pxe config directory, flattened
# by --transform, back into that directory.
- name: Restore boot files in pxelinux.cfg dir
  args:
    warn: false
  command: >-
    tar -C {{ pxe_config_permdir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }} --overwrite
    --transform='s,.*/,,' '{{ short_pxe_config_permdir }}/*-*-*'
|
|
|
|
# Stage the ldap dump from the backup; the directory part is stripped so the
# file ends up as <staging_dir>/ldap.db.
- name: Extract ldap.db to staging directory
  args:
    warn: false
  command: >-
    tar -C {{ staging_dir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --transform='s,.*/,,' '*/ldap.db'

# The init script is invoked directly with SYSTEMCTL_SKIP_REDIRECT=1 exported.
- name: Stop openldap service
  shell: "export SYSTEMCTL_SKIP_REDIRECT=1; /etc/init.d/openldap stop"

# Remove the existing ldap directory entirely; it is recreated by the next task.
- name: Delete ldap directory
  file:
    state: absent
    path: "{{ ldap_permdir }}"
|
|
|
|
# Recreate an empty ldap directory owned by root:root.
# The mode is given as a quoted octal string and the boolean as true, as the
# Ansible file module documentation recommends, so the value cannot be
# reinterpreted by the YAML parser.
- name: Recreate ldap directory
  file:
    path: "{{ ldap_permdir }}"
    state: directory
    recurse: true
    owner: root
    group: root
    mode: "0755"
|
|
|
|
# Load the staged dump with slapadd while the service is stopped.
- name: Restore ldap
  shell: slapadd -F /etc/openldap/schema -l {{ staging_dir }}/ldap.db

# Same direct init-script invocation as used for stopping the service.
- name: Start openldap service
  shell: "export SYSTEMCTL_SKIP_REDIRECT=1; /etc/init.d/openldap start"

# The staged dump is no longer needed once it has been loaded.
- name: Delete file from staging dir
  file:
    state: absent
    path: "{{ staging_dir }}/ldap.db"
|
|
|
|
# Members under home/ are extracted relative to '/', overwriting existing files.
- name: Restore home directory
  become_user: root
  args:
    warn: false
  shell: tar -C / --overwrite -xpf {{ target_backup_dir }}/{{ backup_filename }} 'home/*'

# One tar invocation per directory; each path has its leading '/' removed
# because the archive is unpacked relative to '/'.
- name: Restore Helm charts, armada manifests and extension filesystem
  become_user: root
  args:
    warn: false
  command: tar -C / --overwrite -xpf {{ target_backup_dir }}/{{ backup_filename }} {{ item }}
  loop:
    - "{{ helm_charts_permdir | regex_replace('^\\/', '') }}"
    - "{{ armada_permdir | regex_replace('^\\/', '') }}"
    - "{{ extension_permdir | regex_replace('^\\/', '') }}"
|
|
|
|
# sysinv.conf.default is taken from the archive and, with its directory part
# stripped by --transform, written into the sysinv config permdir.
- name: Restore sysinv default configuration file
  args:
    warn: false
  command: >-
    tar -C {{ sysinv_config_permdir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --transform='s,.*/,,' '*/sysinv.conf.default'
|
|
|
|
# On AIO systems the ceph crushmap cannot live in sysinv_config_permdir
# (/opt/platform/sysinv/): when controller-0 is unlocked for the first time,
# ceph puppet sets the crushmap before /opt/platform is mounted. AIO systems
# therefore keep the crushmap in /etc/sysinv; every other system type uses
# sysinv_config_permdir.
- name: Set ceph crushmap directory to /etc/sysinv if it is AIO system
  when: system_type == 'All-in-one'
  set_fact:
    ceph_crushmap_dir: /etc/sysinv

- name: Set ceph crushmap directory to /opt/platform/sysinv if it is non-AIO system
  when: system_type != 'All-in-one'
  set_fact:
    ceph_crushmap_dir: "{{ sysinv_config_permdir }}"
|
|
|
|
# Place crushmap.bin.backup from the archive into the crushmap directory
# selected for this system type.
- name: Restore ceph crush map
  args:
    warn: false
  command: >-
    tar -C {{ ceph_crushmap_dir }}
    -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --transform='s,.*/,,' '*/crushmap.bin.backup'

# The osd info has to be removed from the crushmap before it is loaded into
# ceph; ceph inserts the osds into the crushmap again when they are created.
# The pipeline lists the crush tree, takes the last field of every line that
# mentions "osd" and removes that item, rewriting the crushmap file in place.
# TODO: There might be a better command to do this, like the rebuild option
# with the ceph-monstore-tool.
- name: Remove osds from the crushmap
  shell: >-
    crushtool -i {{ ceph_crushmap_dir }}/{{ crushmap_file }} --tree | awk /osd/'{print $NF}' |
    xargs -i crushtool -i {{ ceph_crushmap_dir }}/{{ crushmap_file }}
    --remove-item {} -o {{ ceph_crushmap_dir }}/{{ crushmap_file }}
|
|
|
|
# The archive is unpacked relative to '/', so the member path is the
# patch-vault directory without its leading '/'.
- name: Remove leading '/' from patch-vault directory
  set_fact:
    short_patch_vault_permdir: "{{ patch_vault_permdir | regex_replace('^\\/', '') }}"

# The lookup never fails the play; only its return code is recorded and used
# below to decide whether there is anything to restore.
- name: Look for patch-vault filesystem
  register: search_result
  failed_when: false
  args:
    warn: false
  shell: "tar -tf {{ target_backup_dir }}/{{ backup_filename }} | grep 'patch-vault'"

- name: Restore patch-vault filesystem
  when: search_result.rc == 0
  args:
    warn: false
  command: >-
    tar -C / --overwrite
    -xpf {{ target_backup_dir }}/{{ backup_filename }} {{ short_patch_vault_permdir }}
|
|
|
|
# TODO: Restore ceph_external when it is supported
|
|
|
|
# Ensure the Helm overrides directory exists and is owned by root:root.
# The mode is given as a quoted octal string and the boolean as true, as the
# Ansible file module documentation recommends, so the value cannot be
# reinterpreted by the YAML parser.
- name: Create Helm overrides directory
  file:
    path: "{{ helm_overrides_permdir }}"
    state: directory
    recurse: true
    owner: root
    group: root
    mode: "0755"
|
|
|
|
- block:
|
|
# mtcAgent is driven through its OCF resource script, which is given the OCF
# root and the resource state through the environment.
- name: Shutdown mtce
  environment:
    OCF_RESKEY_state: "active"
    OCF_ROOT: "/usr/lib/ocf"
  command: /usr/lib/ocf/resource.d/platform/mtcAgent stop

# Stop the listed services before the databases are restored; the same list is
# restarted later in this file.
- name: Stop services
  systemd:
    state: stopped
    name: "{{ item }}"
  loop:
    - openstack-keystone
    - fminit
    - fm-api
    - sysinv-api
    - sysinv-conductor
    - sysinv-agent
    - openstack-barbican-api
|
|
|
|
# Scratch directory that receives the postgres dump files from the backup.
# The mode is given as a quoted octal string and the boolean as true, as the
# Ansible file module documentation recommends, so the value cannot be
# reinterpreted by the YAML parser.
- name: Create staging directory for postgres data
  file:
    path: "{{ staging_dir }}/postgres"
    state: directory
    recurse: true
    owner: root
    group: root
    mode: "0755"
|
|
|
|
# Every archive member matching *.postgreSql.* is extracted, flattened, into
# the postgres staging directory.
- name: Extract postgres db to staging directory
  args:
    warn: false
  command: >-
    tar -C {{ staging_dir }}/postgres
    -xpf {{ target_backup_dir }}/{{ backup_filename }}
    --transform='s,.*/,,' '*/*\.postgreSql\.*'

# Each dump is replayed with psql as the postgres user. The target database is
# the part of the dump's file name that precedes the first '.'.
- name: Restore postgres db
  become_user: postgres
  shell: "psql -f {{ item }} {{ (item|basename).split('.')[0] }}"
  loop:
    - "{{ staging_dir }}/postgres/postgres.postgreSql.config"
    - "{{ staging_dir }}/postgres/postgres.postgreSql.data"
    - "{{ staging_dir }}/postgres/template1.postgreSql.data"
    - "{{ staging_dir }}/postgres/sysinv.postgreSql.data"
    - "{{ staging_dir }}/postgres/keystone.postgreSql.data"
    - "{{ staging_dir }}/postgres/fm.postgreSql.data"
    - "{{ staging_dir }}/postgres/barbican.postgreSql.data"

# The dumps are not needed after they have been loaded.
- name: Remove postgres staging directory
  file:
    state: absent
    path: "{{ staging_dir }}/postgres"
|
|
|
|
# Put all the hosts, controller-0 included, into locked/disabled/offline state.
# Once the services are restarted, mtce moves controller-0 to
# locked/disabled/online. Forcing controller-0 offline now ensures that
# keystone, sysinv and mtcAgent are really in service after being restarted.
# Exactly one of the two tasks runs, selected by wipe_ceph_osds: storage nodes
# are left untouched when the OSDs are not being wiped.
- name: Set all the hosts to locked/disabled/offline state
  become_user: postgres
  when: wipe_ceph_osds | bool
  shell: >-
    psql -c "update i_host set administrative='locked',
    operational='disabled', availability='offline'" sysinv

- name: Set all the hosts, except storage nodes to locked/disabled/offline state
  become_user: postgres
  when: not wipe_ceph_osds | bool
  shell: >-
    psql -c "update i_host set administrative='locked',
    operational='disabled', availability='offline' where personality!='storage'" sysinv
|
|
|
|
# platform-integ-apps goes to "uploaded" so that its manifest is applied once
# ceph is up after controller-0 has been unlocked for the first time.
- name: Set platform-integ-apps to "uploaded" state
  become_user: postgres
  shell: psql -c "update kube_app set status='uploaded' where name='platform-integ-apps'" sysinv

# An stx-openstack app left in "applied" state is moved back to "uploaded" to
# avoid confusion; the app is brought up in stages after the platform restore.
- name: Check stx-openstack app state
  become_user: postgres
  register: app_res
  shell: psql -c "select status from kube_app where name='stx-openstack'" sysinv

- name: Set stx-openstack app to "uploaded" state
  become_user: postgres
  when: app_res.stdout is search('applied')
  shell: psql -c "update kube_app set status='uploaded' where name='stx-openstack'" sysinv
|
|
|
|
# Restart the same set of services that was stopped before the db restore.
- name: Restart services
  systemd:
    state: restarted
    name: "{{ item }}"
  loop:
    - openstack-keystone
    - fminit
    - fm-api
    - sysinv-api
    - sysinv-conductor
    - sysinv-agent
    - openstack-barbican-api

# Counterpart of the earlier mtcAgent stop, using the same OCF environment.
- name: Bring up Maintenance Agent
  environment:
    OCF_RESKEY_state: "active"
    OCF_ROOT: "/usr/lib/ocf"
  command: /usr/lib/ocf/resource.d/platform/mtcAgent start

# Fixed pause before the readiness checks that follow.
- name: Wait for 90 secs before check if services come up
  wait_for:
    timeout: 90
|
|
|
|
# admin-keystone is always the very last one to become ready, so polling the
# process list for it alone is sufficient. Up to 6 retries, 10 seconds apart.
- name: Make sure admin-keystone is ready
  register: result
  retries: 6
  delay: 10
  until: result.stdout.find("keystone") != -1
  shell: "ps -ef | grep admin-keystone | grep -v grep"
|
|
|
|
# Run "system host-show" to verify that controller-0 is in "online" state.
# This ensures that keystone, sysinv and mtcAgent are indeed in service after
# being restarted. The check is retried up to 30 times, 10 seconds apart, and
# never fails the play by itself (failed_when: false); the outcome is reported
# by the task that follows.
# "source" is a bash builtin while the shell module runs /bin/sh by default,
# so the interpreter is pinned to bash; otherwise a failing "source" would be
# hidden by failed_when and merely look like "not online".
- name: Check controller-0 is in online state
  shell: source /etc/platform/openrc; system host-show controller-0 --column availability --format value
  args:
    executable: /bin/bash
  register: check_online
  failed_when: false
  retries: 30
  delay: 10
  until: check_online.stdout == "online"

- name: Inform user that restore_platform is not successful
  debug:
    msg: >-
      Platform restore was unsuccessful. Please refer to the system administration
      guide for next step.
  when: check_online.stdout != "online"
|
|
|
|
# Restore ceph-mon data
|
|
- block:
|
|
- block:
|
|
# Recover procedure for systems with storage nodes is different from
# that of systems with controller storage:
# - For controller storage we recover ceph-mon data by scanning OSDs.
# - For systems with storage nodes we get ceph-mon data from storage-0
#   ceph-mon that is already up and will not be reinstalled.
# Both tasks below use "source", a bash builtin, while the shell module runs
# /bin/sh by default; the interpreter is therefore pinned to bash.
- name: Check if setup has storage nodes
  shell: source /etc/platform/openrc; system host-list --format value --column personality
  args:
    executable: /bin/bash
  register: node_personalities
  failed_when: false

# Get system_mode after restore and create flag file to skip wiping OSDs
- name: Retrieve system mode
  shell: source /etc/platform/platform.conf; echo $system_mode
  args:
    executable: /bin/bash
  register: restore_system_mode_result
|
|
|
|
# No output line from the system mode query means platform.conf has no
# system_mode entry; stop here instead of continuing without it.
- name: Fail if system mode is not defined
  when: restore_system_mode_result.stdout_lines | length == 0
  fail:
    msg: "system_mode is missing in /etc/platform/platform.conf"

- name: Set system mode fact
  set_fact:
    restore_system_mode: "{{ restore_system_mode_result.stdout_lines[0] }}"

# The flag file is touched for every system mode other than simplex.
- name: Create flag file in /etc/platform to skip wiping OSDs
  when: restore_system_mode != 'simplex'
  file:
    state: touch
    path: "{{ skip_ceph_osds_wipe_flag }}"

# Recover ceph data for systems with controller storage, i.e. when no host
# personality in the earlier host listing mentions "storage".
- when: node_personalities.stdout is not search('storage')
  include_role:
    name: recover-ceph-data
|
|
|
|
# Touch the .crushmap_applied marker file in the sysinv config permdir.
# The mode must be a quoted octal string: a bare 644 is read as the decimal
# integer 644 (octal 1204) and would produce unintended permission bits
# instead of rw-r--r--.
- name: Mark crushmap as restored
  file:
    path: "{{ sysinv_config_permdir }}/.crushmap_applied"
    owner: root
    group: root
    mode: "0644"
    state: touch
|
|
|
|
when: not wipe_ceph_osds|bool
|
|
|
|
# Shown only when the earlier availability check reported "online".
- name: Inform user that restore_platform is run successfully
  when: check_online.stdout == "online"
  debug:
    msg: >-
      Controller-0 is now online.
      The next step is to unlock this controller.
      Please refer to the system administration guide for more details.

# Remove temporary staging area used by the copy module
- name: "Remove {{ ansible_remote_tmp }} directory"
  file:
    state: absent
    path: "{{ ansible_remote_tmp }}"
|