Improve and fix ceph data restore
When recovering with flag wipe_ceph_osds=false, in some cases, kube-cephfs filesystem is not recovered from backup. Some improvements were made to make the recovery more robust, and more debugging info was added. When recovering with flag wipe_ceph_osds=true, the default pools were not being recreated because platform-integ-apps application does not recreate those pools when reapplied. To solve this, the application is now removed and will be automatically applied by conductor after restore is complete. Test-Plan: PASS: B&R AIO-SX without ceph configured PASS: Optimized B&R AIO-SX without ceph configured PASS: B&R AIO-SX with wipe_ceph_osds=true PASS: B&R AIO-SX with wipe_ceph_osds=false PASS: Optimized B&R AIO-SX with wipe_ceph_osds=true PASS: Optimized B&R AIO-SX with wipe_ceph_osds=false PASS: Upgrade AIO-SX from stx-7.0 to stx-8.0 PASS: Upgrade AIO-SX from stx-6.0 to stx-8.0 Closes-Bug: 2016328 Change-Id: Ie09c4bf9c74b2e0bf0dde9e7f41cf85002177525 Signed-off-by: Felipe Sanches Zanoni <Felipe.SanchesZanoni@windriver.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2022-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -157,3 +157,14 @@
|
||||
state: absent
|
||||
|
||||
when: not wipe_ceph_osds|bool and ceph_backend.rc == 0
|
||||
|
||||
# The application platform-integ-apps is being removed when the flag
|
||||
# wipe_ceph_osds is set to true because this application needs to be
|
||||
# reapplied, but helm will not reapply the charts if the version is not bumped.
|
||||
#
|
||||
# The application is removed here to be applied after host is unlocked and
|
||||
# ceph is correctly configured after a wipe. This app is automatically
|
||||
# applied by conductor when there is ceph backend configured.
|
||||
- name: Remove platform-integ-apps application when asked to wipe ceph osd disks
|
||||
shell: source /etc/platform/openrc; system application-remove platform-integ-apps
|
||||
when: wipe_ceph_osds|bool and ceph_backend.rc == 0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2022-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -45,6 +45,13 @@
|
||||
shell: psql -c "update kube_app set status='restore-requested' where status='applied'" sysinv
|
||||
become_user: postgres
|
||||
|
||||
- name: Set platform-integ-apps to applied state when set to wipe osds disks to remove the app later
|
||||
shell: >-
|
||||
psql -c "update kube_app set status='applied' where name='platform-integ-apps'
|
||||
and status='restore-requested'" sysinv
|
||||
become_user: postgres
|
||||
when: wipe_ceph_osds|bool
|
||||
|
||||
- name: Bringup flock services
|
||||
systemd:
|
||||
name: "{{ item }}"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||
# Copyright (c) 2021-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -31,12 +31,18 @@ set -x
|
||||
# Check if the filesystem for the system RWX provisioner is present
|
||||
ceph fs ls | grep ${FS_NAME}
|
||||
if [ $? -ne 0 ]; then
|
||||
# If we have existing metadata/data pools, use them
|
||||
# Use existing metadata/data pools to recover cephfs
|
||||
ceph fs new ${FS_NAME} ${METADATA_POOL_NAME} ${DATA_POOL_NAME} --force
|
||||
# Reset the filesystem and journal
|
||||
|
||||
# Recover MDS state from filesystem
|
||||
ceph fs reset ${FS_NAME} --yes-i-really-mean-it
|
||||
|
||||
# Try to recover from some common errors
|
||||
cephfs-journal-tool --rank=${FS_NAME}:0 event recover_dentries summary
|
||||
cephfs-journal-tool --rank=${FS_NAME}:0 journal reset
|
||||
cephfs-table-tool ${FS_NAME}:0 reset session
|
||||
cephfs-table-tool ${FS_NAME}:0 reset snap
|
||||
cephfs-table-tool ${FS_NAME}:0 reset inode
|
||||
fi
|
||||
|
||||
# Start the Ceph MDS
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2019-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -310,10 +310,18 @@
|
||||
script: recover_cephfs.sh
|
||||
register: cephfs_recovery_out
|
||||
|
||||
- name: Create ceph.client.guest.keyring to allow ceph mount again
|
||||
command: touch /etc/ceph/ceph.client.guest.keyring
|
||||
- name: Display cephfs recovery script stdout output
|
||||
debug:
|
||||
var: cephfs_recovery_out.stdout_lines
|
||||
|
||||
- debug: var=cephfs_recovery_out.stdout_lines
|
||||
- name: Display cephfs recovery script stderr output
|
||||
debug:
|
||||
var: cephfs_recovery_out.stderr_lines
|
||||
|
||||
- name: Create ceph.client.guest.keyring to allow ceph mount again
|
||||
file:
|
||||
path: "/etc/ceph/ceph.client.guest.keyring"
|
||||
state: touch
|
||||
|
||||
- name: Restart ceph one more time to pick latest changes
|
||||
command: /etc/init.d/ceph restart
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2019-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -493,6 +493,13 @@
|
||||
shell: psql -c "update kube_app set status='restore-requested' where status='applied'" sysinv
|
||||
become_user: postgres
|
||||
|
||||
- name: Set platform-integ-apps to applied state when set to wipe osds disks
|
||||
shell: >-
|
||||
psql -c "update kube_app set status='applied' where name='platform-integ-apps'
|
||||
and status='restore-requested'" sysinv
|
||||
become_user: postgres
|
||||
when: wipe_ceph_osds|bool and ceph_backend.rc == 0
|
||||
|
||||
- name: Restart services
|
||||
systemd:
|
||||
name: "{{ item }}"
|
||||
@@ -599,6 +606,17 @@
|
||||
|
||||
when: not wipe_ceph_osds|bool
|
||||
|
||||
# The application platform-integ-apps is being removed when the flag
|
||||
# wipe_ceph_osds is set to true because this application needs to be
|
||||
# reapplied, but helm will not reapply the charts if the version is not bumped.
|
||||
#
|
||||
# The application is removed here to be applied after host is unlocked and
|
||||
# ceph is correctly configured after a wipe. This app is automatically
|
||||
# applied by conductor when there is ceph backend configured.
|
||||
- name: Remove platform-integ-apps
|
||||
shell: source /etc/platform/openrc; system application-remove platform-integ-apps
|
||||
when: wipe_ceph_osds|bool
|
||||
|
||||
when: check_online.stdout == "online" and ceph_backend.rc == 0
|
||||
|
||||
- name: Apply kube-apiserver parameters
|
||||
|
||||
Reference in New Issue
Block a user