From 420daed20bf60eddb0d77b793953acb33c5fb590 Mon Sep 17 00:00:00 2001
From: Felipe Sanches Zanoni
Date: Wed, 6 Nov 2024 11:56:52 -0300
Subject: [PATCH] Add playbooks to update cephfs k8s PV parameters

Add new playbooks as helpers to update Kubernetes PVs that are
provisioned by the cephfs Storage Class and do not have the parameter
'kernelMountOptions: recover_session=clean'. This parameter is required
for the cephfs kernel driver to remount the volume when there is a
connection issue or a client eviction issued by the Ceph monitoring
script.

These playbooks are intended as engineering tools to avoid redeploying
applications, which would force the PVCs/PVs to be recreated.

Test-Plan:
PASS: Create a deployment that has at least one pod using a cephfs
      PVC. Apply the deployment and, after the pod is in Running
      state, run the ansible playbook. Check that the parameter is
      added to the cephfs storage class and to the PVs.

Closes-bug: 2085648
Change-Id: I080ee47cc4d7f60e99a29202128560531143abef
Signed-off-by: Felipe Sanches Zanoni
---
 .../change_cephfs_mounter_options.yml         | 131 ++++++++++++++++++
 .../src/playbooks/change_cephfs_pv_pvcs.yml   |  87 ++++++++++++
 2 files changed, 218 insertions(+)
 create mode 100644 playbookconfig/src/playbooks/change_cephfs_mounter_options.yml
 create mode 100644 playbookconfig/src/playbooks/change_cephfs_pv_pvcs.yml

diff --git a/playbookconfig/src/playbooks/change_cephfs_mounter_options.yml b/playbookconfig/src/playbooks/change_cephfs_mounter_options.yml
new file mode 100644
index 000000000..4ac9323a2
--- /dev/null
+++ b/playbookconfig/src/playbooks/change_cephfs_mounter_options.yml
@@ -0,0 +1,131 @@
---
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This playbook provides the capability to set the "kernelMountOptions: recover_session=clean"
# parameter in the cephfs Storage Class, enabling the cephfs volumes to remount automatically
# when there is a client eviction from the Ceph mds.
#
# It also uses the 'change_cephfs_pv_pvcs.yml' playbook to update the same parameter on
# PVs/PVCs that were already created.
# The playbook scans for PVs/PVCs in the namespaces provided in the 'scale_resources' variable.
# It first scales the deployments down, then updates the PVs/PVCs, and finally scales the
# deployments back up.
#
# The playbook is supposed to run on the active controller.
#
# Example of how to run the playbook:
# ansible-playbook /usr/share/ansible/stx-ansible/playbooks/change_cephfs_mounter_options.yml -e @input.yml
#
# Template for the 'input.yml' file:
#
# update_storage_class: true
# scale_resources:
#   - name:
#     type:
#     namespace:
#   - name:
#     type:
#     namespace:
#
# If 'update_storage_class' is not defined, it defaults to 'false' and no changes are made
# to the cephfs Storage Class.
#
# If 'scale_resources' is not defined, no PVs/PVCs are updated.
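#
# As an illustration only (the resource names below are hypothetical), a filled-in
# 'input.yml' could look like this:
#
# update_storage_class: true
# scale_resources:
#   - name: my-app
#     type: deployment
#     namespace: my-app-ns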
#

- name: Update CephFS StorageClass and PVCs/PVs with Scale Down/Up
  hosts: localhost
  gather_facts: no
  vars:
    update_sc: "{{ update_storage_class | default(false) | bool }}"
    resources: "{{ scale_resources | default([]) }}"

  pre_tasks:
    - name: Ask for confirmation
      ansible.builtin.pause:
        prompt: |
          These deployments will have their replicas set to zero, which may impact the availability of the associated pods:
          {{ resources | map(attribute='name') | list }}
          Do you want to continue? (yes/no)
      register: user_input

    - name: Check user input
      ansible.builtin.fail:
        msg: "Playbook terminated by user."
      when: user_input.user_input | trim | lower != 'yes'

    - name: Set namespaces
      set_fact:
        namespaces: "{{ resources | map(attribute='namespace') | unique }}"

    - name: Get resource replicas
      command: >
        kubectl get {{ item.type }} {{ item.name }} -n {{ item.namespace }}
        -o jsonpath='{.spec.replicas}'
      loop: "{{ resources }}"
      register: resource_replicas_output
      changed_when: false

    - name: Set replicas by resource
      set_fact:
        replica_by_resource: "{{ resource_replicas_output.results }}"

    - name: Create temp directory
      tempfile:
        state: directory
        suffix: update_sc_pv_pvcs
      register: temp_dir

  tasks:
    - name: Scale down resources
      command: >
        kubectl scale {{ item.type }} {{ item.name }} -n {{ item.namespace }} --replicas=0
      loop: "{{ resources }}"

    - name: Update StorageClass
      block:
        - name: Get StorageClass definition
          command: kubectl get sc cephfs -o yaml
          register: sc_yaml

        # Write the updated definition to disk before deleting the StorageClass,
        # so the original definition is not lost if a later step fails.
        - name: Update StorageClass configuration
          copy:
            content: >
              {{ sc_yaml.stdout | from_yaml
              | combine({'parameters': {'kernelMountOptions': 'recover_session=clean'}}, recursive=True)
              | to_yaml }}
            dest: "{{ temp_dir.path }}/sc-cephfs.yaml"

        # StorageClass parameters are immutable, so the object is deleted and
        # recreated instead of being patched in place.
        - name: Delete StorageClass
          command: kubectl delete sc cephfs

        - name: Apply updated StorageClass
          command: kubectl apply -f {{ temp_dir.path }}/sc-cephfs.yaml
      when: update_sc

    - name: Iterate over namespaces
      include_tasks: change_cephfs_pv_pvcs.yml
      vars:
        temp_dir_path: "{{ temp_dir.path }}"
      loop: "{{ namespaces }}"
      loop_control:
        loop_var: namespace

    - name: Cleanup
      block:
        - debug:
            msg: Run cleanup
      always:
        - name: Scale up resources
          command: >
            kubectl scale {{ item.item.type }} {{ item.item.name }}
            -n {{ item.item.namespace }}
            --replicas={{ item.stdout }}
          loop: "{{ replica_by_resource }}"

        - name: Remove temp directory
          file:
            path: "{{ temp_dir.path }}"
            state: absent

diff --git a/playbookconfig/src/playbooks/change_cephfs_pv_pvcs.yml b/playbookconfig/src/playbooks/change_cephfs_pv_pvcs.yml
new file mode 100644
index 000000000..f162c5d68
--- /dev/null
+++ b/playbookconfig/src/playbooks/change_cephfs_pv_pvcs.yml
@@ -0,0 +1,87 @@
---
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This playbook sets the "kernelMountOptions: recover_session=clean" parameter on every
# PV/PVC in the given namespace, enabling the cephfs volumes to remount automatically
# when there is a client eviction from the Ceph mds.
#
# This playbook is intended to be included by the 'change_cephfs_mounter_options.yml' playbook.
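#
# For reference, each updated PV spec is expected to end up carrying the new mount
# option and a cleared claimRef, roughly like this (a sketch, not a full PV dump):
#
# spec:
#   claimRef: null
#   csi:
#     volumeAttributes:
#       kernelMountOptions: recover_session=clean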
#

- name: Get PVC list
  command: kubectl -n {{ namespace }} get pvc -o yaml
  register: pvc_list_output

- name: Set PVC definitions
  set_fact:
    pvc_definitions: "{{ pvc_list_output.stdout | from_yaml | json_query('items[?spec.storageClassName==`cephfs`]') }}"

- name: Set PVC formatted data
  set_fact:
    pvcs: "{{ pvc_definitions | json_query('[*].{pvc: metadata.name, pv: spec.volumeName}') }}"

- name: Save PVC definitions to files
  copy:
    content: |
      {{ item }}
    dest: "{{ temp_dir_path }}/{{ item | from_yaml | json_query('metadata.name') }}.yaml"
  loop: "{{ pvc_definitions | map('to_yaml') }}"

# PVs are cluster-scoped, so no namespace is needed for the PV commands below.
- name: Get PV definition
  command: kubectl get pv {{ item.pv }} -o yaml
  loop: "{{ pvcs }}"
  register: pvs_output
  changed_when: false

- name: Set PV definitions and reclaim policy patch
  set_fact:
    pv_definition_list: "{{ pvs_output.results | map(attribute='stdout') }}"
    patch_json: '{"spec": {"persistentVolumeReclaimPolicy": "Retain"}}'

- name: Save and update PV definitions to files
  copy:
    content: >
      {{
        item | from_yaml
        | combine(
            {'spec': {'claimRef': None, 'csi': {'volumeAttributes': {'kernelMountOptions': 'recover_session=clean'}}}},
            recursive=True)
        | to_yaml
      }}
    dest: "{{ temp_dir_path }}/{{ item | from_yaml | json_query('metadata.name') }}.yaml"
  loop: "{{ pv_definition_list }}"

# Setting the reclaim policy to Retain keeps the backing cephfs volume alive
# while the PV/PVC objects are deleted and recreated below.
- name: Patch PV to retain the volume
  command: kubectl patch pv {{ item.pv }} -p {{ patch_json | to_json }}
  loop: "{{ pvcs }}"
  register: patch_output
  ignore_errors: true

- name: Show error message if the changes cannot continue
  fail:
    msg: "Could not change the reclaim policy. It is not safe to continue the changes for {{ item.item.pv }}"
  when: item.rc != 0
  loop: "{{ patch_output.results }}"
  ignore_errors: true

- name: Delete PVCs
  command: kubectl delete -n {{ namespace }} pvc {{ item.item.pvc }}
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Delete PVs
  command: kubectl delete pv {{ item.item.pv }}
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Apply updated PVCs
  command: kubectl apply -n {{ namespace }} -f {{ temp_dir_path }}/{{ item.item.pvc }}.yaml
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Apply updated PVs
  command: kubectl apply -f {{ temp_dir_path }}/{{ item.item.pv }}.yaml
  when: item.rc == 0
  loop: "{{ patch_output.results }}"
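
#
# A quick manual check after the run (with a hypothetical PV name) can be:
#   kubectl get pv pvc-0123abcd -o jsonpath='{.spec.csi.volumeAttributes.kernelMountOptions}'
# which is expected to print 'recover_session=clean' once the update has been applied.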