Add playbooks to update cephfs k8s PV parameters
Adding new playbooks as helpers to update Kubernetes PVs that are
provisioned by the cephfs Storage Class and do not have the parameter
'kernelMountOptions: recover_session=clean'. This parameter is required
for the cephfs kernel driver to remount the volume when there is a
connection issue or a client eviction issued by the Ceph monitoring
script.

These playbooks are intended as engineering tools to avoid redeploying
applications and forcing the recreation of the PVCs/PVs.

Test-Plan:
  PASS: Create a deployment that has at least one pod that uses a
        cephfs PVC. Apply the deployment and, after the pod is in
        Running state, run the ansible playbook. Check that the
        parameter is added to the cephfs storage class and to the PVs.

Closes-bug: 2085648
Change-Id: I080ee47cc4d7f60e99a29202128560531143abef
Signed-off-by: Felipe Sanches Zanoni <Felipe.SanchesZanoni@windriver.com>
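For reference, a minimal cephfs-backed PVC of the kind the Test-Plan describes could look like this sketch (the name, namespace, and size are illustrative, not part of this change):

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: test-cephfs-pvc
      namespace: default
    spec:
      accessModes:
        - ReadWriteMany
      storageClassName: cephfs
      resources:
        requests:
          storage: 1Gi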
parent 920106ef17
commit 420daed20b
playbookconfig/src/playbooks/change_cephfs_mounter_options.yml (new file, 131 lines)
@@ -0,0 +1,131 @@
---
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This playbook provides the capability to set the "kernelMountOptions: recover_session=clean"
# parameter in the cephfs Storage Class, enabling cephfs volumes to remount automatically
# when there is a client eviction from the Ceph MDS.
#
# It also uses the 'change_cephfs_pv_pvcs.yml' playbook to apply the same change to
# already created PVs/PVCs.
# The playbook scans for PVs/PVCs in the namespaces provided in the 'scale_resources'
# variable. It first scales down the deployments, then updates the PVs/PVCs, and finally
# scales the deployments back up.
#
# The playbook is supposed to run on the active controller.
#
# Example to run the playbook:
# ansible-playbook /usr/share/ansible/stx-ansible/playbooks/change_cephfs_mounter_options.yml -e @input.yml
#
# Template for the 'input.yml' file:
#
# update_storage_class: true
# scale_resources:
#   - name: <deployment-name-1>
#     type: <deployment|replicaset>
#     namespace: <namespace-1>
#   - name: <deployment-name-2>
#     type: <deployment|replicaset>
#     namespace: <namespace-2>
#
# If 'update_storage_class' is not defined, it defaults to 'false' and no changes are
# made to the cephfs Storage Class.
#
# If 'scale_resources' is not defined, no PVs/PVCs are updated.
#
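# A filled-in example of 'input.yml', for illustration only (the deployment name and
# namespace below are hypothetical):
#
# update_storage_class: true
# scale_resources:
#   - name: my-app
#     type: deployment
#     namespace: my-app-ns
#
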
- name: Update CephFS StorageClass and PVCs/PVs with Scale Down/Up
  hosts: localhost
  gather_facts: no
  vars:
    update_sc: "{{ update_storage_class | default(False) | bool }}"
    resources: "{{ scale_resources | default([]) }}"

  pre_tasks:
    - name: Ask for confirmation
      ansible.builtin.pause:
        prompt: |
          These deployments will have their replicas set to zero, which may impact the availability of the associated pods:
          {{ resources | map(attribute='name') | list }}
          Do you want to continue? (yes/no)
      register: user_input

    - name: Check user input
      ansible.builtin.fail:
        msg: "Playbook terminated by user."
      when: user_input.user_input | trim | lower != 'yes'

    - name: Set namespaces
      set_fact:
        namespaces: "{{ resources | map(attribute='namespace') | unique }}"

    # Record the current replica count of each resource so it can be restored
    # by the scale-up step in the cleanup section.
    - name: Get resource replicas
      command: >
        kubectl get {{ item.type }} {{ item.name }} -n {{ item.namespace }} -o jsonpath='{.spec.replicas}'
      loop: "{{ resources }}"
      register: resource_replicas_output
      changed_when: false

    - name: Set replicas by resource
      set_fact:
        replica_by_resource: "{{ resource_replicas_output.results }}"

    - name: Create temp directory
      tempfile:
        state: directory
        suffix: update_sc_pv_pvcs
      register: temp_dir

  tasks:
    - name: Scale down resources
      command: >
        kubectl scale {{ item.type }} {{ item.name }} -n {{ item.namespace }} --replicas=0
      loop: "{{ resources }}"

    - name: Update StorageClass
      block:
        - name: Get StorageClass definition
          command: kubectl get sc cephfs -o yaml
          register: sc_yaml

        # The 'parameters' field of a StorageClass is immutable, so the object is
        # deleted and re-created with the extra mount option merged in.
        - name: Delete StorageClass
          command: kubectl delete sc cephfs

        - name: Update StorageClass configuration
          copy:
            content: >
              {{ sc_yaml.stdout | from_yaml
                 | combine({'parameters': {'kernelMountOptions': 'recover_session=clean'}}, recursive=True)
                 | to_yaml }}
            dest: "{{ temp_dir.path }}/sc-cephfs.yaml"

        - name: Apply updated StorageClass
          command: kubectl apply -f {{ temp_dir.path }}/sc-cephfs.yaml
      when: update_sc
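
    # Result sketch for the StorageClass update above, assuming a typical cephfs
    # StorageClass: the existing 'parameters' entries are preserved and the new key
    # is merged in, e.g.
    #
    #   parameters:
    #     clusterID: <existing-value>
    #     kernelMountOptions: recover_session=clean
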
    - name: Iterate over namespaces
      include_tasks: change_cephfs_pv_pvcs.yml
      vars:
        temp_dir_path: "{{ temp_dir.path }}"
      loop: "{{ namespaces }}"
      loop_control:
        loop_var: namespace

    - name: Cleanup
      block:
        - debug:
            msg: Run cleanup
      always:
        # Restore the replica counts recorded in the pre_tasks, even if a
        # previous step failed.
        - name: Scale up resources
          command: >
            kubectl scale {{ item.item.type }} {{ item.item.name }}
            -n {{ item.item.namespace }}
            --replicas={{ item.stdout }}
          loop: "{{ replica_by_resource }}"

        - name: Remove temp directory
          file:
            path: "{{ temp_dir.path }}"
            state: absent
playbookconfig/src/playbooks/change_cephfs_pv_pvcs.yml (new file, 87 lines)
@@ -0,0 +1,87 @@
---
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This playbook provides the capability to set the "kernelMountOptions: recover_session=clean"
# parameter in every PV/PVC in the given namespace, enabling the cephfs volumes to
# remount automatically when there is a client eviction from the Ceph MDS.
#
# This playbook is supposed to be called by the 'change_cephfs_mounter_options.yml' playbook.
#
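# Overall flow, as implemented below: collect the cephfs PVCs and their PVs, save
# updated manifests to the temp directory, switch the PV reclaim policy to 'Retain',
# delete the PVC/PV objects, and re-apply the updated manifests.
#
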
- name: Get PVC list
  command: kubectl -n {{ namespace }} get pvc -o yaml
  register: pvc_list_output

# Only PVCs provisioned by the cephfs StorageClass are updated.
- name: Set PVC definitions
  set_fact:
    pvc_definitions: "{{ pvc_list_output.stdout | from_yaml | json_query('items[?spec.storageClassName==`cephfs`]') }}"

- name: Set PVC formatted data
  set_fact:
    pvcs: "{{ pvc_definitions | json_query('[*].{pvc: metadata.name, pv: spec.volumeName}') }}"

- name: Save PVC definitions to files
  copy:
    content: |
      {{ item }}
    dest: "{{ temp_dir_path }}/{{ item | from_yaml | json_query('metadata.name') }}.yaml"
  loop: "{{ pvc_definitions | map('to_yaml') }}"

- name: Get PV definition
  command: kubectl get pv {{ item.pv }} -n {{ namespace }} -o yaml
  loop: "{{ pvcs }}"
  register: pvs_output
  changed_when: false

# Switching the reclaim policy to 'Retain' ensures the backing Ceph volume is not
# deleted when the PVC/PV objects are removed and re-created below.
- name: Set PV definitions and reclaim
  set_fact:
    pv_definition_list: "{{ pvs_output.results | map(attribute='stdout') | list }}"
    patch_json: '{"spec": {"persistentVolumeReclaimPolicy": "Retain"}}'

# Rewrite each PV manifest: clear spec.claimRef so the re-created PV can bind to the
# re-created PVC, and merge the new kernel mount option into the CSI volume attributes.
- name: Save and update PV definitions to files
  copy:
    content: >
      {{
        item | from_yaml
        | combine(
            {'spec': {'claimRef': None, 'csi': {'volumeAttributes': {'kernelMountOptions': 'recover_session=clean'}}}},
            recursive=True)
        | to_yaml
      }}
    dest: "{{ temp_dir_path }}/{{ item | from_yaml | json_query('metadata.name') }}.yaml"
  loop: "{{ pv_definition_list }}"
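
# Expected shape of a rewritten PV manifest (sketch, assuming a typical cephfs CSI PV;
# all other fields are preserved):
#
#   spec:
#     claimRef: null
#     csi:
#       volumeAttributes:
#         kernelMountOptions: recover_session=clean
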
- name: Patch PV to retain the volume
  command: kubectl patch pv {{ item.pv }} -n {{ namespace }} -p {{ patch_json | to_json }}
  loop: "{{ pvcs }}"
  register: patch_output
  ignore_errors: True

- name: Show error message if cannot continue with PVC
  fail:
    msg: "Could not change the reclaim policy. It is not safe to continue the changes for {{ item.item.pv }}."
  when: item.rc != 0
  loop: "{{ patch_output.results }}"
  ignore_errors: True

# Only resources whose reclaim-policy patch succeeded (rc == 0) are deleted and
# re-created from the updated manifests saved above.
- name: Delete PVCs
  command: kubectl delete -n {{ namespace }} pvc {{ item.item.pvc }}
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Delete PVs
  command: kubectl delete -n {{ namespace }} pv {{ item.item.pv }}
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Apply updated PVCs
  command: kubectl apply -n {{ namespace }} -f {{ temp_dir_path }}/{{ item.item.pvc }}.yaml
  when: item.rc == 0
  loop: "{{ patch_output.results }}"

- name: Apply updated PVs
  command: kubectl apply -n {{ namespace }} -f {{ temp_dir_path }}/{{ item.item.pv }}.yaml
  when: item.rc == 0
  loop: "{{ patch_output.results }}"