Merge "Recover subcloud certs after long period offline"
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# SUB-TASK DESCRIPTION:
|
||||
# These tasks copy files from systemcontroller to subcloud
|
||||
# They allow copying between two directories with root only access
|
||||
#
|
||||
# Copies one file ({{ item.path }}) from the system controller to the same
# path on the subcloud, going through a staging file in /tmp on the system
# controller. The source is read with become on the controller and the
# destination is written with become on the subcloud.
#
# Expected loop variable:
#   item.path  - absolute path, identical on both hosts
#   item.perms - optional destination mode; defaults to '0644'
- block:
    # The staging file is written and deleted with "connection: local", so it
    # must also be created there; otherwise an unused temp file is left on the
    # subcloud. It is created without privilege escalation so it stays owned
    # (mode 0600) by the user running the playbook, who has to read it back in
    # the final copy task.
    - name: Create tmp file to facilitate copying between root only access dirs
      tempfile:
        state: file
        prefix: tmp_cert_copy_
        path: /tmp/
      register: tmp_cert_file
      connection: local
      become: false

    # remote_src + connection local: both src and dest are on the controller.
    - name: Copy file from source to temporary location first
      copy:
        src: "{{ item.path }}"
        dest: "{{ tmp_cert_file.path }}"
        remote_src: true
      become: true
      connection: local

    - name: Copy from temporary location to remote
      copy:
        src: "{{ tmp_cert_file.path }}"
        dest: "{{ item.path }}"
        owner: root
        group: root
        # Quoted so the mode is always handed to the module as an octal string
        # and never re-interpreted as a decimal number by Jinja.
        mode: "{{ item.perms | default('0644') }}"
      become: true

  always:
    # Best-effort cleanup; skipped when the staging file was never created.
    - name: Delete temporary file after use
      file:
        path: "{{ tmp_cert_file.path }}"
        state: absent
      connection: local
      become: true
      failed_when: false
      when: tmp_cert_file.path is defined
|
||||
@@ -0,0 +1,18 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# ROLE DESCRIPTION:
|
||||
# This role performs certificate recovery for subclouds that have been
# offline for a long period of time
|
||||
#
|
||||
# Entry point of the role. The three task files are imported statically and
# run in the order listed: root CAs, then K8s leaf certificates, then the DC
# admin endpoint chain.
# NOTE(review): the order presumably matters (leaf certificates depend on
# valid root CAs) - confirm before reordering.

- name: Recover K8s Root CA certificates (K8s Root CA, ETCD CA, FrontProxy CA)
  import_tasks: recover-k8s-root-cas.yml

- name: Renew K8s leaf certificates
  import_tasks: recover-k8s-leaf-certificates.yml

- name: Recover dc admin endpoint Root CA, subcloud ICA and leaf certificates
  import_tasks: recover-dc-admin-ep-certificate-chain.yml
|
||||
@@ -0,0 +1,238 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# SUB-TASK DESCRIPTION:
|
||||
# These tasks create a new ICA for the subcloud and update both the
|
||||
# dc-adminep-root-ca and the new ICA in the subcloud to
|
||||
# create a new valid chain
|
||||
#
|
||||
# Flow:
#   1. Read the DC admin endpoint Root CA from the dc-cert namespace on the
#      system controller (tasks with "connection: local").
#   2. Replace the subcloud copy of dc-adminep-root-ca.crt when it is expired
#      or differs from the system controller's.
#   3. Verify the subcloud ICA (secret sc-adminep-ca-certificate in sc-cert)
#      against that Root CA with "openssl verify".
#   4. Only when verification fails: make sure an ICA for this subcloud exists
#      on the system controller, push its cert/key to the subcloud, wait for
#      management affecting alarms to clear and set
#      subcloud_dc_admin_ep_cert_chain_recovered.
#   5. Always remove the temporary files that were created.
#
# NOTE(review): "name" is presumably the subcloud name supplied by the
# caller - it is not defined in this file.
- block:
    - name: Retrieve DC admin endpoint Root CA from k8s secret
      command: >-
        kubectl get secret dc-adminep-root-ca-certificate
        -n dc-cert -o jsonpath='{.data.ca\.crt}'
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"
      register: dc_root_ca_cert_result
      connection: local

    - name: Get software version
      shell: source /etc/platform/platform.conf; echo $sw_version
      register: sw_version_result

    - name: Set location of dc-adminep-root-ca.crt certificate on subcloud
      set_fact:
        dc_adminep_root_ca_location: >-
          /opt/platform/config/{{ sw_version_result.stdout }}/dc-adminep-root-ca.crt

    # "-checkend 0" exits non-zero when the certificate has already expired;
    # the rc is evaluated below instead of failing the task here.
    - name: Verify if dc-adminep-root-ca.crt certificate is expired on subcloud
      command: openssl x509 -in {{ dc_adminep_root_ca_location }} -checkend 0
      become: true
      register: adminep_root_ca_info
      failed_when: false

    # Base64-encoded so it can be compared directly with the (base64) value
    # stored in the k8s secret.
    - name: Get dc-adminep-root-ca.crt certificate from subcloud
      shell: cat {{ dc_adminep_root_ca_location }} | base64 -w0
      become: true
      register: adminep_root_ca_from_subcloud

    - name: Copy dc-adminep-root-ca.crt from system controller to subcloud
      copy:
        content: "{{ dc_root_ca_cert_result.stdout | b64decode }}"
        dest: "{{ dc_adminep_root_ca_location }}"
      become: true
      when: |
        adminep_root_ca_info.rc != 0 or
        dc_root_ca_cert_result.stdout != adminep_root_ca_from_subcloud.stdout

    - name: Create tmp file name for current dc-adminep ICA certificate on subcloud
      tempfile:
        state: file
        prefix: tmp_subcloud_ica_subcloud_
        suffix: .pem
        path: /tmp/
      register: tmp_subcloud_ica_subcloud

    - name: Retrieve current dc-adminep ICA cert from k8s secret on subcloud
      command: >-
        kubectl get secret sc-adminep-ca-certificate -n sc-cert
        -o jsonpath='{.data.tls\.crt}'
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"
      register: current_subcloud_ica_cert_result
      retries: 3
      delay: 5
      until: current_subcloud_ica_cert_result is not failed

    - name: Save current dc-adminep ICA cert to file
      copy:
        content: "{{ current_subcloud_ica_cert_result.stdout | b64decode }}"
        dest: "{{ tmp_subcloud_ica_subcloud.path }}"

    # A non-zero rc here is what triggers the recovery block below.
    - name: Verify dc-admin-ep certificate chain is correct on subcloud
      command: >-
        openssl verify -CAfile
        {{ dc_adminep_root_ca_location }} {{ tmp_subcloud_ica_subcloud.path }}
      register: ca_verification
      failed_when: false
      become: true

    - name: Recover the invalid certificate chain for dc-admin-ep certificates on subcloud
      block:
        - name: Verify if subcloud ICA certificate exists
          command: kubectl -n dc-cert get secret {{ name }}-adminep-ca-certificate
          register: subcloud_ica_cert_exists
          failed_when: false
          environment:
            KUBECONFIG: "/etc/kubernetes/admin.conf"
          connection: local

        - name: Create dc-admin-ep ICA certificate for subcloud if missing
          block:
            - name: Create tmp file name for subcloud ICA certificate template
              tempfile:
                state: file
                prefix: tmp_subcloud_cert_template_
                suffix: .j2
                path: /tmp/
              register: tmp_subcloud_ica_template
              connection: local

            - name: Save certificate template to file {{ tmp_subcloud_ica_template.path }}
              vars:
                template_content: |
                  apiVersion: cert-manager.io/v1
                  kind: Certificate
                  metadata:
                    creationTimestamp: null
                    name: {{ name }}-adminep-ca-certificate
                    namespace: dc-cert
                  spec:
                    isCA: true
                    commonName: {{ name }}
                    duration: 8760h0m0s # 1 year
                    issuerRef:
                      kind: Issuer
                      name: dc-adminep-root-ca-issuer
                    renewBefore: 720h0m0s # 30 days
                    secretName: {{ name }}-adminep-ca-certificate
              copy:
                dest: "{{ tmp_subcloud_ica_template.path }}"
                content: "{{ template_content }}"
                # Quoted: file modes should be octal strings, not YAML ints.
                mode: "0640"
              connection: local

            # NOTE(review): the secret is deleted right after the apply,
            # presumably to force a freshly issued certificate - confirm.
            # The task is retried until both commands succeed.
            - name: Apply ICA certificate for subcloud in k8s in system controller
              shell: |
                kubectl apply -f "{{ tmp_subcloud_ica_template.path }}"
                kubectl -n dc-cert delete secret {{ name }}-adminep-ca-certificate
              environment:
                KUBECONFIG: "/etc/kubernetes/admin.conf"
              register: apply_cert_result
              connection: local
              retries: 5
              delay: 5
              until: apply_cert_result is not failed

          when: subcloud_ica_cert_exists.rc != 0

        - name: Retrieve subcloud ICA certificate data from from k8s secret
          command: >-
            kubectl get secret {{ name }}-adminep-ca-certificate
            -n dc-cert -o jsonpath='{.data.tls\.crt}'
          environment:
            KUBECONFIG: "/etc/kubernetes/admin.conf"
          register: subcloud_ica_cert_result
          connection: local
          retries: 5
          delay: 5
          until: subcloud_ica_cert_result is not failed

        # The registered stdout is the CA private key; keep it out of logs.
        - name: Retrieve subcloud ICA certificate key from from k8s secret
          command: >-
            kubectl get secret {{ name }}-adminep-ca-certificate
            -n dc-cert -o jsonpath='{.data.tls\.key}'
          environment:
            KUBECONFIG: "/etc/kubernetes/admin.conf"
          register: subcloud_ica_key_result
          connection: local
          retries: 5
          delay: 5
          until: subcloud_ica_key_result is not failed
          no_log: true

        - name: Create tmp file name for the ICA certificate
          tempfile:
            state: file
            prefix: tmp_subcloud_cert_crt
            suffix: .crt
            path: /tmp/
          register: tmp_subcloud_ica_crt

        - name: Create tmp file name for the ICA key
          tempfile:
            state: file
            prefix: tmp_subcloud_cert_key
            suffix: .key
            path: /tmp/
          register: tmp_subcloud_ica_key

        - name: Save the ICA certificate to file
          copy:
            content: "{{ subcloud_ica_cert_result.stdout | b64decode }}"
            dest: "{{ tmp_subcloud_ica_crt.path }}"

        - name: Save the ICA key to file
          copy:
            content: "{{ subcloud_ica_key_result.stdout | b64decode }}"
            dest: "{{ tmp_subcloud_ica_key.path }}"

        # Replaces the ICA secret and deletes the leaf certificate secret.
        # NOTE(review): the leaf is presumably re-issued from the new ICA by
        # cert-manager - confirm.
        - name: Update ICA and certificate chain on subcloud
          shell: |
            kubectl -n sc-cert delete secret sc-adminep-ca-certificate --ignore-not-found=true
            kubectl -n sc-cert create secret tls sc-adminep-ca-certificate \
            --cert "{{ tmp_subcloud_ica_crt.path }}" --key "{{ tmp_subcloud_ica_key.path }}"
            kubectl -n sc-cert delete secret sc-adminep-certificate --ignore-not-found=true
          environment:
            KUBECONFIG: "/etc/kubernetes/admin.conf"
          register: subcloud_ica_update_result
          retries: 5
          delay: 5
          until: subcloud_ica_update_result is not failed

        # "grep -c" exits 1 when the count is 0, so success is decided from
        # stdout/stderr rather than from the return code.
        - name: Wait until management affecting alarms clear up
          shell: source /etc/platform/openrc; fm alarm-list --mgmt_affecting | grep -c True
          register: mgmt_affecting_present
          failed_when: |
            mgmt_affecting_present.stdout | int != 0 or mgmt_affecting_present.stderr | length > 0
          # Up to 10 mins to have some buffer, but usually takes ~ 3 mins.
          retries: 30
          delay: 20
          until:
            - mgmt_affecting_present.stdout | int == 0
            - mgmt_affecting_present.stderr | length == 0

        - name: Set fact to mark that DC admin endpoint certificate chain recovery was performed
          set_fact:
            subcloud_dc_admin_ep_cert_chain_recovered: true

      when: ca_verification.rc != 0

  always:
    - name: Delete temporary file on systemcontroller
      file:
        path: "{{ tmp_subcloud_ica_template.path }}"
        state: absent
      connection: local
      when: tmp_subcloud_ica_template.path is defined

    # The crt/key temp files only exist when the recovery branch ran. An
    # inline "x if cond" without an else branch raises an undefined-variable
    # error when cond is false, so fall back to an empty string and skip it.
    - name: Delete temporary files on subcloud
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - "{{ tmp_subcloud_ica_crt.path | default('') }}"
        - "{{ tmp_subcloud_ica_key.path | default('') }}"
        - "{{ tmp_subcloud_ica_subcloud.path | default('') }}"
      when: item | length > 0
|
||||
@@ -0,0 +1,85 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# SUB-TASK DESCRIPTION:
|
||||
# These tasks recover the kubernetes leaf certificates after expiry
|
||||
# and verify the k8s cluster is healthy afterwards
|
||||
#
|
||||
# Result is consumed through k8s_certs_expiration.rc:
#   0        - kubeadm listed no 'invalid' entry and the kubelet client
#              certificate has not expired
#   non-zero - kubeadm listed an 'invalid' entry (exit 1), or the openssl
#              -checkend check on the kubelet client certificate failed
# NOTE(review): when kubeadm itself fails, the script falls through and ends
# with rc 0, so recovery is skipped - confirm this is intended.
- name: Verify k8s leaf certificates expiration
  shell: |
    if K8_OUT=$(kubeadm certs check-expiration --config /etc/kubernetes/kubeadm.yaml); then
      # any 'invalid' entry means at least one k8s leaf certificate is expired
      if echo "$K8_OUT" | grep invalid; then
        exit 1
      fi
      # no k8s leaf certificate is expired, proceed to check the kubelet
      # certificate; checkend will return 0 if valid and 1 for invalid
      openssl x509 -in /var/lib/kubelet/pki/kubelet-client-current.pem -checkend 0
      exit $?
    fi
  become: true
  register: k8s_certs_expiration
  failed_when: false
|
||||
|
||||
# Runs only when the preceding expiration check returned a non-zero rc.
# Rotates the control plane and kubelet certificates, waits for the API to
# answer, restarts every deployment/statefulset/daemonset and waits for the
# pods to report ready, then sets subcloud_k8s_leaf_certs_recovered.
- name: Recover K8s cluster after certificate expiration
  block:
    - name: Recover k8s controller plane leaf certificates
      command: bash -x /usr/bin/kube-cert-rotation.sh
      register: kube_cert_rotation_out
      become: true
      retries: 5
      delay: 10
      until: kube_cert_rotation_out is not failed

    - name: Recover kubelet certificates
      command: bash -x /usr/bin/kube-expired-kubelet-cert-recovery.sh
      register: kubelet_cert_rotation_out
      become: true
      retries: 5
      delay: 10
      until: kubelet_cert_rotation_out is not failed

    - name: Wait till kubectl starts replying
      command: kubectl get nodes
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"
      register: k8s_get_nodes
      # Waits up to 2 minutes, but in a freshly installed system, it takes ~ 20secs
      retries: 12
      delay: 10
      until: k8s_get_nodes is not failed

    # awk emits five tokens per resource (rollout restart <kind/name> -n <ns>)
    # and xargs -n5 turns each group into one kubectl invocation.
    - name: Trigger a restart of every pod (deployment,statefulset,daemonset rollout)
      shell: >-
        kubectl get deployment,statefulset,daemonset -A --no-headers |
        awk '{ print " rollout restart " $2 " -n " $1}' | xargs -n5 kubectl
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"

    # The command is a folded (>-) scalar, so YAML already joins the lines
    # with spaces. No trailing backslashes are used: after folding they would
    # become "\ " (an escaped space) and corrupt the kubectl arguments, making
    # grep report 0 and the wait succeed without checking anything.
    # "grep -c" exits 1 when the count is 0, hence the stdout based checks.
    # NOTE(review): a pod reporting "true,false" still matches "true" and is
    # not counted as pending - confirm this is acceptable.
    - name: Wait pods to restart (become READY)
      shell: >-
        kubectl get pods -l '!job-name' -A --no-headers
        -o 'custom-columns=NAME:.metadata.name,READY:.status.containerStatuses[*].ready'
        | grep -cv true
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"
      register: pods_starting
      failed_when: pods_starting.stdout | int != 0
      # Waits for up to one hour. This is the same value used in k8s-rootca-update
      # A large number is used because the recovery time depends on the number of pods running.
      # In a freshly installed system, without additional pods, it takes ~ 1min 10secs
      retries: 360
      delay: 10
      until: pods_starting.stdout | int == 0

    - name: Set fact to mark that K8s leaf certificates recovery was performed
      set_fact:
        subcloud_k8s_leaf_certs_recovered: true

  when: k8s_certs_expiration.rc != 0
|
||||
@@ -0,0 +1,55 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# SUB-TASK DESCRIPTION:
|
||||
# These tasks recover the K8s Root CA certificates (K8s Root CA, ETCD CA, FrontProxy CA)
|
||||
#
|
||||
# Collect certificate details for the three root CAs on the subcloud. The
# registered results are used below through their "expired" attribute.

- name: Verify if Kubernetes Root CA is expired
  become: true
  community.crypto.x509_certificate_info:
    path: /etc/kubernetes/pki/ca.crt
  register: k8s_root_ca_info

- name: Verify if ETCD Root CA is expired
  become: true
  community.crypto.x509_certificate_info:
    path: /etc/etcd/ca.crt
  register: etcd_root_ca_info

- name: Verify if FrontProxy Root CA is expired
  become: true
  community.crypto.x509_certificate_info:
    path: /etc/kubernetes/pki/front-proxy-ca.crt
  register: frontproxy_ca_info
|
||||
|
||||
# For each expired CA, copy its certificate and key from the system
# controller to the same path on the subcloud. Each loop item is passed to
# copy-file-from-local-to-remote.yml as "item" (path, optional perms).
# NOTE(review): items without "perms" get that file's default mode (0644),
# which also applies to the *.key files - confirm this is intended.

- name: Copy K8s Root CA cert and key from systemcontroller to the subcloud
  include_tasks: copy-file-from-local-to-remote.yml
  when: k8s_root_ca_info.expired
  loop:
    - path: /etc/kubernetes/pki/ca.crt
    - path: /etc/kubernetes/pki/ca.key

- name: Copy ETCD Root CA cert and key from systemcontroller to the subcloud
  include_tasks: copy-file-from-local-to-remote.yml
  when: etcd_root_ca_info.expired
  loop:
    - path: /etc/etcd/ca.crt
      perms: '0755'
    - path: /etc/etcd/ca.key

- name: Copy FrontProxy Root CA cert and key from systemcontroller to the subcloud
  include_tasks: copy-file-from-local-to-remote.yml
  when: frontproxy_ca_info.expired
  loop:
    - path: /etc/kubernetes/pki/front-proxy-ca.crt
    - path: /etc/kubernetes/pki/front-proxy-ca.key
|
||||
|
||||
# Each CA above is recovered independently when it is expired, so recovery
# "was performed" as soon as any one of them was expired. A list-form "when"
# is an AND and would only set the fact if all three had expired.
- name: Set fact to mark that K8s Root CAs recovery was performed
  set_fact:
    subcloud_k8s_root_ca_recovered: true
  when: >-
    k8s_root_ca_info.expired or
    etcd_root_ca_info.expired or
    frontproxy_ca_info.expired
|
||||
Reference in New Issue
Block a user