Fix cert rotation cron job not renewing conf files
The kube certificate rotation cron job doesn't update admin.conf, scheduler.conf and controller-manager.conf as expected. This update fixes the issue and makes several enhancements: - check the expiry date of each kubernetes certificate to be renewed with "kubeadm alpha certs check-expiration". - update the conf files with "kubeadm alpha certs renew", consistent with how the cert files are renewed. - restart sysinv-conductor and cert-mon after admin.conf is renewed, since they use admin.conf for authentication. - specify the absolute path for the fmClientCli command. - restructure the code into functions. - add checking/renewal of 3 etcd certificates. Change-Id: I8b2ff1b02651600f3a837e9f8a61ad50601ace9d Closes-Bug: 1937288 Signed-off-by: Andy Ning <andy.ning@windriver.com>
This commit is contained in:
parent
d1186f36a5
commit
a670080e07
@ -3,150 +3,354 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Copyright (C) 2019 Intel Corporation
|
||||
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||
#
|
||||
|
||||
#
|
||||
# This script is to rotate kubernetes cluster certificates automatically
|
||||
#
|
||||
|
||||
# Renew certificates 15 days before expiration
|
||||
declare -r CUTOFF_DAYS=15
|
||||
declare -r CUTOFF_DAYS_S=$((${CUTOFF_DAYS}*24*3600))
|
||||
|
||||
# Temporary working directory
|
||||
TEMP_WORK_DIR="/tmp/kube_cert_rotation"
|
||||
|
||||
# Expiration date of k8s certs
|
||||
CERT_LASTDATE=$(openssl x509 -in /etc/kubernetes/pki/apiserver.crt -text | grep 'Not After' | awk -F ' : ' '{print $2}')
|
||||
CERT_EXP_DATES=$(kubeadm alpha certs check-expiration)
|
||||
|
||||
if [ "x${CERT_LASTDATE}" != "x" ]; then
|
||||
CERT_LASTDATE_S=$(date -d "${CERT_LASTDATE}" +%s)
|
||||
CURRENT_DATE_S=$(date +%s)
|
||||
DAY_LEFT_S=$((${CERT_LASTDATE_S}-${CURRENT_DATE_S}))
|
||||
fi
|
||||
# Time left in seconds for a cert
|
||||
time_left_s() {
    # Print the number of seconds until the certificate named by $1 expires,
    # looked up in the "kubeadm alpha certs check-expiration" output cached
    # in the global CERT_EXP_DATES.
    #
    # Arguments: $1 - certificate name as it appears in that output; it is
    #                 matched as a literal substring of the row.
    # Outputs:   the remaining seconds on stdout (negative once the expiry
    #            date has passed), or an empty line when no row matches or
    #            the expiry date cannot be parsed.
    local time_left_s=""
    local exp_date=""
    # Keep the intermediates local so a direct call does not leak globals.
    local exp_date_s=""
    local current_date_s=""

    # -F matches the name literally (the '.' in 'admin.conf' is not a
    # wildcard); head keeps a single date even if several rows match.
    exp_date=$(printf '%s\n' "${CERT_EXP_DATES}" \
        | grep -F -- "$1" \
        | grep -oE '[a-zA-Z]{3} [0-3][0-9], [0-9]{4} ([0-1][0-9]|2[0-3]):[0-5][0-9] UTC' \
        | head -n 1)
    if [ "x${exp_date}" != "x" ]; then
        # Only produce a value when date could parse the expiry. Otherwise
        # the result stays empty instead of becoming a bogus negative number
        # computed from an empty timestamp.
        if exp_date_s=$(date -d "${exp_date}" +%s); then
            current_date_s=$(date +%s)
            time_left_s=$((exp_date_s - current_date_s))
        fi
    fi
    printf '%s\n' "${time_left_s}"
}
|
||||
|
||||
# Retrieve a certificate's remaining valid time by openssl
|
||||
time_left_s_by_openssl() {
    # Print the number of seconds until the certificate file $1 expires,
    # using the end date reported by "openssl x509 -enddate".
    #
    # Arguments: $1 - path of the certificate file.
    # Outputs:   the remaining seconds on stdout (negative once the end date
    #            has passed), or an empty line when openssl yields no end
    #            date or the date cannot be parsed.
    local time_left_s=""
    local exp_date=""
    # Keep the intermediates local so a direct call does not leak globals.
    local exp_date_s=""
    local current_date_s=""

    # openssl prints "notAfter=<date>"; keep only the part after '='.
    exp_date=$(openssl x509 -in "$1" -enddate -noout | awk -F"=" '{print $2}')
    if [ "x${exp_date}" != "x" ]; then
        # Only produce a value when date could parse the end date. Otherwise
        # the result stays empty instead of becoming a bogus negative number
        # computed from an empty timestamp.
        if exp_date_s=$(date -d "${exp_date}" +%s); then
            current_date_s=$(date +%s)
            time_left_s=$((exp_date_s - current_date_s))
        fi
    fi
    printf '%s\n' "${time_left_s}"
}
|
||||
|
||||
# Renew kubernetes certificates
|
||||
# return value:
|
||||
# 0: renewed successfully
|
||||
# 255: no need to renew
|
||||
# 1: renewal failed
|
||||
renew_cert() {
    # Renew the kubeadm-managed certificate named by $1 when less than
    # CUTOFF_DAYS_S seconds remain before it expires.
    #
    # Arguments: $1 - certificate name used to look up the expiry via
    #                 time_left_s. Callers may append whitespace (e.g.
    #                 'apiserver ') to disambiguate that lookup; it is
    #                 stripped before the name is handed to kubeadm.
    # Returns:   0 renewed, 255 no renewal needed, 1 expiry unknown or
    #            "kubeadm alpha certs renew" failed.
    local ret=0
    local time_left_s=""
    local cert_name=""

    # Trim trailing whitespace explicitly rather than relying on unquoted
    # word splitting, which also depends on IFS and glob-expands the value.
    cert_name="${1%"${1##*[![:space:]]}"}"

    time_left_s=$(time_left_s "$1")
    if [ "x${time_left_s}" = "x" ]; then
        # No expiry information available: report as failure.
        ret=1
    elif [ "${time_left_s}" -lt "${CUTOFF_DAYS_S}" ]; then
        if ! kubeadm alpha certs renew "${cert_name}"; then
            ret=1
        fi
    else
        ret=255
    fi
    return ${ret}
}
|
||||
|
||||
# Renew certificate using openssl
|
||||
# return value:
|
||||
# 0: renewed successfully
|
||||
# 255: no need to renew
|
||||
# 1: renewal failed
|
||||
renew_cert_by_openssl() {
    # Re-issue the certificate "$1/$2.crt" (and its key "$1/$2.key") with
    # openssl, signed by the CA in /etc/etcd, when less than CUTOFF_DAYS_S
    # seconds remain before it expires.
    #
    # Arguments: $1 - directory holding the certificate and key
    #            $2 - base file name of the certificate (without extension)
    #            $3 - openssl config text, used for both the CSR and the
    #                 v3_req extensions
    # Returns:   0 renewed, 255 no renewal needed, 1 expiry unknown or any
    #            step failed (later steps are skipped after a failure).
    local cert_dir="$1"
    local cert_name="$2"
    local csr_conf="${TEMP_WORK_DIR}/${cert_name}_csr.conf"
    local new_key="${TEMP_WORK_DIR}/${cert_name}.key"
    local new_csr="${TEMP_WORK_DIR}/${cert_name}.csr"
    local new_crt="${TEMP_WORK_DIR}/${cert_name}.crt"
    local remaining=""

    remaining=$(time_left_s_by_openssl "${cert_dir}/${cert_name}.crt")

    # Expiry could not be determined.
    [ "x${remaining}" != "x" ] || return 1
    # Still outside the renewal window.
    [ "${remaining}" -lt "${CUTOFF_DAYS_S}" ] || return 255

    # Create csr config file
    echo "$3" > "${csr_conf}" || return 1

    # Generate private key
    openssl genpkey -out "${new_key}" -algorithm RSA -pkeyopt rsa_keygen_bits:4096 || return 1

    # Generate CSR
    openssl req -new -key "${new_key}" -out "${new_csr}" -config "${csr_conf}" || return 1

    # Generate certificate
    openssl x509 -req -in "${new_csr}" -CA /etc/etcd/ca.crt -CAkey /etc/etcd/ca.key -CAcreateserial \
        -out "${new_crt}" -days 365 -extensions v3_req -extfile "${csr_conf}" || return 1

    # Replace the existing cert file, then the existing key file
    mv "${new_crt}" "${cert_dir}/${cert_name}.crt" || return 1
    mv "${new_key}" "${cert_dir}/${cert_name}.key" || return 1

    return 0
}
|
||||
|
||||
# Get cluster host floating IP address
|
||||
get_cluster_host_floating_ip() {
    # Print the host part of the first "server:" URL found in a kubeconfig.
    #
    # Arguments: $1 - optional kubeconfig path
    #                 (default: /etc/kubernetes/admin.conf)
    # Outputs:   the host on stdout with the ":6443" port and any IPv6
    #            brackets removed; an empty line when no server URL is found
    #            or the file cannot be read.
    local kubeconfig="${1:-/etc/kubernetes/admin.conf}"
    local floating_ip=""

    # Take the text after "//" on the first server line only, so that
    # several server entries cannot be merged into one value.
    floating_ip=$(grep -m 1 "server:" "${kubeconfig}" | awk -F"//" '{print $2}')
    # Drop trailing whitespace.
    floating_ip=${floating_ip%"${floating_ip##*[![:space:]]}"}

    case "${floating_ip}" in
        \[*\]*)
            # Bracketed IPv6 literal: keep only what is inside the brackets,
            # so a ":6443" group inside the address itself is never removed.
            floating_ip=${floating_ip#\[}
            floating_ip=${floating_ip%%\]*}
            ;;
        *)
            floating_ip=${floating_ip/:6443/}
            ;;
    esac
    printf '%s\n' "${floating_ip}"
}
|
||||
|
||||
# Renew certificates 90 days before expiration
|
||||
ERR=0
|
||||
declare -r NINETY_DAYS_S=$((90*24*3600))
|
||||
if [ ${DAY_LEFT_S} -lt ${NINETY_DAYS_S} ]; then
|
||||
# Same expiration date of apiserver, apiserver-kubelet-client and front-proxy-client
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha certs renew apiserver
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
RESTART_APISERVER=0
|
||||
RESTART_CONTROLLER_MANAGER=0
|
||||
RESTART_SCHEDULER=0
|
||||
RESTART_SYSINV=0
|
||||
RESTART_CERT_MON=0
|
||||
RESTART_ETCD=0
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha certs renew apiserver-kubelet-client
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha certs renew front-proxy-client
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Update cluster configuration files using the renewed certificates
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
ADVERTISE_ADDR=$(kubectl get endpoints kubernetes -o jsonpath='{.subsets[0].addresses[0].ip}')
|
||||
else
|
||||
ADVERTISE_ADDR=""
|
||||
fi
|
||||
|
||||
if [ "x${ADVERTISE_ADDR}" != "x" ]; then
|
||||
# Update admin.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha kubeconfig user --client-name=kubernetes-admin --apiserver-advertise-address=${ADVERTISE_ADDR} --org system:masters > /tmp/admin.conf
|
||||
if [ $? -eq 0 ]; then
|
||||
mv /tmp/admin.conf /etc/kubernetes/admin.conf
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
else
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Update controller-manager.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha kubeconfig user --client-name=system:kube-controller-manager --apiserver-advertise-address=${ADVERTISE_ADDR} --cert-dir /etc/kubernetes/pki/ > /tmp/controller-manager.conf
|
||||
if [ $? -eq 0 ]; then
|
||||
mv /tmp/controller-manager.conf /etc/kubernetes/controller-manager.conf
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
else
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Update scheduler.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha kubeconfig user --client-name=system:kube-scheduler --apiserver-advertise-address=${ADVERTISE_ADDR} --cert-dir /etc/kubernetes/pki/ > /tmp/scheduler.conf
|
||||
if [ $? -eq 0 ]; then
|
||||
mv /tmp/scheduler.conf /etc/kubernetes/scheduler.conf
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
else
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Update kubelet.conf
|
||||
# This block could be removed once this issue is resolved. https://github.com/kubernetes/kubeadm/issues/1753
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
kubeadm alpha kubeconfig user --client-name=system:node:${HOSTNAME} --apiserver-advertise-address=${ADVERTISE_ADDR} --org system:nodes > /tmp/kubelet.conf
|
||||
if [ $? -eq 0 ]; then
|
||||
mv /tmp/kubelet.conf /etc/kubernetes/kubelet.conf
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
else
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# step 1, renew kubernetes certificates
|
||||
# Renew apiserver certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
# The extra space in 'apiserver ' distinguishes it from other certificate names that contain "apiserver".
|
||||
renew_cert 'apiserver '
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_APISERVER=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
|
||||
# Restart the containers of k8s components to refresh the configurations within container
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
crictl ps | awk '/kube-apiserver/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
crictl ps | awk '/kube-controller-manager/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
crictl ps | awk '/kube-scheduler/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
systemctl daemon-reload
|
||||
systemctl restart kubelet
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 2 ]; then
|
||||
# Notify admin to lock and unlock this master node if restart k8s components failed
|
||||
fmClientCli -c "### ###250.003###set###host###host=${HOSTNAME}### ###major###Kubernetes certificates on host ${HOSTNAME} have been renewed but not updated.###operational-violation### ###Lock and unlock host ${HOSTNAME} to update config.### ### ###"
|
||||
elif [ ${ERR} -eq 1 ]; then
|
||||
# Notify admin to rotate kube cert manually if cert renew or config failed
|
||||
fmClientCli -c "### ###250.003###set###host###host=${HOSTNAME}### ###major###Kubernetes certificates automatic rotation failed on host ${HOSTNAME}###operational-violation### ###Rotate kubernetes certificates manually, lock and unlock host ${HOSTNAME} to update config.### ### ###"
|
||||
else
|
||||
# Clear the alarm if cert rotation completed
|
||||
fmClientCli -d "###250.003###host=${HOSTNAME}###"
|
||||
fi
|
||||
# Renew apiserver kubelet client certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
renew_cert 'apiserver-kubelet-client'
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_APISERVER=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew front proxy client certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
renew_cert 'front-proxy-client'
|
||||
if [ $? -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew certs in admin.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
renew_cert 'admin.conf'
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_SYSINV=1
|
||||
RESTART_CERT_MON=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew certs in controller-manager.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
renew_cert 'controller-manager.conf'
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_CONTROLLER_MANAGER=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew certs in scheduler.conf
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
renew_cert 'scheduler.conf'
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_SCHEDULER=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Create temporary working directory
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
mkdir -p ${TEMP_WORK_DIR}
|
||||
chmod 0600 ${TEMP_WORK_DIR}
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Get cluster host floating IP address
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
floating_ip=$(get_cluster_host_floating_ip)
|
||||
if [ "x${floating_ip}" == "x" ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Renew apiserver-etcd-client certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
config="
|
||||
[req]
|
||||
prompt = no
|
||||
x509_extensions = v3_req
|
||||
distinguished_name = dn
|
||||
[dn]
|
||||
CN = apiserver-etcd-client
|
||||
[v3_req]
|
||||
keyUsage = critical, Digital Signature, Key Encipherment
|
||||
extendedKeyUsage = TLS Web Server Authentication, TLS Web Client Authentication
|
||||
subjectAltName = @alt_names
|
||||
[alt_names]
|
||||
IP.1 = ${floating_ip}
|
||||
IP.2 = 127.0.0.1
|
||||
"
|
||||
renew_cert_by_openssl "/etc/kubernetes/pki/" "apiserver-etcd-client" "${config}"
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_APISERVER=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew etcd-server certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
config="
|
||||
[req]
|
||||
prompt = no
|
||||
x509_extensions = v3_req
|
||||
distinguished_name = dn
|
||||
[dn]
|
||||
CN = etcd-server
|
||||
[v3_req]
|
||||
keyUsage = critical, Digital Signature, Key Encipherment
|
||||
extendedKeyUsage = TLS Web Server Authentication, TLS Web Client Authentication
|
||||
subjectAltName = @alt_names
|
||||
[alt_names]
|
||||
IP.1 = ${floating_ip}
|
||||
IP.2 = 127.0.0.1
|
||||
"
|
||||
renew_cert_by_openssl "/etc/etcd/" "etcd-server" "${config}"
|
||||
result=$?
|
||||
if [ ${result} -eq 0 ]; then
|
||||
RESTART_ETCD=1
|
||||
elif [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
# Renew etcd-client certificate
|
||||
if [ ${ERR} -eq 0 ]; then
|
||||
config="
|
||||
[req]
|
||||
prompt = no
|
||||
x509_extensions = v3_req
|
||||
distinguished_name = dn
|
||||
[dn]
|
||||
CN = root
|
||||
[v3_req]
|
||||
keyUsage = critical, Digital Signature, Key Encipherment
|
||||
extendedKeyUsage = TLS Web Server Authentication, TLS Web Client Authentication
|
||||
subjectAltName = @alt_names
|
||||
[alt_names]
|
||||
DNS.1 = root
|
||||
"
|
||||
renew_cert_by_openssl "/etc/etcd/" "etcd-client" "${config}"
|
||||
result=$?
|
||||
if [ ${result} -eq 1 ]; then
|
||||
ERR=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove temporary working directory
|
||||
rm -rf ${TEMP_WORK_DIR}
|
||||
|
||||
# step 2, restart affected kubernetes components and system services
|
||||
# Restart apiserver
|
||||
if [ ${RESTART_APISERVER} -eq 1 ]; then
|
||||
crictl ps | awk '/kube-apiserver/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
# Restart controller-manager
|
||||
if [ ${RESTART_CONTROLLER_MANAGER} -eq 1 ]; then
|
||||
crictl ps | awk '/kube-controller-manager/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
# Restart scheduler
|
||||
if [ ${RESTART_SCHEDULER} -eq 1 ]; then
|
||||
crictl ps | awk '/kube-scheduler/{print$1}' | xargs crictl stop > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
# Restart sysinv-conductor since it's using credentials from admin.conf
|
||||
if [ ${RESTART_SYSINV} -eq 1 ]; then
|
||||
sm-restart-safe service sysinv-conductor
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
# Restart cert-mon since it's using credentials from admin.conf
|
||||
if [ ${RESTART_CERT_MON} -eq 1 ]; then
|
||||
sm-restart-safe service cert-mon
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
# Restart etcd server
|
||||
if [ ${RESTART_ETCD} -eq 1 ]; then
|
||||
sm-restart-safe service etcd
|
||||
if [ $? -ne 0 ]; then
|
||||
ERR=2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ${ERR} -eq 2 ]; then
|
||||
# Notify admin to lock and unlock this master node if restart k8s components failed
|
||||
/usr/local/bin/fmClientCli -c "### ###250.003###set###host###host=${HOSTNAME}### ###major###Kubernetes certificates have been renewed but not all services have been updated.###operational-violation### ###Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed).### ### ###"
|
||||
elif [ ${ERR} -eq 1 ]; then
|
||||
# Notify admin to renew kube cert manually and restart services by lock/unlock if cert renew or config failed
|
||||
/usr/local/bin/fmClientCli -c "### ###250.003###set###host###host=${HOSTNAME}### ###major###Kubernetes certificates renewal failed.###operational-violation### ###Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed).### ### ###"
|
||||
else
|
||||
# Clear the alarm if cert rotation completed
|
||||
/usr/local/bin/fmClientCli -d "###250.003###host=${HOSTNAME}###"
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user