#!/bin/bash
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# chkconfig: 2345 76 25
#
### BEGIN INIT INFO
# Provides:          k8s-pod-recovery
# Default-Start:     3 5
# Required-Start:
# Required-Stop:
# Default-Stop:      0 1 2 6
# Short-Description: Service to recovery pods after host boot
### END INIT INFO
#
# On "start" this script waits for systemd to finish booting, deletes pods
# scheduled on this host that are stuck in a set of known-bad states so that
# they get recreated, logs whether any such pods remain, and finally runs the
# CNI cache cleanup. All messages go to syslog through LOG/ERROR.

# shellcheck disable=SC1091
. /etc/platform/platform.conf

export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/usr/local/sbin
export KUBECONFIG=/etc/kubernetes/admin.conf
CONF_DIR=/etc/k8s-post-recovery.d
SLEEP_DELAY_SEC=15

NAME=$(basename "$0")
PIDFILE=/var/run/${NAME}.pid
HOST=$(hostname)

# Log info message to /var/log/daemon.log
function LOG {
    logger -p daemon.info -t "${NAME}($$): " "$@"
}

# Log error message to /var/log/daemon.log
function ERROR {
    logger -p daemon.error -t "${NAME}($$): " "$@"
}

# Exit successfully when ${KUBECONFIG} is missing: the node has not been
# configured, so there is nothing to recover.
function _check_for_k8s_config {
    if [[ ! -f "${KUBECONFIG}" ]]; then
        LOG "${KUBECONFIG} does not exist. No pods to recover."
        exit 0
    fi
}

# Abort (exit 1) if the PID recorded in ${PIDFILE} belongs to a live process
# with this script's name; otherwise discard the stale pidfile. In both
# non-aborting cases a new pidfile holding our own PID is written.
function _check_for_existing_process {
    local pid='' process='' out=''

    if [[ -e "${PIDFILE}" ]]; then
        pid=$(cat "${PIDFILE}")
        # Only look at /proc for a purely numeric PID that is still alive.
        if [[ "${pid}" =~ ^[0-9]+$ && -r "/proc/${pid}/comm" ]]; then
            process=$(cat "/proc/${pid}/comm" 2>/dev/null)
        fi
        # The kernel truncates comm to 15 characters, so compare against the
        # equally truncated script name; comparing the full name can never
        # match for names longer than that.
        if [[ -n "${process}" && "${process}" == "${NAME:0:15}" ]]; then
            ERROR "Aborting, ${pid} already running: ${PIDFILE}."
            exit 1
        fi
        out=$(rm -v -f "${PIDFILE}")
        LOG "${out}"
    fi

    # Create pidfile to indicate the script is running
    echo $$ > "${PIDFILE}"
}

# Block until "systemctl is-system-running" reports running or degraded,
# polling every ${SLEEP_DELAY_SEC} seconds.
function _wait_for_systemd {
    until systemctl is-system-running | grep -q -e running -e degraded; do
        LOG "Waiting for systemd to finish booting..."
        sleep "${SLEEP_DELAY_SEC}"
    done
}

# Cleanup any stale CNI cache files (not associated with any running pod)
# that are older than 1 hour old. A failure is logged but not fatal.
function _do_cni_cache_cleanup {
    LOG "Starting CNI cache cleanup..."
    if ! k8s-cni-cache-cleanup -o 1 -d; then
        ERROR "Failed to run CNI cache cleanup."
    fi
}

# Poll until the number of pods that are neither Running nor Completed has
# stayed unchanged for a number of consecutive polls.
#   $1: extra "kubectl get pods" arguments as one whitespace-separated string
#       (may be empty)
#   $2: seconds to sleep between polls
#   $3: number of consecutive unchanged polls required
function _wait_for_pod_stabilization {
    local time_between_polls=$2
    local stable_cycles=$3
    local -a extra_args=()
    local last_count=0
    local stability_count=0
    local pods_in_flux

    # Split the caller's argument string into separate words exactly once.
    read -r -a extra_args <<< "$1"

    while [[ ${stability_count} -lt ${stable_cycles} ]]; do
        pods_in_flux=$(kubectl get pods --no-headers --all-namespaces "${extra_args[@]}" \
            | grep -c -v -e Running -e Completed)
        if [[ ${pods_in_flux} -ne ${last_count} ]]; then
            LOG "Waiting on pod transitions to stabilize... $pods_in_flux pods are not Running/Completed"
            last_count=${pods_in_flux}
            stability_count=0
        else
            LOG "Pods transitions are stable... for $((stability_count*time_between_polls)) seconds."
            stability_count=$((stability_count+1))
        fi
        sleep "${time_between_polls}"
    done
}

# Print the names of the pods in namespace $1 scheduled on this host whose
# "kubectl get pods" row matches the awk regular expression $2.
function _list_pods_matching {
    local ns=$1
    local pattern=$2

    kubectl get pods -n "${ns}" --field-selector "spec.nodeName=${HOST}" 2>/dev/null \
        | awk -v pattern="${pattern}" '$0 ~ pattern {print $1}'
}

# Shared recover/verify logic for pods identified by a status string.
#   $1: action ('recover' deletes matching pods, 'verify' logs whether any
#       are still present)
#   $2: status string to match, also used as the log message prefix
#   $3...: namespaces to inspect
function _handle_pods_by_status {
    local action=$1
    local status=$2
    shift 2
    local ns pod
    local -a pods

    case "${action}" in
        recover)
            for ns in "$@"; do
                mapfile -t pods < <(_list_pods_matching "${ns}" "${status}")
                for pod in "${pods[@]}"; do
                    LOG "${status} pods: Recovering: $ns/$pod"
                    kubectl delete pods -n "${ns}" "${pod}" --wait=false
                done
            done
            ;;
        verify)
            for ns in "$@"; do
                mapfile -t pods < <(_list_pods_matching "${ns}" "${status}")
                if [[ ${#pods[@]} -eq 0 ]]; then
                    LOG "${status} pods: None present for namespace: $ns"
                else
                    ERROR "${status} pods: still present for namespace: $ns"
                fi
            done
            ;;
        *)
            ERROR "Unknown action: ${action}"
            ;;
    esac
}

# Recover or verify pods on this host that are: Running/Unknown and
# Pending/Init:Unknown.
#   $1: action ('recover' or 'verify')
# Targets a fixed set of namespaces plus any "namespace=<name>" entries found
# in ${CONF_DIR}/*.conf.
function _unknown_pods {
    local -a namespaces=('armada' 'openstack' 'monitor')
    local conf_file

    for conf_file in "${CONF_DIR}"/*.conf; do
        # An unmatched glob stays literal; skip it explicitly rather than
        # enabling nullglob, which is shell-wide and would silently drop
        # other unquoted words that happen to contain glob characters.
        [[ -f "${conf_file}" ]] || continue
        # Word splitting is intended: one namespace per word.
        # shellcheck disable=SC2207
        namespaces+=($(awk -F '=' '/^namespace=/ {print $2}' "${conf_file}"))
    done

    _handle_pods_by_status "$1" 'Unknown' "${namespaces[@]}"
}

# Recover or verify pods on this host that are: Running/OutOfhugepages.
#   $1: action ('recover' or 'verify')
# Targets every namespace reported by "kubectl get ns".
function _outofhugepages_pods {
    local -a namespaces

    mapfile -t namespaces < <(kubectl get ns --no-headers | awk '{ print $1 }')

    _handle_pods_by_status "$1" 'OutOfhugepages' "${namespaces[@]}"
}

# Print "<namespace>/<name>" for every Failed pod on this host whose
# "kubectl get pods" row mentions NodeAffinity. Shared by recover and verify
# so both always use the same match pattern.
function _list_node_affinity_pods {
    kubectl get pods --all-namespaces \
        --field-selector "status.phase=Failed,spec.nodeName=${HOST}" 2>/dev/null \
        | awk '/NodeAffinity/ {print $1"/"$2}'
}

# Recover or verify Failed/NodeAffinity pods on this host.
#   $1: action ('recover' or 'verify')
function _node_affinity_pods {
    local pod
    local -a pods

    case "$1" in
        recover)
            mapfile -t pods < <(_list_node_affinity_pods)
            for pod in "${pods[@]}"; do
                LOG "NodeAffinity pods: Recovering: $pod"
                # pod is "<namespace>/<name>"
                kubectl delete pods -n "${pod%%/*}" "${pod#*/}" --wait=false
            done
            ;;
        verify)
            mapfile -t pods < <(_list_node_affinity_pods)
            if [[ ${#pods[@]} -eq 0 ]]; then
                LOG "NodeAffinity pods: None present."
            else
                ERROR "NodeAffinity pods: still present"
            fi
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}

# Print "<namespace>/<name>" for every pod on this host carrying the
# restart-on-reboot=true label.
function _list_labeled_pods {
    kubectl get pods --all-namespaces --no-headers \
        --field-selector="spec.nodeName=${HOST}" \
        --selector=restart-on-reboot=true 2>/dev/null \
        | awk '{print $1"/"$2}'
}

# Recover or verify pods labeled restart-on-reboot=true on this host.
#   $1: action ('recover' or 'verify')
# On recover, the SRIOV device plugin pod is restarted first when it is not
# Ready, then every labeled pod is deleted.
function _labeled_pods {
    local pod ns pod_name status
    local -a pods

    case "$1" in
        recover)
            local polling_interval=5
            local stability_count=6
            # Arguments selecting the SRIOV device plugin pod on this host.
            local -a sriovdp_args=(
                -n kube-system
                --selector=app=sriovdp
                "--field-selector=spec.nodeName=${HOST}"
            )

            _wait_for_pod_stabilization \
                "--selector=restart-on-reboot=true --field-selector=spec.nodeName=${HOST}" \
                "${polling_interval}" "${stability_count}"

            mapfile -t pods < <(_list_labeled_pods)

            # Don't have to restart device-plugin if no labeled pods are
            # present. System may not be configured for SRIOV.
            if [[ ${#pods[@]} -gt 0 ]]; then
                LOG "Waiting for SRIOV device plugin pod to become available"

                # Check if device-plugin is ready, but do not wait
                if ! kubectl wait pods "${sriovdp_args[@]}" --for=condition=Ready --timeout=0s; then
                    # If device plugin is not ready, restart it and wait
                    kubectl delete pods "${sriovdp_args[@]}" --wait=false
                    if ! kubectl wait pods "${sriovdp_args[@]}" --for=condition=Ready --timeout=360s; then
                        ERROR "SRIOV device plugin timed out on ready wait. Continuing anyway. SRIOV pods may not recover."
                    fi
                fi
            fi

            # Delete pods with the restart-on-reboot=true label
            for pod in "${pods[@]}"; do
                ns=${pod%%/*}
                pod_name=${pod#*/}
                LOG "restart-on-reboot labeled pods: Recovering: ${ns} ${pod_name}"
                kubectl delete pods -n "${ns}" "${pod_name}" --wait=false
            done
            ;;
        verify)
            mapfile -t pods < <(_list_labeled_pods)
            for pod in "${pods[@]}"; do
                ns=${pod%%/*}
                pod_name=${pod#*/}
                LOG "restart-on-reboot labeled pods: Verifying: ${ns} ${pod_name}"
                status=$(kubectl get pod --no-headers -n "${ns}" "${pod_name}" 2>/dev/null | awk '{print $3}')
                if [[ "${status}" != "Running" ]]; then
                    ERROR "$pod: not recovered: $status"
                else
                    LOG "$pod: recovered"
                fi
            done
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}

# Handle resetting openstack libvirt pod as it sometimes is in a Running but
# unusable state.
#   $1: action ('recover' or 'verify')
# Does nothing when the openstack namespace does not exist.
function _force_reset_pods {
    local custom_columns field_selector pod
    local -a pods

    if ! kubectl get namespace openstack > /dev/null 2>&1; then
        return
    fi

    # Get the libvirt pods on this host that do not have all conditions True
    #
    # Conditions:
    #   Initialized       True
    #   Ready             True
    #   ContainersReady   True
    #   PodScheduled      True
    #
    # NAME                                          STATUS    CONDITIONS            NODE
    # libvirt-libvirt-controller-0-937646f6-xst4r   Running   True,True,True,True   controller-0
    #
    custom_columns='custom-columns=NAME:.metadata.name,STATUS:status.phase,CONDITIONS:status.conditions[*].status,NODE:spec.nodeName'
    field_selector="spec.nodeName=${HOST}"
    # custom_columns contains "[*]" and therefore MUST stay quoted: unquoted
    # it is subject to pathname expansion and can be altered or dropped.
    mapfile -t pods < <(kubectl get pods -n openstack -l application=libvirt \
        --no-headers --field-selector "${field_selector}" -o "${custom_columns}" \
        | awk '!/True,True,True,True/ {print $1}')

    case "$1" in
        recover)
            for pod in "${pods[@]}"; do
                LOG "Recovering libvirt pod: $pod"
                kubectl delete pods -n openstack "${pod}" --wait=false
            done
            ;;
        verify)
            if [[ ${#pods[@]} -eq 0 ]]; then
                LOG "Openstack libvirt pod on ${HOST} is running."
            else
                ERROR "Openstack libvirt pod on ${HOST} has not been recovered."
            fi
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}

# Run every pod check with the given action.
#   $1: action ('recover' or 'verify')
function _examine_pods {
    # Manage labeled pods first
    _labeled_pods "$1"

    # Wait for pods transitions to stop
    _wait_for_pod_stabilization "" "${SLEEP_DELAY_SEC}" 6

    # Check for recovery actions
    _unknown_pods "$1"
    _node_affinity_pods "$1"
    _force_reset_pods "$1"
    _outofhugepages_pods "$1"
}

# Full recovery sequence: recover pass, verify pass, then CNI cache cleanup.
function start {
    _check_for_k8s_config
    _check_for_existing_process

    LOG "Starting."

    _wait_for_systemd
    _examine_pods 'recover'
    _examine_pods 'verify'
    _do_cni_cache_cleanup
}

# Nothing to tear down; only logs the request.
function stop {
    LOG "Stopping."
}

# Intentionally a no-op.
function status {
    :
}

# Intentionally a no-op.
function reset {
    :
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart|force-reload|reload)
        stop
        start
        ;;
    status)
        status
        ;;
    reset)
        reset
        ;;
    *)
        echo "Usage: $0 {start|stop|force-reload|restart|reload|status|reset}"
        exit 1
        ;;
esac

exit 0