Merge "Add support for kube-upgrade-abort"

This commit is contained in:
Zuul
2023-06-01 14:06:31 +00:00
committed by Gerrit Code Review
3 changed files with 97 additions and 7 deletions

View File

@@ -15,6 +15,7 @@ else
install -m 755 -D bin/puppet-manifest-apply.sh $(BINDIR)/puppet-manifest-apply.sh
endif
install -m 755 -D bin/apply_network_config.sh $(BINDIR)/apply_network_config.sh
install -m 755 -D bin/kube-wait-control-plane-terminated.sh $(BINDIR)/kube-wait-control-plane-terminated.sh
install -m 755 -D bin/network_ifupdown.sh $(BINDIR)/network_ifupdown.sh
install -m 755 -D bin/network_sysconfig.sh $(BINDIR)/network_sysconfig.sh
install -m 755 -D bin/puppet-update-default-grub.sh $(BINDIR)/puppet-update-default-grub.sh

View File

@@ -0,0 +1,35 @@
#!/bin/bash
################################################################################
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
################################################################################
# Wait for the Kubernetes control plane processes on this host
# (kube-apiserver, kube-scheduler, kube-controller-manager) to exit on their
# own. If any of them is still running once TIMEOUT seconds have elapsed,
# forcibly kill the remaining processes with SIGKILL.
#
# Exit status: always 0, whether the processes exited gracefully or had to be
# killed.

PATH=/bin:/usr/bin:/sbin:/usr/sbin
NAME=$(basename "$0")

# Maximum time, in seconds, to wait for a graceful exit.
TIMEOUT=30

# Extended regular expression matched against the full command line
# (pgrep/pkill -f). Shared by the wait loop and the final kill so the two
# can never disagree about which processes are targeted.
CONTROL_PLANE_PATTERN='^kube-apiserver|^kube-scheduler|^kube-controller-manager'

# Log an info-level message to syslog using the daemon facility.
# With arguments, the arguments form the message. With no arguments, logger
# reads the message lines from stdin, which lets LOG sit at the end of a pipe.
function LOG {
    logger -p daemon.info -t "${NAME}($$): " "$@"
}

LOG "wait for control plane pods on this host to terminate"

# SECONDS is a bash special variable: assigning to it resets the counter and
# bash then increments it once per elapsed second, so it acts as the timer.
SECONDS=0
while [ "${SECONDS}" -lt "${TIMEOUT}" ]; do
    # Only the exit status matters here; discard the PID list as well as any
    # error output so nothing is printed on every poll.
    if pgrep -f "${CONTROL_PLANE_PATTERN}" >/dev/null 2>&1; then
        sleep 1
    else
        LOG "control plane pods gracefully terminated"
        exit 0
    fi
done

LOG "killing control plane processes"
# -e makes pkill echo each process it signals; that report is forwarded to
# syslog through LOG.
pkill -e -KILL -f "${CONTROL_PLANE_PATTERN}" 2>/dev/null | LOG
exit 0

View File

@@ -47,6 +47,14 @@ class platform::kubernetes::params (
$controller_manager_key = undef,
$kubelet_cert = undef,
$kubelet_key = undef,
$etcd_cert_file = undef,
$etcd_key_file = undef,
$etcd_ca_cert = undef,
$etcd_endpoints = undef,
$etcd_snapshot_file = '/opt/backups/k8s-control-plane/etcd/stx_etcd.snap',
$static_pod_manifests = '/opt/backups/k8s-control-plane/static-pod-manifests',
$etcd_name = 'controller',
$etcd_initial_cluster = 'controller=http://localhost:2380',
# The file holding the root CA cert/key to update to
$rootca_certfile_new = '/etc/kubernetes/pki/ca_new.crt',
$rootca_keyfile_new = '/etc/kubernetes/pki/ca_new.key',
@@ -1591,19 +1599,65 @@ class platform::kubernetes::update_kubelet_config::runtime
}
}
# Drains this host's Kubernetes node with `kubectl drain`, authenticating
# through the admin kubeconfig. DaemonSet-managed pods are ignored, emptyDir
# data is deleted, and unmanaged pods are force-removed. The command output
# is always logged.
class platform::kubernetes::cordon_node {

  # Kept as a single literal so the exact command line stays easy to grep for.
  $drain_command = "kubectl --kubeconfig=/etc/kubernetes/admin.conf drain ${::platform::params::hostname} --ignore-daemonsets --delete-emptydir-data --force --skip-wait-for-delete-timeout=10" # lint:ignore:140chars

  exec { 'drain the node':
    command   => $drain_command,
    logoutput => true,
  }
}
class platform::kubernetes::upgrade_abort
inherits ::platform::kubernetes::params {
$software_version = $::platform::params::software_version
include platform::kubernetes::cordon_node
include platform::kubernetes::mask_stop_kubelet
include platform::kubernetes::unmask_start_kubelet
include platform::kubernetes::bindmounts
include platform::kubernetes::unmask_start_kubelet
exec { 'restore static manifest files':
command => '/usr/bin/cp -r /var/rootdirs/opt/backups/k8s-control-plane/static-pod-manifests/* /etc/kubernetes/manifests',
require => Class['platform::kubernetes::mask_stop_kubelet']
exec { 'remove the control-plane pods':
command => '/usr/bin/rm -f /etc/kubernetes/manifests/*.yaml',
require => Class['platform::kubernetes::cordon_node'],
onlyif => "test -d ${static_pod_manifests}",
}
-> exec { 'restart etcd':
command => '/usr/bin/systemctl restart etcd',
-> exec { 'wait for control plane terminated':
command => '/usr/local/bin/kube-wait-control-plane-terminated.sh',
onlyif => "test -d ${static_pod_manifests}",
}
-> Class['platform::kubernetes::mask_stop_kubelet']
-> exec { 'stop all containers':
command => '/usr/sbin/k8s-container-cleanup.sh force-clean',
logoutput => true,
}
-> exec { 'mask containerd service':
command => '/usr/bin/systemctl mask --runtime --now containerd',
}
-> exec { 'mask docker service':
command => '/usr/bin/systemctl mask --runtime --now docker',
}
-> exec { 'mask etcd service':
command => '/usr/bin/systemctl mask --runtime --now etcd',
}
-> exec{ 'remove etcd data dir':
command => "rm -rf /opt/etcd/${software_version}/controller.etcd",
onlyif => "test -f ${etcd_snapshot_file}",
}
-> exec { 'restore etcd snapshot':
command => "etcdctl --cert ${etcd_cert_file} --key ${etcd_key_file} --cacert ${etcd_ca_cert} --endpoints ${etcd_endpoints} snapshot restore ${etcd_snapshot_file} --data-dir /opt/etcd/${software_version}/controller.etcd --name ${etcd_name} --initial-cluster ${etcd_initial_cluster} ", # lint:ignore:140chars
environment => [ 'ETCDCTL_API=3' ],
onlyif => "test -f ${etcd_snapshot_file}"
}
-> exec { 'restore static manifest files':
command => "/usr/bin/cp -f ${static_pod_manifests}/*.yaml /etc/kubernetes/manifests",
onlyif => "test -d ${static_pod_manifests}",
}
-> exec { 'unmask etcd service':
command => '/usr/bin/systemctl unmask --runtime --now etcd',
}
-> exec { 'unmask docker service':
command => '/usr/bin/systemctl unmask --runtime --now docker',
}
-> exec { 'unmask containerd service':
command => '/usr/bin/systemctl unmask --runtime --now containerd',
}
-> Class['platform::kubernetes::bindmounts']
-> Class['platform::kubernetes::unmask_start_kubelet']