Merge "Add support for kube-upgrade-abort"
This commit is contained in:
@@ -15,6 +15,7 @@ else
|
||||
install -m 755 -D bin/puppet-manifest-apply.sh $(BINDIR)/puppet-manifest-apply.sh
|
||||
endif
|
||||
install -m 755 -D bin/apply_network_config.sh $(BINDIR)/apply_network_config.sh
|
||||
install -m 755 -D bin/kube-wait-control-plane-terminated.sh $(BINDIR)/kube-wait-control-plane-terminated.sh
|
||||
install -m 755 -D bin/network_ifupdown.sh $(BINDIR)/network_ifupdown.sh
|
||||
install -m 755 -D bin/network_sysconfig.sh $(BINDIR)/network_sysconfig.sh
|
||||
install -m 755 -D bin/puppet-update-default-grub.sh $(BINDIR)/puppet-update-default-grub.sh
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# The purpose of this script is to wait until the control plane pod
# processes exit, and to forcibly kill those processes if the timeout expires.
|
||||
|
||||
|
||||
# Pin PATH to the system directories so that command lookups in this script
# do not depend on the caller's environment.
PATH=/bin:/usr/bin:/sbin:/usr/sbin

# Script name, used as part of the syslog tag. "$0" is quoted so a path
# containing whitespace is not word-split, and "--" stops basename from
# treating a leading "-" in $0 as an option.
NAME=$(basename -- "$0")

# Maximum number of seconds to wait for the processes to exit on their own.
TIMEOUT=30

# SECONDS is bash's built-in elapsed-time counter; assigning 0 restarts it so
# it can be compared directly against TIMEOUT.
SECONDS=0
|
||||
|
||||
# LOG [message...]
# Send an info-level message to syslog under the "daemon" facility
# (/var/log/daemon.log). The tag carries the script name and this shell's PID.
# All arguments are handed to logger unchanged as the message.
LOG() {
    local tag="${NAME}($$): "
    logger -p daemon.info -t "${tag}" "$@"
}
|
||||
|
||||
# Extended regex selecting the control plane processes. pgrep/pkill are run
# with -f, so it is matched against each process's full command line. It is
# kept in one place so the wait loop and the forced kill always target the
# same set of processes.
CONTROL_PLANE_PATTERN='^kube-apiserver|^kube-scheduler|^kube-controller-manager'

LOG "wait for control plane pods on this host to terminate"

# Poll once per second until no matching process is left or TIMEOUT seconds
# have elapsed (SECONDS is bash's elapsed-time counter).
while [ "${SECONDS}" -lt "${TIMEOUT}" ]; do
    # Only pgrep's exit status is needed. Its stdout (the list of matching
    # PIDs) is discarded as well as stderr, so the PIDs are not printed into
    # the caller's output on every iteration.
    if pgrep -f "${CONTROL_PLANE_PATTERN}" >/dev/null 2>&1; then
        sleep 1
    else
        LOG "control plane pods gracefully terminated"
        exit 0
    fi
done

# The timeout expired with matching processes still running: force-kill them.
LOG "killing control plane processes"

# -e makes pkill report what it killed; that report is piped into LOG.
# NOTE(review): this relies on logger reading the message from stdin when it
# is given no message arguments - confirm against the installed logger.
pkill -e -KILL -f "${CONTROL_PLANE_PATTERN}" 2>/dev/null | LOG

# Always report success, whether or not anything had to be killed.
exit 0
|
||||
@@ -47,6 +47,14 @@ class platform::kubernetes::params (
|
||||
$controller_manager_key = undef,
|
||||
$kubelet_cert = undef,
|
||||
$kubelet_key = undef,
|
||||
$etcd_cert_file = undef,
|
||||
$etcd_key_file = undef,
|
||||
$etcd_ca_cert = undef,
|
||||
$etcd_endpoints = undef,
|
||||
$etcd_snapshot_file = '/opt/backups/k8s-control-plane/etcd/stx_etcd.snap',
|
||||
$static_pod_manifests = '/opt/backups/k8s-control-plane/static-pod-manifests',
|
||||
$etcd_name = 'controller',
|
||||
$etcd_initial_cluster = 'controller=http://localhost:2380',
|
||||
# The file holding the root CA cert/key to update to
|
||||
$rootca_certfile_new = '/etc/kubernetes/pki/ca_new.crt',
|
||||
$rootca_keyfile_new = '/etc/kubernetes/pki/ca_new.key',
|
||||
@@ -1591,19 +1599,65 @@ class platform::kubernetes::update_kubelet_config::runtime
|
||||
}
|
||||
}
|
||||
|
||||
# Runs "kubectl drain" against this host's node (the hostname comes from
# ::platform::params), using the admin kubeconfig. Daemonset-managed pods are
# ignored, emptyDir data is deleted, and --force is passed. The command output
# is logged.
class platform::kubernetes::cordon_node {

  # Kept in a variable so the exec resource below stays short.
  $drain_command = "kubectl --kubeconfig=/etc/kubernetes/admin.conf drain ${::platform::params::hostname} --ignore-daemonsets --delete-emptydir-data --force --skip-wait-for-delete-timeout=10" # lint:ignore:140chars

  exec { 'drain the node':
    logoutput => true,
    command   => $drain_command,
  }
}
|
||||
|
||||
class platform::kubernetes::upgrade_abort
|
||||
inherits ::platform::kubernetes::params {
|
||||
|
||||
$software_version = $::platform::params::software_version
|
||||
include platform::kubernetes::cordon_node
|
||||
include platform::kubernetes::mask_stop_kubelet
|
||||
include platform::kubernetes::unmask_start_kubelet
|
||||
include platform::kubernetes::bindmounts
|
||||
include platform::kubernetes::unmask_start_kubelet
|
||||
|
||||
exec { 'restore static manifest files':
|
||||
command => '/usr/bin/cp -r /var/rootdirs/opt/backups/k8s-control-plane/static-pod-manifests/* /etc/kubernetes/manifests',
|
||||
require => Class['platform::kubernetes::mask_stop_kubelet']
|
||||
exec { 'remove the control-plane pods':
|
||||
command => '/usr/bin/rm -f /etc/kubernetes/manifests/*.yaml',
|
||||
require => Class['platform::kubernetes::cordon_node'],
|
||||
onlyif => "test -d ${static_pod_manifests}",
|
||||
}
|
||||
-> exec { 'restart etcd':
|
||||
command => '/usr/bin/systemctl restart etcd',
|
||||
-> exec { 'wait for control plane terminated':
|
||||
command => '/usr/local/bin/kube-wait-control-plane-terminated.sh',
|
||||
onlyif => "test -d ${static_pod_manifests}",
|
||||
}
|
||||
-> Class['platform::kubernetes::mask_stop_kubelet']
|
||||
-> exec { 'stop all containers':
|
||||
command => '/usr/sbin/k8s-container-cleanup.sh force-clean',
|
||||
logoutput => true,
|
||||
}
|
||||
-> exec { 'mask containerd service':
|
||||
command => '/usr/bin/systemctl mask --runtime --now containerd',
|
||||
}
|
||||
-> exec { 'mask docker service':
|
||||
command => '/usr/bin/systemctl mask --runtime --now docker',
|
||||
}
|
||||
-> exec { 'mask etcd service':
|
||||
command => '/usr/bin/systemctl mask --runtime --now etcd',
|
||||
}
|
||||
-> exec{ 'remove etcd data dir':
|
||||
command => "rm -rf /opt/etcd/${software_version}/controller.etcd",
|
||||
onlyif => "test -f ${etcd_snapshot_file}",
|
||||
}
|
||||
-> exec { 'restore etcd snapshot':
|
||||
command => "etcdctl --cert ${etcd_cert_file} --key ${etcd_key_file} --cacert ${etcd_ca_cert} --endpoints ${etcd_endpoints} snapshot restore ${etcd_snapshot_file} --data-dir /opt/etcd/${software_version}/controller.etcd --name ${etcd_name} --initial-cluster ${etcd_initial_cluster} ", # lint:ignore:140chars
|
||||
environment => [ 'ETCDCTL_API=3' ],
|
||||
onlyif => "test -f ${etcd_snapshot_file}"
|
||||
}
|
||||
-> exec { 'restore static manifest files':
|
||||
command => "/usr/bin/cp -f ${static_pod_manifests}/*.yaml /etc/kubernetes/manifests",
|
||||
onlyif => "test -d ${static_pod_manifests}",
|
||||
}
|
||||
-> exec { 'unmask etcd service':
|
||||
command => '/usr/bin/systemctl unmask --runtime --now etcd',
|
||||
}
|
||||
-> exec { 'unmask docker service':
|
||||
command => '/usr/bin/systemctl unmask --runtime --now docker',
|
||||
}
|
||||
-> exec { 'unmask containerd service':
|
||||
command => '/usr/bin/systemctl unmask --runtime --now containerd',
|
||||
}
|
||||
-> Class['platform::kubernetes::bindmounts']
|
||||
-> Class['platform::kubernetes::unmask_start_kubelet']
|
||||
|
||||
Reference in New Issue
Block a user