# Source: magnum/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh
# (147 lines, 6.3 KiB, Bash)
echo "START: upgrade k8s versions and operating system"

# heat-params carries the cluster configuration written at node build time.
# If it is absent, this update is rebuilding the node instead of upgrading
# it in place, so there is nothing for this script to do.
set +x
if [ ! -f "/etc/sysconfig/heat-params" ]; then
    echo "File /etc/sysconfig/heat-params can not be found. Cluster update is involving a node rebuild."
    exit 0
fi
. /etc/sysconfig/heat-params
set -x

set -eu -o pipefail

# All host-side operations go through ssh to the host from the agent.
ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"
KUBECONFIG="/etc/kubernetes/kubelet-config.yaml"

# Select the kubectl binary matching the container runtime in use.
case "$(echo $USE_PODMAN | tr '[:upper:]' '[:lower:]')" in
    true)
        kubecontrol="/srv/magnum/bin/kubectl --kubeconfig $KUBECONFIG"
        ;;
    *)
        kubecontrol="/var/lib/containers/atomic/heat-container-agent.0/rootfs/usr/bin/kubectl --kubeconfig $KUBECONFIG"
        ;;
esac

# Upgrade targets requested by the cluster update.
new_kube_tag="$kube_tag_input"
new_kube_image_digest="$kube_image_digest_input"
new_ostree_remote="$ostree_remote_input"
new_ostree_commit="$ostree_commit_input"

# Flipped to true by drain(); checked at the bottom of this script.
is_upgrade_triggered=false
# Cordon this node and, unless it is the only node of its role, drain it.
# Draining the sole master (or sole worker) would evict workloads with
# nowhere to reschedule, so in that case we only cordon.
# Side effect: marks the upgrade as triggered for the final-exit check.
function drain {
    is_upgrade_triggered=true
    all_masters=$(${ssh_cmd} ${kubecontrol} get nodes --selector=node-role.kubernetes.io/master= -o name)
    all_workers=$(${ssh_cmd} ${kubecontrol} get nodes --selector=node-role.kubernetes.io/master!= -o name)
    if [ "node/${INSTANCE_NAME}" = "${all_masters}" ] || [ "node/${INSTANCE_NAME}" = "${all_workers}" ]; then
        # This node is the entire master (or worker) list: cordon only.
        ${ssh_cmd} ${kubecontrol} cordon ${INSTANCE_NAME}
    else
        ${ssh_cmd} ${kubecontrol} drain ${INSTANCE_NAME} --ignore-daemonsets --delete-local-data --force
    fi
}
# Upgrade the Kubernetes components when a new tag was requested.
if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then
    drain

    if [ "$(echo $USE_PODMAN | tr '[:upper:]' '[:lower:]')" == "true" ]; then
        # Podman (Fedora CoreOS) path: the kube* components run as podman
        # containers managed by systemd units on the host (reached via ssh).
        SERVICE_LIST=$($ssh_cmd podman ps -f name=kube --format {{.Names}})
        # Stop and remove the running containers so the units recreate them
        # from the new image.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop ${service}
            ${ssh_cmd} podman rm ${service}
        done
        ${ssh_cmd} podman rmi ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${KUBE_TAG}
        # Persist the new tag so the units, which read heat-params, pull the
        # matching hyperkube image when started below.
        echo "KUBE_TAG=$new_kube_tag" >> /etc/sysconfig/heat-params
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl start ${service}
        done
        # Wait (30 x 5s max) for the new hyperkube image to appear.
        i=0
        until [ "`${ssh_cmd} podman image exists ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag} && echo $?`" = 0 ]
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Pulling image: hyperkube:${new_kube_tag}"
            sleep 5s
        done
        # When a digest was pinned by the caller, refuse to proceed if the
        # pulled image does not match it.
        KUBE_DIGEST=$($ssh_cmd podman image inspect ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag} --format "{{.Digest}}")
        if [ -n "${new_kube_image_digest}" ] && [ "${new_kube_image_digest}" != "${KUBE_DIGEST}" ]; then
            printf "The sha256 ${KUBE_DIGEST} of current hyperkube image cannot match the given one: ${new_kube_image_digest}."
            exit 1
        fi
        # Make the node schedulable again; retry because the apiserver may
        # still be restarting.
        i=0
        until ${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME}
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Trying to uncordon node..."
            sleep 5s
        done
    else
        # Legacy atomic (Fedora Atomic) path: each component is a separate
        # system container; map unit names to their image names.
        declare -A service_image_mapping
        service_image_mapping=( ["kubelet"]="kubernetes-kubelet" ["kube-controller-manager"]="kubernetes-controller-manager" ["kube-scheduler"]="kubernetes-scheduler" ["kube-proxy"]="kubernetes-proxy" ["kube-apiserver"]="kubernetes-apiserver" )
        SERVICE_LIST=$($ssh_cmd atomic containers list -f container=kube -q --no-trunc)
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop ${service}
        done
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic pull --storage ostree "${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag}"
        done
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic containers update --rebase ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag} ${service}
        done
        for service in ${SERVICE_LIST}; do
            # Fix: restart on the host via ssh, consistent with the
            # stop/start calls above; previously this ran inside the heat
            # agent container instead of on the host.
            ${ssh_cmd} systemctl restart ${service}
        done
        ${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME}
        # Remove the images for the previous KUBE_TAG and reclaim space.
        # NOTE(review): "delete <image>" is a single word locally; it only
        # works because ssh re-joins the arguments and the remote shell
        # re-splits them — confirm before restructuring.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic --assumeyes images "delete ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${KUBE_TAG}"
        done
        ${ssh_cmd} atomic images prune
    fi
fi
# NOTE(flwang): Record starts with "*" means the current one
# Parse the booted deployment (the "*"-prefixed record of `rpm-ostree
# status`) to obtain the commit hash and the ostree remote ref in use.
current_ostree_commit=`${ssh_cmd} rpm-ostree status | grep -A 3 "* ostree://" | grep Commit | awk '{print $2}'`
current_ostree_remote=`${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}'`
# NOTE(review): remote_list is never read below — looks like dead code;
# confirm before removing.
remote_list=`${ssh_cmd} ostree remote list`
if [[ $current_ostree_remote == *"fedora:fedora/x86_64/coreos/stable"* ]]; then
# By default there is no RPM PGP key for FC33 if the server built on
# FC31, so add the GPG key to make sure server can upgrade from FC31 to FC33.
# NOTE(review): the '>' redirection below is performed by the local shell
# (the heat agent), not on the ssh'd host — confirm the agent and the host
# see the same /etc/pki path, otherwise the key lands in the wrong place.
${ssh_cmd} curl https://getfedora.org/static/fedora.gpg > /etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary
# Recreate the magnum-managed remote idempotently (delete may fail on the
# first run; that is expected and ignored).
${ssh_cmd} ostree remote delete fedora-latest-magnum || true
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-latest-magnum https://ostree.fedoraproject.org
fi
# NOTE(flwang): 1. Either deploy or rebase for only one upgrade
# 2. Using rpm-ostree command instead of atomic command to keep the possibility of supporting fedora coreos 30
# `deploy` pins the host to an exact commit; `rebase` switches to another
# remote/branch. Either way: drain first, then uncordon and reboot with a
# 1-second grace so the node boots into the new deployment schedulable.
# NOTE(review): `shutdown` runs in the local (agent) context, unlike the
# ssh-wrapped commands — confirm the agent is able to reboot the host.
if [ "$new_ostree_commit" != "" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then
drain
${ssh_cmd} rpm-ostree deploy $new_ostree_commit
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
elif [ "$new_ostree_remote" != "" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then
drain
${ssh_cmd} rpm-ostree rebase $new_ostree_remote
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
fi
# If an upgrade was actually triggered (drain ran), stop here so the cert
# rotation fragments appended after this one do not execute.
case "${is_upgrade_triggered}" in
    true)
        echo "END: Upgrade k8s versions and operating system"
        exit 0
        ;;
esac