# Bash script (147 lines, 6.3 KiB)
echo "START: upgrade k8s versions and operating system"

# Tracing is turned off around the load of heat-params and turned back on
# afterwards (presumably to keep the sourced values out of the trace output).
set +x
[ -f "/etc/sysconfig/heat-params" ] || {
    # Nothing to do on this node: report why and finish successfully.
    echo "File /etc/sysconfig/heat-params can not be found. Cluster update is involving a node rebuild."
    exit 0
}
source /etc/sysconfig/heat-params
set -x

# From here on: abort on errors, on unset variables and on failing pipelines.
set -eu -o pipefail
# Command prefixes used by the rest of the script. They are plain strings that
# are expanded UNQUOTED on purpose so that they split into command + arguments.
#   ssh_cmd     - runs a command as root@localhost through ssh.
#   kubecontrol - kubectl binary plus the kubeconfig to use; the binary
#                 location depends on USE_PODMAN.
ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"
KUBECONFIG="/etc/kubernetes/kubelet-config.yaml"
# Case-insensitive comparison; the expansion is quoted so the value is never
# word-split or glob-expanded.
if [ "${USE_PODMAN,,}" == "true" ]; then
    kubecontrol="/srv/magnum/bin/kubectl --kubeconfig $KUBECONFIG"
else
    kubecontrol="/var/lib/containers/atomic/heat-container-agent.0/rootfs/usr/bin/kubectl --kubeconfig $KUBECONFIG"
fi

# Requested target versions, copied from the *_input variables
# (NOTE(review): these are expected to be set before this point - confirm
# where they are provided).
new_kube_tag="$kube_tag_input"
new_kube_image_digest="$kube_image_digest_input"
new_ostree_remote="$ostree_remote_input"
new_ostree_commit="$ostree_commit_input"

# Set to true by drain() once any upgrade path has started.
is_upgrade_triggered=false
#######################################
# Take this node out of scheduling before an upgrade.
# Globals:
#   ssh_cmd, kubecontrol (read; expanded unquoted on purpose so they split
#                         into command + arguments)
#   INSTANCE_NAME        (read)
#   is_upgrade_triggered (written: set to true)
#   all_masters, all_workers (written: node names returned by kubectl)
# Arguments: none
# Returns: status of the last kubectl command that ran
#######################################
function drain {
    is_upgrade_triggered=true
    # If there is only one master and this is the master node, skip the drain, just cordon it
    # If there is only one worker and this is the worker node, skip the drain, just cordon it
    all_masters=$(${ssh_cmd} ${kubecontrol} get nodes "--selector=node-role.kubernetes.io/master=" -o name)
    all_workers=$(${ssh_cmd} ${kubecontrol} get nodes "--selector=node-role.kubernetes.io/master!=" -o name)
    # A list that equals exactly "node/<this node>" means this node is the
    # only member of that role.
    if [ "node/${INSTANCE_NAME}" != "${all_masters}" ] && [ "node/${INSTANCE_NAME}" != "${all_workers}" ]; then
        ${ssh_cmd} ${kubecontrol} drain "${INSTANCE_NAME}" --ignore-daemonsets --delete-local-data --force
    else
        ${ssh_cmd} ${kubecontrol} cordon "${INSTANCE_NAME}"
    fi
}
# Upgrade the Kubernetes services when the requested tag (new_kube_tag)
# differs from the tag loaded from heat-params (KUBE_TAG).
# ssh_cmd / kubecontrol / SERVICE_LIST are expanded unquoted on purpose: they
# hold a command prefix or a whitespace-separated list.
if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then

    drain

    if [ "${USE_PODMAN,,}" == "true" ]; then
        SERVICE_LIST=$($ssh_cmd podman ps -f name=kube --format '{{.Names}}')

        # Stop every kube service and remove its container.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop "${service}"
            ${ssh_cmd} podman rm "${service}"
        done

        # Drop the old image and record the new tag before the services are
        # started again.
        ${ssh_cmd} podman rmi "${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${KUBE_TAG}"
        echo "KUBE_TAG=$new_kube_tag" >> /etc/sysconfig/heat-params

        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl start "${service}"
        done

        # Wait (at most 30 checks, 5s apart) until the new image is present.
        # If it never shows up, the inspect below fails and aborts the script.
        i=0
        until ${ssh_cmd} podman image exists "${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag}"
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Pulling image: hyperkube:${new_kube_tag}"
            sleep 5s
        done

        # Verify the image digest only when an expected digest was given.
        KUBE_DIGEST=$($ssh_cmd podman image inspect "${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag}" --format "{{.Digest}}")
        if [ -n "${new_kube_image_digest}" ] && [ "${new_kube_image_digest}" != "${KUBE_DIGEST}" ]; then
            # Data is passed as an argument, never as the printf format.
            printf '%s\n' "The sha256 ${KUBE_DIGEST} of current hyperkube image cannot match the given one: ${new_kube_image_digest}."
            exit 1
        fi

        # Retry the uncordon (at most 30 attempts, 5s apart).
        # NOTE(review): when all attempts fail the loop just ends and the
        # script carries on with the node still cordoned.
        i=0
        until ${ssh_cmd} ${kubecontrol} uncordon "${INSTANCE_NAME}"
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Trying to uncordon node..."
            sleep 5s
        done
    else
        # Maps each service name to the image name used for it.
        declare -A service_image_mapping
        service_image_mapping=( ["kubelet"]="kubernetes-kubelet" ["kube-controller-manager"]="kubernetes-controller-manager" ["kube-scheduler"]="kubernetes-scheduler" ["kube-proxy"]="kubernetes-proxy" ["kube-apiserver"]="kubernetes-apiserver" )

        SERVICE_LIST=$($ssh_cmd atomic containers list -f container=kube -q --no-trunc)

        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop "${service}"
        done

        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic pull --storage ostree "${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag}"
        done

        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic containers update --rebase "${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag}" "${service}"
        done

        # Restart through ssh_cmd, the same way the services were stopped
        # above, so stop and restart act on the same systemd instance.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl restart "${service}"
        done

        ${ssh_cmd} ${kubecontrol} uncordon "${INSTANCE_NAME}"

        # Remove the images of the previous tag, then prune.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic --assumeyes images "delete ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${KUBE_TAG}"
        done

        ${ssh_cmd} atomic images prune
        # NOTE(review): unlike the podman branch, this branch does not record
        # the new KUBE_TAG in /etc/sysconfig/heat-params - confirm intended.
    fi
fi
# NOTE(flwang): The record that starts with "*" is the current deployment.
# Read the commit and the remote of the current deployment from
# `rpm-ostree status`, and the list of configured remotes.
current_ostree_commit=$(${ssh_cmd} rpm-ostree status | grep -A 3 "* ostree://" | grep Commit | awk '{print $2}')
current_ostree_remote=$(${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}')
# NOTE(review): remote_list is not used in the visible part of the script.
remote_list=$(${ssh_cmd} ostree remote list)

if [[ $current_ostree_remote == *"fedora:fedora/x86_64/coreos/stable"* ]]; then
    # By default there is no RPM PGP key for FC33 if the server was built on
    # FC31, so add the GPG key to make sure the server can upgrade from FC31 to FC33.
    # The key is saved with --output so that it is written by curl itself on
    # the ssh target, where `ostree remote add` reads gpgkeypath; a shell
    # redirection here would be performed by the local shell instead.
    ${ssh_cmd} curl --output /etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary https://getfedora.org/static/fedora.gpg
    # Best effort: the remote may not exist yet.
    ${ssh_cmd} ostree remote delete fedora-latest-magnum || true
    ${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-latest-magnum https://ostree.fedoraproject.org
fi
# NOTE(flwang): 1. Only one of deploy or rebase is performed per upgrade;
#                  a requested commit takes precedence over a requested remote.
#               2. rpm-ostree is used instead of the atomic command to keep
#                  the possibility of supporting fedora coreos 30.
ostree_action=""
if [ -n "$new_ostree_commit" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then
    ostree_action="deploy $new_ostree_commit"
elif [ -n "$new_ostree_remote" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then
    ostree_action="rebase $new_ostree_remote"
fi

if [ -n "$ostree_action" ]; then
    drain
    # Expanded unquoted on purpose: "<subcommand> <target>" must split into
    # separate arguments.
    ${ssh_cmd} rpm-ostree ${ostree_action}
    # The reboot is requested only when the uncordon succeeded.
    ${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
fi
# When an upgrade was triggered above (drain sets is_upgrade_triggered to
# true), stop here so that the cert rotate scripts that follow are not run.
case "${is_upgrade_triggered}" in
    true)
        echo "END: Upgrade k8s versions and operating system"
        exit 0
        ;;
esac