# Source: magnum/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh
# (147 lines, 6.3 KiB, Bash)
echo "START: upgrade k8s versions and operating system"

# heat-params carries the cluster configuration written at node build time.
# If it is absent, this update is rebuilding the node instead of upgrading
# it in place, so there is nothing for this script to do.
set +x
if [ ! -f "/etc/sysconfig/heat-params" ]; then
    echo "File /etc/sysconfig/heat-params can not be found. Cluster update is involving a node rebuild."
    exit 0
fi
. /etc/sysconfig/heat-params
set -x

set -eu -o pipefail

# All host-side operations go through ssh to the host from the agent.
ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"
KUBECONFIG="/etc/kubernetes/kubelet-config.yaml"

# Select the kubectl binary matching the container runtime in use.
case "$(echo $USE_PODMAN | tr '[:upper:]' '[:lower:]')" in
    true)
        kubecontrol="/srv/magnum/bin/kubectl --kubeconfig $KUBECONFIG"
        ;;
    *)
        kubecontrol="/var/lib/containers/atomic/heat-container-agent.0/rootfs/usr/bin/kubectl --kubeconfig $KUBECONFIG"
        ;;
esac

# Upgrade targets requested by the cluster update.
new_kube_tag="$kube_tag_input"
new_kube_image_digest="$kube_image_digest_input"
new_ostree_remote="$ostree_remote_input"
new_ostree_commit="$ostree_commit_input"

# Flipped to true by drain(); checked at the bottom of this script.
is_upgrade_triggered=false
# Cordon this node and, unless it is the only node of its role, drain it.
# Draining the sole master (or sole worker) would evict workloads with
# nowhere to reschedule, so in that case we only cordon.
# Side effect: marks the upgrade as triggered for the final-exit check.
function drain {
    is_upgrade_triggered=true
    all_masters=$(${ssh_cmd} ${kubecontrol} get nodes --selector=node-role.kubernetes.io/master= -o name)
    all_workers=$(${ssh_cmd} ${kubecontrol} get nodes --selector=node-role.kubernetes.io/master!= -o name)
    if [ "node/${INSTANCE_NAME}" = "${all_masters}" ] || [ "node/${INSTANCE_NAME}" = "${all_workers}" ]; then
        # This node is the entire master (or worker) list: cordon only.
        ${ssh_cmd} ${kubecontrol} cordon ${INSTANCE_NAME}
    else
        ${ssh_cmd} ${kubecontrol} drain ${INSTANCE_NAME} --ignore-daemonsets --delete-local-data --force
    fi
}
# Upgrade the Kubernetes components when a new tag was requested.
if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then
    drain

    if [ "$(echo $USE_PODMAN | tr '[:upper:]' '[:lower:]')" == "true" ]; then
        # Podman (Fedora CoreOS) path: the kube* components run as podman
        # containers managed by systemd units on the host (reached via ssh).
        SERVICE_LIST=$($ssh_cmd podman ps -f name=kube --format {{.Names}})
        # Stop and remove the running containers so the units recreate them
        # from the new image.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop ${service}
            ${ssh_cmd} podman rm ${service}
        done
        ${ssh_cmd} podman rmi ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${KUBE_TAG}
        # Persist the new tag so the units, which read heat-params, pull the
        # matching hyperkube image when started below.
        echo "KUBE_TAG=$new_kube_tag" >> /etc/sysconfig/heat-params
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl start ${service}
        done
        # Wait (30 x 5s max) for the new hyperkube image to appear.
        i=0
        until [ "`${ssh_cmd} podman image exists ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag} && echo $?`" = 0 ]
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Pulling image: hyperkube:${new_kube_tag}"
            sleep 5s
        done
        # When a digest was pinned by the caller, refuse to proceed if the
        # pulled image does not match it.
        KUBE_DIGEST=$($ssh_cmd podman image inspect ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube:${new_kube_tag} --format "{{.Digest}}")
        if [ -n "${new_kube_image_digest}" ] && [ "${new_kube_image_digest}" != "${KUBE_DIGEST}" ]; then
            printf "The sha256 ${KUBE_DIGEST} of current hyperkube image cannot match the given one: ${new_kube_image_digest}."
            exit 1
        fi
        # Make the node schedulable again; retry because the apiserver may
        # still be restarting.
        i=0
        until ${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME}
        do
            i=$((i+1))
            [ $i -lt 30 ] || break;
            echo "Trying to uncordon node..."
            sleep 5s
        done
    else
        # Legacy atomic (Fedora Atomic) path: each component is a separate
        # system container; map unit names to their image names.
        declare -A service_image_mapping
        service_image_mapping=( ["kubelet"]="kubernetes-kubelet" ["kube-controller-manager"]="kubernetes-controller-manager" ["kube-scheduler"]="kubernetes-scheduler" ["kube-proxy"]="kubernetes-proxy" ["kube-apiserver"]="kubernetes-apiserver" )
        SERVICE_LIST=$($ssh_cmd atomic containers list -f container=kube -q --no-trunc)
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} systemctl stop ${service}
        done
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic pull --storage ostree "${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag}"
        done
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic containers update --rebase ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${new_kube_tag} ${service}
        done
        for service in ${SERVICE_LIST}; do
            # Fix: restart on the host via ssh, consistent with the
            # stop/start calls above; previously this ran inside the heat
            # agent container instead of on the host.
            ${ssh_cmd} systemctl restart ${service}
        done
        ${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME}
        # Remove the images for the previous KUBE_TAG and reclaim space.
        # NOTE(review): "delete <image>" is a single word locally; it only
        # works because ssh re-joins the arguments and the remote shell
        # re-splits them — confirm before restructuring.
        for service in ${SERVICE_LIST}; do
            ${ssh_cmd} atomic --assumeyes images "delete ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}${service_image_mapping[${service}]}:${KUBE_TAG}"
        done
        ${ssh_cmd} atomic images prune
    fi
fi
# NOTE(flwang): Record starts with "*" means the current one
# Parse the booted deployment (the "*"-prefixed record of `rpm-ostree
# status`) to obtain the commit hash and the ostree remote ref in use.
current_ostree_commit=`${ssh_cmd} rpm-ostree status | grep -A 3 "* ostree://" | grep Commit | awk '{print $2}'`
current_ostree_remote=`${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}'`
# NOTE(review): remote_list is never read below — looks like dead code;
# confirm before removing.
remote_list=`${ssh_cmd} ostree remote list`
if [[ $current_ostree_remote == *"fedora:fedora/x86_64/coreos/stable"* ]]; then
# By default there is no RPM PGP key for FC33 if the server built on
# FC31, so add the GPG key to make sure server can upgrade from FC31 to FC33.
# NOTE(review): the '>' redirection below is performed by the local shell
# (the heat agent), not on the ssh'd host — confirm the agent and the host
# see the same /etc/pki path, otherwise the key lands in the wrong place.
${ssh_cmd} curl https://getfedora.org/static/fedora.gpg > /etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary
# Recreate the magnum-managed remote idempotently (delete may fail on the
# first run; that is expected and ignored).
${ssh_cmd} ostree remote delete fedora-latest-magnum || true
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-latest-magnum https://ostree.fedoraproject.org
fi
# NOTE(flwang): 1. Either deploy or rebase for only one upgrade
# 2. Using rpm-ostree command instead of atomic command to keep the possibility of supporting fedora coreos 30
# `deploy` pins the host to an exact commit; `rebase` switches to another
# remote/branch. Either way: drain first, then uncordon and reboot with a
# 1-second grace so the node boots into the new deployment schedulable.
# NOTE(review): `shutdown` runs in the local (agent) context, unlike the
# ssh-wrapped commands — confirm the agent is able to reboot the host.
if [ "$new_ostree_commit" != "" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then
drain
${ssh_cmd} rpm-ostree deploy $new_ostree_commit
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
elif [ "$new_ostree_remote" != "" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then
drain
${ssh_cmd} rpm-ostree rebase $new_ostree_remote
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
fi
# If an upgrade was actually triggered (drain ran), stop here so the cert
# rotation fragments appended after this one do not execute.
case "${is_upgrade_triggered}" in
    true)
        echo "END: Upgrade k8s versions and operating system"
        exit 0
        ;;
esac