Fix rolling upgrade

There are several changes in this patch:

1) Fix the kube_image_digest for both master and worker nodes
2) Source the bashrc so that the kubectl command works
3) Fix the upgrade scenario for nodes that are image-based rather
   than volume-based

Story: 2008628
Task: 41832

Change-Id: Iccde333b8f2d219de9c5923d4adb822c7d3f19f4
This commit is contained in:
Feilong Wang 2021-02-17 09:01:40 +13:00
parent 42f8c97bbf
commit e7c33dc4f9
7 changed files with 64 additions and 43 deletions

View File

@ -1,6 +1,17 @@
echo "START: upgrade k8s versions and operating system"
set +x
if [ ! -f "/etc/sysconfig/heat-params" ]; then
echo "File /etc/sysconfig/heat-params can not be found. Cluster update is involving a node rebuild."
exit 0
fi
. /etc/sysconfig/heat-params
set -x
set -eu -o pipefail
ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"
KUBECONFIG="/etc/kubernetes/kubelet-config.yaml"
if [ "$(echo $USE_PODMAN | tr '[:upper:]' '[:lower:]')" == "true" ]; then
@ -12,8 +23,10 @@ new_kube_tag="$kube_tag_input"
new_kube_image_digest="$kube_image_digest_input"
new_ostree_remote="$ostree_remote_input"
new_ostree_commit="$ostree_commit_input"
is_upgrade_triggered=false
function drain {
is_upgrade_triggered=true
# If there is only one master and this is the master node, skip the drain, just cordon it
# If there is only one worker and this is the worker node, skip the drain, just cordon it
all_masters=$(${ssh_cmd} ${kubecontrol} get nodes --selector=node-role.kubernetes.io/master= -o name)
@ -99,59 +112,35 @@ if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then
fi
fi
function setup_uncordon {
    # Install and enable a systemd unit (uncordon.service) so that the node
    # uncordons itself after a reboot.
    # Globals read: ssh_cmd, kubecontrol, INSTANCE_NAME
    # The unit file is only written when it does not exist yet. The heredoc
    # delimiter is unquoted, so ${kubecontrol} and ${INSTANCE_NAME} are
    # expanded at the time the file is written, not when the unit runs.
    local unit_path=/etc/systemd/system/uncordon.service

    # Unit already present: nothing to create or enable.
    if [ -f "${unit_path}" ]; then
        return 0
    fi

    # The heredoc is fed to `cat` run through ${ssh_cmd}; the output is
    # redirected into the unit file by this shell.
    ${ssh_cmd} cat > "${unit_path}" << EOF
[Unit]
Description=magnum-uncordon
After=network.target kubelet.service
[Service]
Restart=always
RemainAfterExit=yes
RestartSec=10
ExecStart=${kubecontrol} uncordon ${INSTANCE_NAME}
[Install]
WantedBy=multi-user.target
EOF
    ${ssh_cmd} systemctl enable uncordon.service
}
# NOTE(flwang): Record starts with "*" means the current one
current_ostree_commit=`${ssh_cmd} rpm-ostree status | grep -A 3 "* ostree://" | grep Commit | awk '{print $2}'`
current_ostree_remote=`${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}'`
remote_list=`${ssh_cmd} ostree remote list`
# NOTE(flwang): This part is only applicable for fedora atomic
if [[ $current_ostree_remote == *"fedora-atomic"* ]]; then
# Fedora Atomic 29 will be the last release before migrating to Fedora CoreOS, so we're OK to add 28 and 29 remotes directly
if [[ ! " ${remote_list[@]} " =~ "fedora-atomic-28" ]]; then
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-28-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-28 https://kojipkgs.fedoraproject.org/atomic/repo/
fi
if [[ ! " ${remote_list[@]} " =~ "fedora-atomic-29" ]]; then
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-29-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-29 https://kojipkgs.fedoraproject.org/atomic/repo/
fi
# The uri of existing Fedora Atomic 27 remote is not accessible now, so replace it with correct uri
if [[ " ${remote_list[@]} " =~ "fedora-atomic" ]]; then
sed -i '
/^url=/ s|=.*|=https://kojipkgs.fedoraproject.org/atomic/repo/|
' /etc/ostree/remotes.d/fedora-atomic.conf
fi
if [[ $current_ostree_remote == *"fedora:fedora/x86_64/coreos/stable"* ]]; then
# By default there is no RPM GPG key for FC33 if the server was built on
# FC31, so add the GPG key to make sure the server can upgrade from FC31 to FC33.
${ssh_cmd} curl https://getfedora.org/static/fedora.gpg > /etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary
${ssh_cmd} ostree remote delete fedora-latest-magnum || true
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-latest-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-latest-magnum https://ostree.fedoraproject.org
fi
# NOTE(flwang): 1. Either deploy or rebase for only one upgrade
# 2. Using rpm-ostree command instead of atomic command to keep the possibility of supporting fedora coreos 30
if [ "$new_ostree_commit" != "" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then
drain
setup_uncordon
${ssh_cmd} rpm-ostree deploy $new_ostree_commit
shutdown --reboot --no-wall -t 1
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
elif [ "$new_ostree_remote" != "" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then
drain
setup_uncordon
${ssh_cmd} rpm-ostree rebase $new_ostree_remote
shutdown --reboot --no-wall -t 1
${ssh_cmd} ${kubecontrol} uncordon ${INSTANCE_NAME} && shutdown --reboot --no-wall -t 1
fi
# We need to double-check whether this is a k8s-version-upgrade-only action; if so, the script should exit to avoid running the later
# cert rotation scripts.
if [ "${is_upgrade_triggered}" == true ]; then
echo "END: Upgrade k8s versions and operating system"
exit 0
fi

View File

@ -316,15 +316,17 @@ class KubernetesDriver(HeatDriver):
class FedoraKubernetesDriver(KubernetesDriver):
"""Base driver for Kubernetes clusters."""
def get_heat_params(self, cluster_template):
def get_heat_params(self, context, cluster, cluster_template):
heat_params = {}
try:
kube_tag = cluster_template.labels["kube_tag"]
image_digest = cluster_template.labels.get("kube_image_digest")
kube_tag_params = {
"kube_tag": kube_tag,
"kube_version": kube_tag,
"master_kube_tag": kube_tag,
"minion_kube_tag": kube_tag,
"kube_image_digest": image_digest,
}
heat_params.update(kube_tag_params)
except KeyError:
@ -352,6 +354,21 @@ class FedoraKubernetesDriver(KubernetesDriver):
', '.join(upgrade_labels))
raise exception.InvalidClusterTemplateForUpgrade(reason=reason)
# NOTE(flwang): Only support rebuilding the instance if it is image-based,
# until volume-based instance rebuild is properly supported
# in Heat.
heat_client = clients.OpenStackClients(context).heat()
node_volumes = heat_client.resources.list(
cluster.stack_id, nested_depth=2,
filters={"name": "kube_node_volume"})
current_template = conductor_utils.retrieve_cluster_template(context,
cluster)
if (len(node_volumes) == 0 and
(current_template.image_id != cluster_template.image_id)):
heat_params.update({"master_image": cluster_template.image_id,
"minion_image": cluster_template.image_id})
return heat_params
@staticmethod
@ -400,7 +417,8 @@ class FedoraKubernetesDriver(KubernetesDriver):
# hardcode what we want to send to heat.
# Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set
# Update heat_params based on the data generated above
heat_params.update(self.get_heat_params(cluster_template))
heat_params.update(self.get_heat_params(context, cluster,
cluster_template))
stack_id = nodegroup.stack_id
if nodegroup is not None and not nodegroup.is_default:

View File

@ -124,7 +124,7 @@ storage:
done
/usr/bin/update-ca-trust
mkdir /etc/kubernetes/
mkdir -p /etc/kubernetes/
cp /etc/pki/tls/certs/ca-bundle.crt /etc/kubernetes/ca-bundle.crt
HTTP_PROXY="__HTTP_PROXY__"

View File

@ -1396,6 +1396,7 @@ resources:
type: OS::Heat::SoftwareDeployment
properties:
actions: ['CREATE']
name: master_config_deployment_2
signal_transport: HEAT_SIGNAL
config:
get_resource: kube_cluster_config
@ -1515,6 +1516,7 @@ resources:
ostree_commit: {get_param: ostree_commit}
use_podman: {get_param: use_podman}
selinux_mode: {get_param: selinux_mode}
kube_image_digest: {get_param: kube_image_digest}
container_runtime: {get_param: container_runtime}
containerd_version: {get_param: containerd_version}
containerd_tarball_url: {get_param: containerd_tarball_url}

View File

@ -915,6 +915,7 @@ resources:
master_config_deployment:
type: OS::Heat::SoftwareDeployment
properties:
name: master_config_deployment_1
signal_transport: HEAT_SIGNAL
config: {get_resource: master_config}
server: {if: ["volume_based", {get_resource: kube-master-bfv}, {get_resource: kube-master}]}
@ -1060,12 +1061,14 @@ resources:
- "\n"
-
- "#!/bin/bash"
- "source /etc/bashrc"
- get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
- get_file: ../../common/templates/kubernetes/fragments/make-cert.sh
- get_file: ../../common/templates/kubernetes/fragments/rotate-kubernetes-ca-certs-master.sh
upgrade_kubernetes_deployment:
type: OS::Heat::SoftwareDeployment
depends_on: master_config_deployment
properties:
signal_transport: HEAT_SIGNAL
config: {get_resource: upgrade_kubernetes}

View File

@ -334,6 +334,12 @@ parameters:
description: >
Choose SELinux mode
kube_image_digest:
type: string
description: >
The digest of the image which should match the given kube_tag
default: ''
container_runtime:
type: string
description: The container runtime to install
@ -601,6 +607,7 @@ resources:
group: script
inputs:
- name: kube_tag_input
- name: kube_image_digest_input
- name: ostree_remote_input
- name: ostree_commit_input
- name: kube_service_account_key_input
@ -610,6 +617,7 @@ resources:
- "\n"
-
- "#!/bin/bash"
- "source /etc/bashrc"
- get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
- get_file: ../../common/templates/kubernetes/fragments/make-cert-client.sh
- get_file: ../../common/templates/kubernetes/fragments/rotate-kubernetes-ca-certs-worker.sh
@ -623,6 +631,7 @@ resources:
actions: ['UPDATE']
input_values:
kube_tag_input: {get_param: kube_tag}
kube_image_digest_input: {get_param: kube_image_digest}
ostree_remote_input: {get_param: ostree_remote}
ostree_commit_input: {get_param: ostree_commit}
kube_service_account_key_input: {get_param: kube_service_account_key}

View File

@ -114,7 +114,7 @@
"name": "root"
},
"contents": {
"source": "data:,%23!%2Fbin%2Fbash%0A%0Aset%20-x%0Aset%20-e%0Aset%20%2Bu%0A%0Auntil%20%5B%20-f%20%2Fetc%2Fpki%2Fca-trust%2Fsource%2Fanchors%2Fopenstack-ca.pem%20%5D%0Ado%0A%20%20%20%20echo%20%22waiting%20for%20%2Fetc%2Fpki%2Fca-trust%2Fsource%2Fanchors%2Fopenstack-ca.pem%22%0A%20%20%20%20sleep%203s%0Adone%0A%0A%2Fusr%2Fbin%2Fupdate-ca-trust%0Amkdir%20%2Fetc%2Fkubernetes%2F%0Acp%20%2Fetc%2Fpki%2Ftls%2Fcerts%2Fca-bundle.crt%20%2Fetc%2Fkubernetes%2Fca-bundle.crt%0A%0AHTTP_PROXY%3D%22__HTTP_PROXY__%22%0AHTTPS_PROXY%3D%22__HTTPS_PROXY__%22%0ANO_PROXY%3D%22__NO_PROXY__%22%0A%0Aif%20%5B%20-n%20%22%24%7BHTTP_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20HTTP_PROXY%0A%20%20%20%20echo%20%22http_proxy%3D%24%7BHTTP_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0Aif%20%5B%20-n%20%22%24%7BHTTPS_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20HTTPS_PROXY%0A%20%20%20%20echo%20%22https_proxy%3D%24%7BHTTPS_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0Aif%20%5B%20-n%20%22%24%7BNO_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20NO_PROXY%0A%20%20%20%20echo%20%22no_proxy%3D%24%7BNO_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0A%23%20Create%20a%20keypair%20for%20the%20heat-container-agent%20to%0A%23%20access%20the%20node%20over%20ssh.%20It%20is%20useful%20to%20operate%0A%23%20in%20host%20mount%20namespace%20and%20apply%20configuration.%0Aid%0Amkdir%20-p%20%2Fsrv%2Fmagnum%2F.ssh%0Achmod%200700%20%2Fsrv%2Fmagnum%2F.ssh%0A%23touch%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Assh-keygen%20-q%20-t%20rsa%20-N%20''%20-f%20%2Ftmp%2Fheat_agent_rsa%0Amv%20%2Ftmp%2Fheat_agent_rsa%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Amv%20%2Ftmp%2Fheat_agent_rsa.pub%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa.pub%0Achmod%200400%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Achmod%200400%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa.pub%0A%23%20Add%20the%20public%20to%20the%20host%20authorized_keys%20file.%0Amkdir%20-p%20%2Froot%2F.ssh%0Achmod%200700%20%2Froot%2F.ssh%0Acat%20%2Fsrv%2Fmagn
um%2F.ssh%2Fheat_agent_rsa.pub%20%3E%20%2Froot%2F.ssh%2Fauthorized_keys%0A%23%20Add%20localost%20to%20know_hosts%0Assh-keyscan%20127.0.0.1%20%3E%20%2Fsrv%2Fmagnum%2F.ssh%2Fknown_hosts%0A%23%20ssh%20configguration%20file%2C%20to%20be%20specified%20with%20ssh%20-F%0Acat%20%3E%20%2Fsrv%2Fmagnum%2F.ssh%2Fconfig%20%3C%3CEOF%0AHost%20localhost%0A%20%20%20%20%20HostName%20127.0.0.1%0A%20%20%20%20%20User%20root%0A%20%20%20%20%20IdentityFile%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0A%20%20%20%20%20UserKnownHostsFile%20%2Fsrv%2Fmagnum%2F.ssh%2Fknown_hosts%0AEOF%0A%0Ased%20-i%20'%2F%5EPermitRootLogin%2F%20s%2F%20.*%2F%20without-password%2F'%20%2Fetc%2Fssh%2Fsshd_config%0A%23%20Security%20enhancement%3A%20Disable%20password%20authentication%0Ased%20-i%20'%2F%5EPasswordAuthentication%20yes%2F%20s%2F%20yes%2F%20no%2F'%20%2Fetc%2Fssh%2Fsshd_config%0A%0Asystemctl%20restart%20sshd%0A"
"source": "data:,%23!%2Fbin%2Fbash%0A%0Aset%20-x%0Aset%20-e%0Aset%20%2Bu%0A%0Auntil%20%5B%20-f%20%2Fetc%2Fpki%2Fca-trust%2Fsource%2Fanchors%2Fopenstack-ca.pem%20%5D%0Ado%0A%20%20%20%20echo%20%22waiting%20for%20%2Fetc%2Fpki%2Fca-trust%2Fsource%2Fanchors%2Fopenstack-ca.pem%22%0A%20%20%20%20sleep%203s%0Adone%0A%0A%2Fusr%2Fbin%2Fupdate-ca-trust%0Amkdir%20-p%20%2Fetc%2Fkubernetes%2F%0Acp%20%2Fetc%2Fpki%2Ftls%2Fcerts%2Fca-bundle.crt%20%2Fetc%2Fkubernetes%2Fca-bundle.crt%0A%0AHTTP_PROXY%3D%22__HTTP_PROXY__%22%0AHTTPS_PROXY%3D%22__HTTPS_PROXY__%22%0ANO_PROXY%3D%22__NO_PROXY__%22%0A%0Aif%20%5B%20-n%20%22%24%7BHTTP_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20HTTP_PROXY%0A%20%20%20%20echo%20%22http_proxy%3D%24%7BHTTP_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0Aif%20%5B%20-n%20%22%24%7BHTTPS_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20HTTPS_PROXY%0A%20%20%20%20echo%20%22https_proxy%3D%24%7BHTTPS_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0Aif%20%5B%20-n%20%22%24%7BNO_PROXY%7D%22%20%5D%3B%20then%0A%20%20%20%20export%20NO_PROXY%0A%20%20%20%20echo%20%22no_proxy%3D%24%7BNO_PROXY%7D%22%20%3E%3E%20%2Fetc%2Fenvironment%0Afi%0A%0A%23%20Create%20a%20keypair%20for%20the%20heat-container-agent%20to%0A%23%20access%20the%20node%20over%20ssh.%20It%20is%20useful%20to%20operate%0A%23%20in%20host%20mount%20namespace%20and%20apply%20configuration.%0Aid%0Amkdir%20-p%20%2Fsrv%2Fmagnum%2F.ssh%0Achmod%200700%20%2Fsrv%2Fmagnum%2F.ssh%0A%23touch%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Assh-keygen%20-q%20-t%20rsa%20-N%20''%20-f%20%2Ftmp%2Fheat_agent_rsa%0Amv%20%2Ftmp%2Fheat_agent_rsa%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Amv%20%2Ftmp%2Fheat_agent_rsa.pub%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa.pub%0Achmod%200400%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0Achmod%200400%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa.pub%0A%23%20Add%20the%20public%20to%20the%20host%20authorized_keys%20file.%0Amkdir%20-p%20%2Froot%2F.ssh%0Achmod%200700%20%2Froot%2F.ssh%0Acat%20%2Fsrv%2
Fmagnum%2F.ssh%2Fheat_agent_rsa.pub%20%3E%20%2Froot%2F.ssh%2Fauthorized_keys%0A%23%20Add%20localost%20to%20know_hosts%0Assh-keyscan%20127.0.0.1%20%3E%20%2Fsrv%2Fmagnum%2F.ssh%2Fknown_hosts%0A%23%20ssh%20configguration%20file%2C%20to%20be%20specified%20with%20ssh%20-F%0Acat%20%3E%20%2Fsrv%2Fmagnum%2F.ssh%2Fconfig%20%3C%3CEOF%0AHost%20localhost%0A%20%20%20%20%20HostName%20127.0.0.1%0A%20%20%20%20%20User%20root%0A%20%20%20%20%20IdentityFile%20%2Fsrv%2Fmagnum%2F.ssh%2Fheat_agent_rsa%0A%20%20%20%20%20UserKnownHostsFile%20%2Fsrv%2Fmagnum%2F.ssh%2Fknown_hosts%0AEOF%0A%0Ased%20-i%20'%2F%5EPermitRootLogin%2F%20s%2F%20.*%2F%20without-password%2F'%20%2Fetc%2Fssh%2Fsshd_config%0A%23%20Security%20enhancement%3A%20Disable%20password%20authentication%0Ased%20-i%20'%2F%5EPasswordAuthentication%20yes%2F%20s%2F%20yes%2F%20no%2F'%20%2Fetc%2Fssh%2Fsshd_config%0A%0Asystemctl%20restart%20sshd%0A"
},
"mode": 448
},