diff --git a/doc/source/user/rolling-upgrade.rst b/doc/source/user/rolling-upgrade.rst index d35d255454..329f729006 100644 --- a/doc/source/user/rolling-upgrade.rst +++ b/doc/source/user/rolling-upgrade.rst @@ -1,30 +1,38 @@ -Rolling upgrade is one of most important features user want to see for a -managed Kubernetes service. And in Magnum, we're thinking more deeper to -provide better user experience. +Rolling upgrade is an important feature a user may want for a managed +Kubernetes service. +.. note:: + + Kubernetes version upgrade is only supported by the Fedora Atomic and + the Fedora CoreOS drivers. + +A user can run a command as shown below to trigger a rolling upgrade for +Kubernetes version upgrade or node operating system version upgrade. .. code-block:: bash - #!/bin/bash -x + openstack coe cluster upgrade <cluster ID> <new cluster template ID> - IP="192.168.122.1" - CLUSTER="797b39e1-fac2-48d3-8377-d6e6cc443d39" - CT="e32c8cf7-394b-45e6-a17e-4fe6a30ad64b" +The key parameter in the command is the new cluster template ID. For +Kubernetes version upgrade, a newer version for label `kube_tag` should be +provided. Downgrade is not supported. - # Upgrade curl - req_body=$(cat << EOF - { - "max_batch_size": 1, - "nodegroup": "master", - "cluster_template": "${CT}" - } - EOF - ) - USER_TOKEN=$(openstack token issue -c id -f value) - curl -g -i -X PATCH https://${IP}:9511/v1/clusters/${CLUSTER}/actions/upgrade \ - -H "OpenStack-API-Version: container-infra latest" \ - -H "X-Auth-Token: $USER_TOKEN" \ - -H "Content-Type: application/json" \ - -H "Accept: application/json" \ - -H "User-Agent: None" \ - -d "$req_body" +A simple operating system upgrade can be applied using a new image ID in the +new cluster template. However, this entails a downtime for applications running +on the cluster, because all the nodes will be rebuilt one by one. + +The Fedora Atomic driver supports a more graceful operating system upgrade. 
+Similar to the Kubernetes version upgrade, it will cordon and drain the nodes +before upgrading the operating system with the rpm-ostree command. One of the +following two labels must be provided to support this feature: + +* `ostree_commit`: this is a commit ID of ostree the current system should be + upgraded to. An example of a commit ID is + `1766b4526f1a738ba1e6e0a66264139f65340bcc28e7045f10cbe6d161eb1925`. +* `ostree_remote`: this is a remote name of ostree the current system should be + rebased to. An example of a remote name is + `fedora-atomic:fedora/29/x86_64/atomic-host`. + +If both labels are present, `ostree_commit` takes precedence. To check if there +are updates available, run `sudo rpm-ostree upgrade --check` on the Atomic host +which will show you the latest commit ID that can be upgraded to. diff --git a/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh b/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh index 76a6aa879e..1826941e73 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/upgrade-kubernetes.sh @@ -6,8 +6,11 @@ set -x ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost" KUBECONFIG="/etc/kubernetes/kubelet-config.yaml" new_kube_tag="$kube_tag_input" +new_ostree_remote="$ostree_remote_input" +new_ostree_commit="$ostree_commit_input" +HOSTNAME_OVERRIDE="$(cat /etc/hostname | head -1 | sed 's/\.novalocal//')" -if [ ${new_kube_tag}!=${KUBE_TAG} ]; then +function drain { # If there is only one master and this is the master node, skip the drain, just cordon it # If there is only one worker and this is the worker node, skip the drain, just cordon it all_masters=$(kubectl get nodes --selector=node-role.kubernetes.io/master= -o name) @@ -17,6 +20,11 @@ if [ ${new_kube_tag}!=${KUBE_TAG} ]; then else kubectl cordon ${INSTANCE_NAME} fi +} + +if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then + + drain 
SERVICE_LIST=$($ssh_cmd podman ps -f name=kube --format {{.Names}}) @@ -35,9 +43,61 @@ if [ ${new_kube_tag}!=${KUBE_TAG} ]; then i=0 until kubectl uncordon ${INSTANCE_NAME} do - ((i++)) + i=$((i+1)) [ $i -lt 30 ] || break; echo "Trying to uncordon node..." sleep 5s done fi + +function setup_uncordon { + # Create a service to uncordon the node itself after reboot + if [ ! -f /etc/systemd/system/uncordon.service ]; then + $ssh_cmd cat > /etc/systemd/system/uncordon.service << EOF +[Unit] +Description=magnum-uncordon +After=network.target kubelet.service + +[Service] +Restart=Always +RemainAfterExit=yes +ExecStart=${kubecontrol} uncordon ${HOSTNAME_OVERRIDE} + +[Install] +WantedBy=multi-user.target +EOF + ${ssh_cmd} systemctl enable uncordon.service + fi +} + +remote_list=`${ssh_cmd} ostree remote list` +# Fedora Atomic 29 will be the last release before migrating to Fedora CoreOS, so we're OK to add 28 and 29 remotes directly +if [[ ! " ${remote_list[@]} " =~ "fedora-atomic-28" ]]; then + ${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-28-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-28 https://kojipkgs.fedoraproject.org/atomic/repo/ +fi +if [[ ! 
" ${remote_list[@]} " =~ "fedora-atomic-29" ]]; then + ${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-29-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-29 https://kojipkgs.fedoraproject.org/atomic/repo/ +fi +# The uri of existing Fedora Atomic 27 remote is not accessible now, so replace it with correct uri +if [[ " ${remote_list[@]} " =~ "fedora-atomic" ]]; then + sed -i ' + /^url=/ s|=.*|=https://kojipkgs.fedoraproject.org/atomic/repo/| + ' /etc/ostree/remotes.d/fedora-atomic.conf +fi + +current_ostree_commit=`${ssh_cmd} rpm-ostree status | grep Commit | awk '{print $2}'` +current_ostree_remote=`${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}'` + +# NOTE(flwang): 1. Either deploy or rebase for only one upgrade +# 2. Using rpm-ostree command instead of atomic command to keep the possibility of supporting fedora coreos 30 +if [ "$new_ostree_commit" != "" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then + drain + setup_uncordon + ${ssh_cmd} rpm-ostree deploy $new_ostree_commit + shutdown --reboot --no-wall -t 1 +elif [ "$new_ostree_remote" != "" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then + drain + setup_uncordon + ${ssh_cmd} rpm-ostree rebase $new_ostree_remote + shutdown --reboot --no-wall -t 1 +fi diff --git a/magnum/drivers/heat/driver.py b/magnum/drivers/heat/driver.py index a7bc3cd0c4..070ebd20fb 100755 --- a/magnum/drivers/heat/driver.py +++ b/magnum/drivers/heat/driver.py @@ -324,6 +324,40 @@ class KubernetesDriver(HeatDriver): class FedoraKubernetesDriver(KubernetesDriver): """Base driver for Kubernetes clusters.""" + def get_heat_params(self, cluster_template): + heat_params = {} + try: + kube_tag = cluster_template.labels["kube_tag"] + kube_tag_params = { + "kube_tag": kube_tag, + "kube_version": kube_tag, + "master_kube_tag": kube_tag, + "minion_kube_tag": kube_tag, + } + 
heat_params.update(kube_tag_params) + except KeyError: + LOG.debug(("Cluster template %s does not contain a " + "valid kube_tag"), cluster_template.name) + + for ostree_tag in ["ostree_commit", "ostree_remote"]: + try: + ostree_param = { + ostree_tag: cluster_template.labels[ostree_tag] + } + heat_params.update(ostree_param) + except KeyError: + LOG.debug("Cluster template %s does not define %s", + (cluster_template.name, ostree_tag)) + + upgrade_labels = ['kube_tag', 'ostree_remote', 'ostree_commit'] + if not any([u in heat_params.keys() for u in upgrade_labels]): + reason = ("Cluster template %s does not contain any supported " + "upgrade labels: [%s]") % (cluster_template.name, + ', '.join(upgrade_labels)) + raise exception.InvalidClusterTemplateForUpgrade(reason=reason) + + return heat_params + def upgrade_cluster(self, context, cluster, cluster_template, max_batch_size, nodegroup, scale_manager=None, rollback=False): @@ -331,7 +365,10 @@ class FedoraKubernetesDriver(KubernetesDriver): osc = clients.OpenStackClients(context) # Use this just to check that we are not downgrading. - heat_params = {} + heat_params = { + "update_max_batch_size": max_batch_size, + } + if 'kube_tag' in nodegroup.labels: heat_params['kube_tag'] = nodegroup.labels['kube_tag'] @@ -360,19 +397,7 @@ class FedoraKubernetesDriver(KubernetesDriver): # hardcode what we want to send to heat. # Rules: 1. No downgrade 2. Explicitly override 3. 
Merging based on set # Update heat_params based on the data generated above - try: - heat_params = { - "kube_tag": cluster_template.labels["kube_tag"], - "kube_version": cluster_template.labels["kube_tag"], - "master_kube_tag": cluster_template.labels["kube_tag"], - "minion_kube_tag": cluster_template.labels["kube_tag"], - "update_max_batch_size": max_batch_size - } - except KeyError: - # Corner case but if the user defined an invalid CT just abort - reason = ("Cluster template %s does not contain a " - "valid kube_tag") % cluster_template.name - raise exception.InvalidClusterTemplateForUpgrade(reason=reason) + heat_params.update(self.get_heat_params(cluster_template)) stack_id = nodegroup.stack_id if nodegroup is not None and not nodegroup.is_default: diff --git a/magnum/drivers/heat/k8s_fedora_template_def.py b/magnum/drivers/heat/k8s_fedora_template_def.py index a53ea6211c..6f39497d12 100644 --- a/magnum/drivers/heat/k8s_fedora_template_def.py +++ b/magnum/drivers/heat/k8s_fedora_template_def.py @@ -98,7 +98,8 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition): 'auto_healing_enabled', 'auto_scaling_enabled', 'auto_healing_controller', 'magnum_auto_healer_tag', 'draino_tag', 'autoscaler_tag', - 'min_node_count', 'max_node_count', 'npd_enabled'] + 'min_node_count', 'max_node_count', 'npd_enabled', + 'ostree_remote', 'ostree_commit'] labels = self._get_relevant_labels(cluster, kwargs) diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml index 68de802cfb..5165ab312b 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -755,6 +755,16 @@ parameters: default: true + ostree_remote: + type: string + description: The ostree remote branch to upgrade + default: '' + + ostree_commit: + type: string + description: The ostree commit to deploy + default: '' + resources: 
###################################################################### @@ -1069,6 +1079,8 @@ resources: min_node_count: {get_param: min_node_count} max_node_count: {get_param: max_node_count} npd_enabled: {get_param: npd_enabled} + ostree_remote: {get_param: ostree_remote} + ostree_commit: {get_param: ostree_commit} kube_cluster_config: condition: create_cluster_resources @@ -1232,6 +1244,8 @@ resources: auto_healing_enabled: {get_param: auto_healing_enabled} npd_enabled: {get_param: npd_enabled} auto_healing_controller: {get_param: auto_healing_controller} + ostree_remote: {get_param: ostree_remote} + ostree_commit: {get_param: ostree_commit} outputs: diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index 228758933e..e8238393b5 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -530,6 +530,14 @@ parameters: default: true + ostree_remote: + type: string + description: The ostree remote branch to upgrade + + ostree_commit: + type: string + description: The ostree commit to deploy + conditions: image_based: {equals: [{get_param: boot_volume_size}, 0]} @@ -539,6 +547,7 @@ conditions: - get_param: boot_volume_size - 0 + resources: ###################################################################### # @@ -839,6 +848,8 @@ resources: group: script inputs: - name: kube_tag_input + - name: ostree_remote_input + - name: ostree_commit_input config: get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh @@ -851,6 +862,8 @@ resources: actions: ['UPDATE'] input_values: kube_tag_input: {get_param: kube_tag} + ostree_remote_input: {get_param: ostree_remote} + ostree_commit_input: {get_param: ostree_commit} outputs: diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml index 
be5b5fb122..b2c5059a2c 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml @@ -312,6 +312,16 @@ parameters: default: true + ostree_remote: + type: string + description: The ostree remote branch to upgrade + default: '' + + ostree_commit: + type: string + description: The ostree commit to deploy + default: '' + conditions: image_based: {equals: [{get_param: boot_volume_size}, 0]} @@ -321,6 +331,7 @@ conditions: - get_param: boot_volume_size - 0 + resources: agent_config: @@ -526,6 +537,8 @@ resources: group: script inputs: - name: kube_tag_input + - name: ostree_remote_input + - name: ostree_commit_input config: get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh @@ -538,6 +551,8 @@ resources: actions: ['UPDATE'] input_values: kube_tag_input: {get_param: kube_tag} + ostree_remote_input: {get_param: ostree_remote} + ostree_commit_input: {get_param: ostree_commit} outputs: diff --git a/magnum/tests/unit/drivers/test_template_definition.py b/magnum/tests/unit/drivers/test_template_definition.py index 3ef1bbd4dd..e891820ffc 100644 --- a/magnum/tests/unit/drivers/test_template_definition.py +++ b/magnum/tests/unit/drivers/test_template_definition.py @@ -571,6 +571,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): boot_volume_size = mock_cluster.labels.get('boot_volume_size') boot_volume_type = mock_cluster.labels.get('boot_volume_type') etcd_volume_type = mock_cluster.labels.get('etcd_volume_type') + ostree_remote = mock_cluster.labels.get('ostree_remote') + ostree_commit = mock_cluster.labels.get('ostree_commit') k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition() @@ -654,7 +656,9 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'minion_kube_tag': kube_tag, 'boot_volume_size': boot_volume_size, 'boot_volume_type': boot_volume_type, - 'etcd_volume_type': etcd_volume_type + 'etcd_volume_type': 
etcd_volume_type, + 'ostree_remote': ostree_remote, + 'ostree_commit': ostree_commit, }} mock_get_params.assert_called_once_with(mock_context, mock_cluster_template, @@ -1008,6 +1012,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): boot_volume_size = mock_cluster.labels.get('boot_volume_size') boot_volume_type = mock_cluster.labels.get('boot_volume_type') etcd_volume_type = mock_cluster.labels.get('etcd_volume_type') + ostree_remote = mock_cluster.labels.get('ostree_remote') + ostree_commit = mock_cluster.labels.get('ostree_commit') k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition() @@ -1093,7 +1099,9 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'minion_kube_tag': kube_tag, 'boot_volume_size': boot_volume_size, 'boot_volume_type': boot_volume_type, - 'etcd_volume_type': etcd_volume_type + 'etcd_volume_type': etcd_volume_type, + 'ostree_remote': ostree_remote, + 'ostree_commit': ostree_commit, }} mock_get_params.assert_called_once_with(mock_context, mock_cluster_template, diff --git a/releasenotes/notes/support-fedora-atomic-os-upgrade-9f47182b21c6c028.yaml b/releasenotes/notes/support-fedora-atomic-os-upgrade-9f47182b21c6c028.yaml new file mode 100644 index 0000000000..e33fc9806f --- /dev/null +++ b/releasenotes/notes/support-fedora-atomic-os-upgrade-9f47182b21c6c028.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Along with the Kubernetes version upgrade support we just released, we're + adding support for upgrading the operating system of the k8s cluster + (including master and worker nodes). It's an in-place upgrade leveraging the + atomic/ostree upgrade capability.