From 58eb391396998d712d8f27ce8de0df0964f038c4 Mon Sep 17 00:00:00 2001 From: Ramesh Kumar Sivanandam Date: Wed, 17 May 2023 15:17:39 -0400 Subject: [PATCH] AIO-SX K8s upgrade optimization: "cordon" and "uncordon" as optional Executing the cordon and uncordon commands is not necessary during a k8s upgrade in AIO-SX. This change makes the cordon and uncordon system commands optional. Test Plan: PASS: Fresh install ISO as AIO-SX. Successfully upgraded from 1.23.1 to 1.24 using the manual K8s upgrade without executing cordon and uncordon system commands. Successfully upgraded from 1.24 to 1.25 with the cordon and uncordon system commands. After executing the cordon system command, some of the pods went to the 'Pending' status. After executing the uncordon system command, the pending pods returned to the 'Running' status. Story: 2010565 Task: 48042 Change-Id: Ia4b7b8345d33cb6662c6de6fbb13d6314e4c109f Signed-off-by: Ramesh Kumar Sivanandam --- .../sysinv/sysinv/api/controllers/v1/host.py | 1 + .../sysinv/api/controllers/v1/kube_upgrade.py | 51 ++++++++++++------- .../sysinv/sysinv/sysinv/common/kubernetes.py | 12 +++++ .../sysinv/tests/api/test_kube_upgrade.py | 11 ++++ 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index 2e5d28fb99..27d6703a2b 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -7114,6 +7114,7 @@ class HostController(rest.RestController): if utils.get_system_mode() == constants.SYSTEM_MODE_SIMPLEX: check_upgraded_state = [ + kubernetes.KUBE_UPGRADED_NETWORKING, kubernetes.KUBE_UPGRADED_FIRST_MASTER, kubernetes.KUBE_UPGRADE_CORDON_COMPLETE] else: diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_upgrade.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_upgrade.py index 505b830d94..90de6a0ad7 100755 ---
a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_upgrade.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_upgrade.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -468,14 +468,31 @@ class KubeUpgradeController(rest.RestController): # Make sure upgrade is in the correct state to complete system = pecan.request.dbapi.isystem_get_one() if system.system_mode == constants.SYSTEM_MODE_SIMPLEX: - if kube_upgrade_obj.state not in [ - kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE]: - raise wsme.exc.ClientSideError(_( - "Kubernetes upgrade must be in %s state to complete" % - kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE)) + # If the node is unschedulable=True then the cordon command + # executed already and some of the pods are in pending status. + # The uncordon command needs to be triggered. + # If the node is unschedulable=None then the pods are in the + # running status. 
+ unschedulable = None + node_status = self._kube_operator.kube_get_node_status(constants.CONTROLLER_0_HOSTNAME) + LOG.debug("Node status: %s" % node_status) + if node_status: + unschedulable = node_status.spec.unschedulable - kube_host_upgrades = \ - pecan.request.dbapi.kube_host_upgrade_get_list() + if unschedulable: + if kube_upgrade_obj.state not in [ + kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE]: + raise wsme.exc.ClientSideError(_( + "Kubernetes upgrade must be in %s state to complete" % + kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE)) + else: + if kube_upgrade_obj.state not in [ + kubernetes.KUBE_UPGRADING_KUBELETS, + kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE]: + raise wsme.exc.ClientSideError(_( + "Kubernetes upgrade must be in %s or %s state to complete" % + (kubernetes.KUBE_UPGRADING_KUBELETS, + kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE))) else: if kube_upgrade_obj.state not in [ kubernetes.KUBE_UPGRADING_KUBELETS]: @@ -483,15 +500,15 @@ class KubeUpgradeController(rest.RestController): "Kubernetes upgrade must be in %s state to complete" % kubernetes.KUBE_UPGRADING_KUBELETS)) - # Make sure no hosts are in a transitory or failed state - kube_host_upgrades = \ - pecan.request.dbapi.kube_host_upgrade_get_list() - for kube_host_upgrade in kube_host_upgrades: - if kube_host_upgrade.status != \ - kubernetes.KUBE_HOST_UPGRADED_KUBELET: - raise wsme.exc.ClientSideError(_( - "At least one host has not completed the kubernetes " - "upgrade")) + # Make sure no hosts are in a transitory or failed state + kube_host_upgrades = \ + pecan.request.dbapi.kube_host_upgrade_get_list() + for kube_host_upgrade in kube_host_upgrades: + if kube_host_upgrade.status != \ + kubernetes.KUBE_HOST_UPGRADED_KUBELET: + raise wsme.exc.ClientSideError(_( + "At least one host has not completed the kubernetes " + "upgrade")) # Make sure the target version is active version_states = self._kube_operator.kube_get_version_states() diff --git a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py 
b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py index 86f95ea581..b5e02cebfd 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py +++ b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py @@ -392,6 +392,18 @@ class KubeOperator(object): LOG.warn("Kubernetes exception in kube_get_nodes: %s" % e) raise + @retry(stop_max_attempt_number=API_RETRY_ATTEMPT_NUMBER, + wait_fixed=API_RETRY_INTERVAL, + retry_on_exception=_retry_on_urllibs3_MaxRetryError) + def kube_get_node_status(self, name): + try: + api_response = self._get_kubernetesclient_core().read_node_status(name) + LOG.debug("Response: %s" % api_response) + return api_response + except Exception as e: + LOG.warn("Kubernetes exception in kube_get_node_status: %s" % e) + raise + def kube_namespaced_pods_exist(self, namespace): LOG.debug("kube_namespaced_pods_exist, namespace=%s" % (namespace)) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_upgrade.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_upgrade.py index d7e84bb4e3..e506f0e041 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_upgrade.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_upgrade.py @@ -132,6 +132,17 @@ class TestKubeUpgrade(base.FunctionalTest): self.mocked_get_kube_versions.start() self.addCleanup(self.mocked_get_kube_versions.stop) + # Mock the KubeOperator + self.kube_get_node_status_result = None + + def mock_kube_get_node_status(obj): + return self.kube_get_node_status_result + self.mocked_kube_get_node_status = mock.patch( + 'sysinv.common.kubernetes.KubeOperator.kube_get_node_status', + mock_kube_get_node_status) + self.mocked_kube_get_node_status.start() + self.addCleanup(self.mocked_kube_get_node_status.stop) + # Mock the KubeOperator self.kube_get_kubernetes_version_result = 'v1.43.1'