Defer app reapply evaluation during k8s upgrades

Prevent triggering the app reapply evaluation process during
Kubernetes upgrades.

The app reapply evaluation process can raise alarms that
prevent Kubernetes upgrades from proceeding. At the same time,
reapplying apps is deferred while the upgrade is in progress,
so neither operation can complete, causing a deadlock condition.

This commit prevents reapply alarms from being raised during
Kubernetes upgrades and resumes app reapply evaluation when
upgrades are complete.

Test Plan:
PASS: AIO-DX full system upgrade
PASS: Kubernetes upgrade orchestration from v1.23.1 to v1.24.4

Closes-Bug: 2003260
Signed-off-by: Igor Soares <Igor.PiresSoares@windriver.com>
Change-Id: I999ca3f1a454954d2759d3a7d347e51d5875b187
This commit is contained in:
Igor Soares 2023-01-17 14:57:20 -05:00
parent 34c0e10189
commit 72d9329711
4 changed files with 41 additions and 11 deletions

View File

@ -444,6 +444,12 @@ class KubeUpgradeController(rest.RestController):
role = system.get('distributed_cloud_role')
if role == constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER:
dc_api.notify_dcmanager_kubernetes_upgrade_completed()
# Check if apps need to be reapplied
pecan.request.rpcapi.evaluate_apps_reapply(
pecan.request.context,
trigger={'type': constants.APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE})
return KubeUpgrade.convert_with_links(kube_upgrade_obj)
else:

View File

@ -1893,6 +1893,7 @@ APP_EVALUATE_REAPPLY_TYPE_RUNTIME_APPLY_PUPPET = 'runtime-apply-puppet'
APP_EVALUATE_REAPPLY_HOST_AVAILABILITY = 'host-availability-updated'
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify'
APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact'
# Passed as the trigger 'type' when a Kubernetes upgrade is marked complete,
# so that app reapply evaluation runs once the upgrade is over.
APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete'
APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
UNLOCK_ACTION:

View File

@ -8774,6 +8774,21 @@ class ConductorManager(service.PeriodicService):
if success:
self.check_pending_app_reapply(context)
def verify_k8s_upgrade_not_in_progress(self):
    """Ensure that no Kubernetes upgrade is currently under way.

    Returns normally when the database holds no Kubernetes upgrade record
    or when the recorded upgrade is in the completed state.

    :raises SysinvException: if an upgrade record exists and its state
        is anything other than complete.
    """
    try:
        upgrade_state = self.dbapi.kube_upgrade_get_one().state
    except exception.NotFound:
        # No upgrade record at all, so nothing can be in progress.
        return

    if upgrade_state != kubernetes.KUBE_UPGRADE_COMPLETE:
        raise exception.SysinvException(_(
            "Kubernetes upgrade is in progress and not completed."))
def verify_upgrade_not_in_progress(self):
""" Check if there is an upgrade in progress.
@ -8787,14 +8802,9 @@ class ConductorManager(service.PeriodicService):
raise exception.SysinvException(_("Platform upgrade in progress."))
try:
kube_upgrade = self.dbapi.kube_upgrade_get_one()
if kube_upgrade.state == kubernetes.KUBE_UPGRADE_COMPLETE:
return
except exception.NotFound:
pass
else:
raise exception.SysinvException(_(
"Kubernetes upgrade is in progress and not completed."))
self.verify_k8s_upgrade_not_in_progress()
except Exception as e:
raise e
def report_partition_mgmt_success(self, host_uuid, idisk_uuid,
partition_uuid):
@ -14017,10 +14027,11 @@ class ConductorManager(service.PeriodicService):
"""Synchronously, determine whether an application
re-apply is needed, and if so, raise the re-apply flag.
Run 2 checks before doing an app evaluation.
First check is a semantic check calling a lifecycle hook which can
Run 3 checks before doing an app evaluation.
First check is to verify whether Kubernetes upgrades are not in progress.
Second check is a semantic check calling a lifecycle hook which can
implement complex logic.
Second check is specified in metadata which allows faster development
Third check is specified in metadata which allows faster development
time, doing simple key:value comparisons. Check that the 'trigger'
parameter of the function contains a list of key:value pairs at a
specified location. Default location for searching is root of 'trigger'
@ -14031,6 +14042,14 @@ class ConductorManager(service.PeriodicService):
:param trigger: dictionary containing at least the 'type' field
"""
# Defer apps reapply evaluation if Kubernetes upgrades are in progress
try:
self.verify_k8s_upgrade_not_in_progress()
except Exception as e:
LOG.info("Deferring apps reapply evaluation. {}".format(str(e)))
return
LOG.info("Evaluating apps reapply {} ".format(trigger))
apps = self.determine_apps_reapply_order(name_only=False, filter_active=True)

View File

@ -74,6 +74,7 @@ class FakeConductorAPI(object):
def __init__(self):
    """Build the fake conductor API used by the kube upgrade tests.

    Each listed conductor call is replaced by a MagicMock so that tests
    can assert on how it was invoked; ``service`` holds a
    ConductorManager instance.
    """
    for call_name in ('kube_download_images',
                      'kube_upgrade_networking',
                      'evaluate_apps_reapply'):
        setattr(self, call_name, mock.MagicMock())
    self.service = ConductorManager('test-host', 'test-topic')
def get_system_health(self, context, force=False, upgrade=False,
@ -765,6 +766,9 @@ class TestPatch(TestKubeUpgrade,
self.assertEqual(result['to_version'], 'v1.43.2')
self.assertEqual(result['state'], new_state)
# Verify that apps reapply evaluation was triggered
self.fake_conductor_api.evaluate_apps_reapply.assert_called_once()
def test_update_state_complete_incomplete_host(self):
# Test updating the state of an upgrade to complete when a host has
# not completed its upgrade