From 94970d72f786136b50c10cac7783432ccff4d9e5 Mon Sep 17 00:00:00 2001 From: Tee Ngo Date: Fri, 17 Jun 2022 22:41:27 -0400 Subject: [PATCH] Upgrade orchestration updates for virtual subclouds A couple of small adjustments to accommodate slow or virtual subclouds. Test Plan: - Verify successful orchestrated upgrade of a large number of virtual subclouds in parallel. Story: 2009725 Task: 45648 Change-Id: Ie116eaa8ee25f2b7b990817ea47cb1d772e11d30 Signed-off-by: Tee Ngo --- .../orchestrator/states/unlock_host.py | 33 +++++++++++-------- .../orchestrator/states/upgrade/activating.py | 4 +-- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/distributedcloud/dcmanager/orchestrator/states/unlock_host.py b/distributedcloud/dcmanager/orchestrator/states/unlock_host.py index 071062813..82258c75c 100644 --- a/distributedcloud/dcmanager/orchestrator/states/unlock_host.py +++ b/distributedcloud/dcmanager/orchestrator/states/unlock_host.py @@ -77,19 +77,26 @@ class UnlockHostState(BaseState): # handle possible unlock failures that can occur in corner cases unlock_counter = 0 - while True: - try: - response = self.get_sysinv_client( - strategy_step.subcloud.name).unlock_host(host.id) - if (response.ihost_action != 'unlock' or response.task != 'Unlocking'): - raise Exception("Unable to unlock host %s" % self.target_hostname) - break - except Exception as e: - if unlock_counter >= self.max_unlock_retries: - raise - unlock_counter += 1 - self.error_log(strategy_step, str(e)) - time.sleep(self.unlock_sleep_duration) + # For simplex subcloud upgrade, the host unlock is already done + # in data migration step. If it gets here, the host is still + # in degraded state, skip the unlock and proceed to the wait loop + # below. + if host.administrative != consts.ADMIN_UNLOCKED: + while True: + try: + response = self.get_sysinv_client( + strategy_step.subcloud.name).unlock_host(host.id) + if (response.ihost_action != 'unlock' or + response.task != 'Unlocking'): + raise Exception("Unable to unlock host %s" + % self.target_hostname) + break + except Exception as e: + if unlock_counter >= self.max_unlock_retries: + raise + unlock_counter += 1 + self.error_log(strategy_step, str(e)) + time.sleep(self.unlock_sleep_duration) # unlock triggers a reboot. # must ignore certain errors until the system completes the reboot diff --git a/distributedcloud/dcmanager/orchestrator/states/upgrade/activating.py b/distributedcloud/dcmanager/orchestrator/states/upgrade/activating.py index a62017e30..afd0f76fb 100644 --- a/distributedcloud/dcmanager/orchestrator/states/upgrade/activating.py +++ b/distributedcloud/dcmanager/orchestrator/states/upgrade/activating.py @@ -16,8 +16,8 @@ ACTIVATING_RETRY_STATES = ['activation-failed', ] ACTIVATING_IN_PROGRESS_STATES = ['activating', 'activating-hosts', ] -# Max time: 45 minutes = 45 queries x 60 seconds sleep between queries -DEFAULT_MAX_QUERIES = 45 +# Max time: 60 minutes = 60 queries x 60 seconds sleep between queries +DEFAULT_MAX_QUERIES = 60 DEFAULT_SLEEP_DURATION = 60 MAX_FAILED_RETRIES = 10