Upgrade orchestration updates for virtual subclouds

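Two small adjustments to accommodate slow or virtual subclouds:
skip the host unlock when the host is already unlocked, and raise
DEFAULT_MAX_QUERIES from 45 to 60 (60 minutes at 60 seconds per query).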

Test Plan:
  - Verify successful orchestrated upgrade of a large
    number of virtual subclouds in parallel.

Story: 2009725
Task: 45648
Change-Id: Ie116eaa8ee25f2b7b990817ea47cb1d772e11d30
Signed-off-by: Tee Ngo <tee.ngo@windriver.com>
This commit is contained in:
Tee Ngo 2022-06-17 22:41:27 -04:00
parent 1102960e72
commit 94970d72f7
2 changed files with 22 additions and 15 deletions

View File

@ -77,19 +77,26 @@ class UnlockHostState(BaseState):
# handle possible unlock failures that can occur in corner cases
unlock_counter = 0
while True:
try:
response = self.get_sysinv_client(
strategy_step.subcloud.name).unlock_host(host.id)
if (response.ihost_action != 'unlock' or response.task != 'Unlocking'):
raise Exception("Unable to unlock host %s" % self.target_hostname)
break
except Exception as e:
if unlock_counter >= self.max_unlock_retries:
raise
unlock_counter += 1
self.error_log(strategy_step, str(e))
time.sleep(self.unlock_sleep_duration)
# For a simplex subcloud upgrade, the host unlock is already done
# in the data migration step. If execution gets here, the host is
# still in a degraded state, so skip the unlock and proceed to the
# wait loop below.
if host.administrative != consts.ADMIN_UNLOCKED:
while True:
try:
response = self.get_sysinv_client(
strategy_step.subcloud.name).unlock_host(host.id)
if (response.ihost_action != 'unlock' or
response.task != 'Unlocking'):
raise Exception("Unable to unlock host %s"
% self.target_hostname)
break
except Exception as e:
if unlock_counter >= self.max_unlock_retries:
raise
unlock_counter += 1
self.error_log(strategy_step, str(e))
time.sleep(self.unlock_sleep_duration)
# Unlock triggers a reboot, so certain errors must be ignored
# until the system completes the reboot.

View File

@ -16,8 +16,8 @@ ACTIVATING_RETRY_STATES = ['activation-failed', ]
ACTIVATING_IN_PROGRESS_STATES = ['activating', 'activating-hosts', ]
# Max time: 45 minutes = 45 queries x 60 seconds sleep between queries
DEFAULT_MAX_QUERIES = 45
# Max time: 60 minutes = 60 queries x 60 seconds sleep between queries
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 60
MAX_FAILED_RETRIES = 10