Files
distcloud/distributedcloud/dcmanager/orchestrator/states/upgrade/upgrading_duplex.py
Hugo Brito 4438b8fd55 Update tox pylint/pep8 for dcmanager
This commit enables the check of new pylint/pep8
violations.

PYLINT - All convention related checks, except:
- missing-class-docstring
- missing-function-docstring
- missing-module-docstring
- consider-using-f-string
- invalid-name
- import-outside-toplevel
- too-many-lines
- consider-iterating-dictionary
- unnecessary-lambda-assignment

PEP8:
- E117: over-indented
- E123: closing bracket does not match indentation
  of opening bracket's line
- E125: continuation line with the same indent as the next
  logical line
- E305: expected 2 blank lines after class or function
  definition
- E402: module level import not at top of file
- E501: line too long
- H216: flag use of third party mock

Test Plan:
1. Perform `tox` command
- Pass in py39, pylint, pep8

Closes-bug: 2033294

Change-Id: I635df8e809905cff582bd9d5eb57b91133560cf9
Signed-off-by: Hugo Brito <hugo.brito@windriver.com>
2024-01-18 21:51:25 +00:00

112 lines
4.4 KiB
Python

#
# Copyright (c) 2020-2021, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# Unlocking a host triggers a reboot, and API calls fail while it is down.
# Failed queries are retried every 10 seconds for at most 30 minutes,
# i.e. 180 attempts.
DEFAULT_FAILED_SLEEP = 10
DEFAULT_MAX_FAILED_QUERIES = (30 * 60) // DEFAULT_FAILED_SLEEP
# Successful queries share the same budget: one every 10 seconds for at
# most 30 minutes, i.e. 180 attempts.
DEFAULT_SLEEP_DURATION = 10
DEFAULT_MAX_QUERIES = (30 * 60) // DEFAULT_SLEEP_DURATION
class UpgradingDuplexState(BaseState):
    """Update state for upgrading a non-simplex subcloud host.

    The state requests an upgrade of ``controller-1`` through the sysinv
    client and then polls the subcloud's upgrade record until it reports
    either 'data-migration-complete' or 'data-migration-failed'.
    """

    def __init__(self, region_name):
        """Configure the follow-up state and the polling limits.

        :param region_name: passed through unchanged to ``BaseState``.
        """
        super().__init__(
            next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1,
            region_name=region_name,
        )
        self.target_hostname = "controller-1"
        # max time to wait (in seconds) is: sleep_duration * max_queries
        self.sleep_duration = DEFAULT_SLEEP_DURATION
        self.max_queries = DEFAULT_MAX_QUERIES
        # Separate budget for queries that raise (e.g. while rebooting):
        # failed_sleep_duration * max_failed_queries seconds.
        self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
        self.failed_sleep_duration = DEFAULT_FAILED_SLEEP

    def perform_state_action(self, strategy_step):
        """Upgrade a duplex host on a subcloud.

        Returns the next state in the state machine on success.
        Any exceptions raised by this method set the strategy to FAILED.

        :param strategy_step: the strategy step being processed; used for
            logging and to resolve the region name.
        :returns: ``self.next_state`` once data migration has completed.
        :raises StrategyStoppedException: if a stop was requested while
            waiting.
        :raises Exception: on timeout, or if data migration failed.
        """
        self.info_log(strategy_step, "Performing duplex upgrade for subcloud")
        region = self.get_region_name(strategy_step)
        host = self.get_sysinv_client(region).get_host(self.target_hostname)
        self.get_sysinv_client(region).upgrade_host(host.id)

        # Wait for controller-1 to reinstall with the load N+1
        # and become locked-disabled-online state.
        final_state = self._wait_for_data_migration(strategy_step, region)

        # Decide on the state observed by the polling loop itself. Querying
        # the API again here would happen outside the retry protection, so a
        # transient failure could fail the strategy after a terminal state
        # had already been seen.
        if final_state == consts.UPGRADE_STATE_DATA_MIGRATION_FAILED:
            raise Exception(
                "Data migration failed on host %s" % self.target_hostname
            )

        # If we reach this point, the upgrade state is
        # 'data-migration-complete' and we can move to the next state.
        return self.next_state

    def _wait_for_data_migration(self, strategy_step, region):
        """Poll the upgrade record until data migration finishes.

        The upgrade action is asynchronous, so the record is queried until it
        reaches a terminal state or a timeout expires. Failed queries (e.g.
        during a reboot) and successful-but-not-finished queries are counted
        and paced separately.

        :param strategy_step: the strategy step, used for logging.
        :param region: region name used to obtain the sysinv client.
        :returns: the terminal upgrade state that was observed, either
            ``consts.UPGRADE_STATE_DATA_MIGRATION_FAILED`` or
            ``consts.UPGRADE_STATE_DATA_MIGRATION_COMPLETE``.
        :raises StrategyStoppedException: if ``self.stopped()`` reports a stop.
        :raises Exception: if either query budget is exhausted.
        """
        terminal_states = (
            consts.UPGRADE_STATE_DATA_MIGRATION_FAILED,
            consts.UPGRADE_STATE_DATA_MIGRATION_COMPLETE,
        )
        fail_counter = 0
        api_counter = 0
        while True:
            # If event handler stop has been triggered, fail the state
            if self.stopped():
                raise StrategyStoppedException()
            try:
                upgrades = self.get_sysinv_client(region).get_upgrades()
                # The list of upgrades will never contain more than one entry.
                if upgrades:
                    state = upgrades[0].state
                    if state in terminal_states:
                        msg = "Upgrade state is %s now" % (state)
                        self.info_log(strategy_step, msg)
                        return state
                # A query succeeded, so only consecutive failures count
                # towards the failure timeout.
                fail_counter = 0
            except Exception:
                # Handle other exceptions due to being unreachable
                # for a significant period of time when there is a
                # controller swact
                fail_counter += 1
                if fail_counter >= self.max_failed_queries:
                    raise Exception("Timeout waiting for reboot to complete")
                time.sleep(self.failed_sleep_duration)
                # skip the api_counter
                continue
            api_counter += 1
            if api_counter >= self.max_queries:
                raise Exception(
                    "Timeout waiting for update state to be updated to "
                    "'data-migration-failed' or 'data-migration-complete'. "
                    "Please check sysinv.log on the subcloud for details."
                )
            time.sleep(self.sleep_duration)