4438b8fd55
This commit enables the check of new pylint/pep8 violations. PYLINT - All convention related checks, except: - missing-class-docstring - missing-function-docstring - missing-module-docstring - consider-using-f-string - invalid-name - import-outside-toplevel - too-many-lines - consider-iterating-dictionary - unnecessary-lambda-assignment PEP8: - E117: over-indented - E123: closing bracket does not match indentation of opening bracket's line - E125: continuation line with the same indent as the next logical line - E305: expected 2 blank lines after class or function definition - E402: module level import not at top of file - E501: line too long - H216: flag use of third party mock Test Plan: 1. Perform `tox` command - Pass in py39, pylint, pep8 Closes-bug: 2033294 Change-Id: I635df8e809905cff582bd9d5eb57b91133560cf9 Signed-off-by: Hugo Brito <hugo.brito@windriver.com>
110 lines
4.4 KiB
Python
110 lines
4.4 KiB
Python
#
|
|
# Copyright (c) 2020-2022, 2024 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
import time
|
|
|
|
from dcmanager.common import consts
|
|
from dcmanager.common.exceptions import StrategyStoppedException
|
|
from dcmanager.orchestrator.states.base import BaseState
|
|
|
|
# When a swact occurs, services become unavailable and API calls may fail.
# Failed queries are tolerated for up to 20 minutes
# (ie: 120 consecutive failed queries with 10 secs sleep after each one).
DEFAULT_MAX_FAILED_QUERIES = 120
DEFAULT_FAILED_SLEEP = 10

# Successful queries that do not yet report the expected controller.
# Max time: 10 minutes = 60 queries x 10 seconds
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10

# After a swact, there is a sleep before proceeding to the next state.
# Added another minute to ensure controller is stable
# (180 seconds = 3 minutes in total).
DEFAULT_SWACT_SLEEP = 180
|
|
|
|
|
|
class SwactHostState(BaseState):
    """Orchestration state that swacts the controllers of a subcloud.

    The state requests a swact so that the host named by ``active`` ends up
    being the active controller, then polls until that is the case.
    """

    def __init__(self, next_state, region_name, active, standby):
        """Store the swact targets and the polling limits.

        :param next_state: forwarded unchanged to ``BaseState``; returned by
            ``perform_state_action`` on success
        :param region_name: forwarded unchanged to ``BaseState``
        :param active: host identifier (as accepted by sysinv ``get_host``)
            of the controller that must be active when the state completes
        :param standby: host identifier of the other controller; the swact
            request is issued against this host
        """
        super(SwactHostState, self).__init__(
            next_state=next_state, region_name=region_name)

        # Controllers taking part in the swact
        self.active = active
        self.standby = standby

        # Polling budget for queries that succeed but do not yet report the
        # expected personality: sleep_duration * max_queries seconds
        self.max_queries = DEFAULT_MAX_QUERIES
        self.sleep_duration = DEFAULT_SLEEP_DURATION

        # Polling budget for consecutive queries that raise:
        # failed_sleep_duration * max_failed_queries seconds
        self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
        self.failed_sleep_duration = DEFAULT_FAILED_SLEEP

    def perform_state_action(self, strategy_step):
        """Swact host on the subcloud

        Returns the next state in the state machine on success.
        Any exceptions raised by this method set the strategy to FAILED.

        :param strategy_step: the step being processed; used to resolve the
            region name and as the context for log messages
        :raises StrategyStoppedException: if ``self.stopped()`` reports a
            stop while waiting for the swact
        :raises Exception: if the swact request is not accepted, or if
            either polling limit is reached
        """
        region = self.get_region_name(strategy_step)

        # Both hosts are looked up before anything else is decided
        wanted_active = self.get_sysinv_client(region).get_host(self.active)
        swact_source = self.get_sysinv_client(region).get_host(self.standby)

        # Nothing to do when the desired host is already the active
        # controller
        personality = wanted_active.capabilities.get('Personality')
        if personality == consts.PERSONALITY_CONTROLLER_ACTIVE:
            self.info_log(
                strategy_step,
                "Host: %s already the active controller." % (self.active))
            return self.next_state

        # Request the swact and make sure it was accepted
        swact_reply = self.get_sysinv_client(region).swact_host(
            swact_source.id)
        if swact_reply.task != 'Swacting':
            raise Exception("Unable to swact to host %s" % self.active)

        # Queries that raise and queries that succeed are counted
        # separately so that each one has its own time limit
        failed_queries = 0
        answered_queries = 0

        while True:
            # If event handler stop has been triggered, fail the state
            if self.stopped():
                raise StrategyStoppedException()

            try:
                # Check whether the desired host now reports the active
                # controller personality
                polled = self.get_sysinv_client(region).get_host(self.active)
                personality = polled.capabilities.get('Personality')
                if personality == consts.PERSONALITY_CONTROLLER_ACTIVE:
                    self.info_log(
                        strategy_step,
                        "Host: %s is now the active controller."
                        % (self.active))
                    break
            except Exception:
                # The subcloud can be unreachable for a significant period
                # of time while the controllers swact, so failures are
                # retried; they do not consume the answered-query budget
                failed_queries += 1
                if failed_queries >= self.max_failed_queries:
                    raise Exception("Timeout waiting for swact to complete")
                time.sleep(self.failed_sleep_duration)
            else:
                # The query worked but the swact has not completed yet: the
                # failure streak is over and one more answered query is used
                failed_queries = 0
                answered_queries += 1
                if answered_queries >= self.max_queries:
                    raise Exception("Timeout waiting for swact to complete. "
                                    "Please check sysinv.log on the subcloud "
                                    "for details.")
                time.sleep(self.sleep_duration)

        # Reaching this point means the loop was left through "break", i.e.
        # the swact succeeded. Wait DEFAULT_SWACT_SLEEP seconds before
        # letting the state machine proceed to the next state.
        self.info_log(
            strategy_step,
            "Waiting %s seconds before proceeding" % DEFAULT_SWACT_SLEEP)
        time.sleep(DEFAULT_SWACT_SLEEP)
        return self.next_state
|