Remove upgrade strategy type for orchestration

The `upgrade` orchestration strategy type is no longer required. Users
should now use the software deploy (`sw-deploy`) orchestration instead.

Test Plan:
PASS: Verify that strategy creation for the `upgrade` type is no longer available.

Story: 2010676
Task: 50398

Depends-on: https://review.opendev.org/c/starlingx/distcloud-client/+/922428

Change-Id: Icfc95d8c3ad64cecc889d96445566f6dad35f71d
Signed-off-by: Hugo Brito <hugo.brito@windriver.com>
Hugo Brito 2024-06-20 16:35:00 -03:00
parent 69970aac60
commit b7f466f5c6
65 changed files with 63 additions and 7807 deletions
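
For readers updating automation: the replacement flow creates a software
deploy strategy where an upgrade strategy was created before. Below is a
minimal, illustrative sketch against the dcmanager REST API; the endpoint
address, port, and payload keys are assumptions (not taken from this change),
and the companion distcloud-client review linked above adds the matching CLI.

# Illustrative only: URL, port, and payload keys are assumed, not verified
# against this change.
import requests

DCMANAGER_URL = "http://192.168.204.2:8119/v1.0"  # assumed dcmanager endpoint
HEADERS = {"X-Auth-Token": "<keystone-token>"}

# Create a software deploy strategy; the removed "upgrade" type would now
# be rejected by the API.
body = {
    "type": "sw-deploy",               # previously "upgrade"
    "subcloud-apply-type": "parallel",
    "max-parallel-subclouds": 2,
}
resp = requests.post(f"{DCMANAGER_URL}/sw-update-strategy",
                     json=body, headers=HEADERS, timeout=30)
resp.raise_for_status()
print(resp.json())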

@ -47,7 +47,7 @@ sw_update_strategy_type:
description: |
Filter to query a particular type of update strategy if it exists.
One of: `firmware`, `kube-rootca-update`, `kubernetes`, `patch`,
`prestage`, or `upgrade`.
`prestage`, or `sw-deploy`.
in: path
required: false
type: string

@ -245,4 +245,4 @@ valid-classmethod-first-arg=cls
[EXCEPTIONS]
# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception
overgeneral-exceptions=builtins.Exception

@ -28,7 +28,6 @@ STRATEGY_NAME_FW_UPDATE = "fw-update"
STRATEGY_NAME_KUBE_ROOTCA_UPDATE = "kube-rootca-update"
STRATEGY_NAME_KUBE_UPGRADE = "kube-upgrade"
STRATEGY_NAME_SW_PATCH = "sw-patch"
STRATEGY_NAME_SW_UPGRADE = "sw-upgrade"
# TODO(nicodemos): Change this to 'sw-deploy' once the new strategy is created
STRATEGY_NAME_SW_USM = "sw-upgrade"
STRATEGY_NAME_SYS_CONFIG_UPDATE = "system-config-update"

@ -44,7 +44,6 @@ SUPPORTED_STRATEGY_TYPES = [
consts.SW_UPDATE_TYPE_KUBERNETES,
consts.SW_UPDATE_TYPE_PATCH,
consts.SW_UPDATE_TYPE_PRESTAGE,
consts.SW_UPDATE_TYPE_UPGRADE,
consts.SW_UPDATE_TYPE_SOFTWARE,
]
@ -53,7 +52,6 @@ FORCE_ALL_TYPES = [
consts.SW_UPDATE_TYPE_KUBE_ROOTCA_UPDATE,
consts.SW_UPDATE_TYPE_KUBERNETES,
consts.SW_UPDATE_TYPE_PRESTAGE,
consts.SW_UPDATE_TYPE_UPGRADE
]

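With the `upgrade` entry dropped from `SUPPORTED_STRATEGY_TYPES`, type
validation now rejects it up front. A self-contained sketch of that check
(the helper name is hypothetical; the list mirrors the diff above):

SUPPORTED_STRATEGY_TYPES = [
    "firmware",
    "kube-rootca-update",
    "kubernetes",
    "patch",
    "prestage",
    "sw-deploy",   # replaces the removed "upgrade" type
]

def validate_strategy_type(requested: str) -> str:
    """Hypothetical helper mirroring the supported-type check."""
    if requested not in SUPPORTED_STRATEGY_TYPES:
        raise ValueError(
            "Strategy of type '%s' is not supported; use one of: %s"
            % (requested, ", ".join(SUPPORTED_STRATEGY_TYPES)))
    return requested

validate_strategy_type("sw-deploy")   # ok
# validate_strategy_type("upgrade")   # now raises ValueError
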
@ -80,7 +80,6 @@ SW_UPDATE_TYPE_KUBE_ROOTCA_UPDATE = "kube-rootca-update"
SW_UPDATE_TYPE_KUBERNETES = "kubernetes"
SW_UPDATE_TYPE_PATCH = "patch"
SW_UPDATE_TYPE_PRESTAGE = "prestage"
SW_UPDATE_TYPE_UPGRADE = "upgrade"
SW_UPDATE_TYPE_SOFTWARE = "sw-deploy"
# Software update states
@ -124,32 +123,12 @@ STRATEGY_STATE_ABORTED = "aborted"
STRATEGY_STATE_FAILED = "failed"
# Patch orchestrations states
STRATEGY_STATE_PRE_CHECK = "pre check"
STRATEGY_STATE_UPDATING_PATCHES = "updating patches"
STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY = "creating VIM patch strategy"
STRATEGY_STATE_DELETING_VIM_PATCH_STRATEGY = "deleting VIM patch strategy"
STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY = "applying VIM patch strategy"
# Upgrade orchestration states
STRATEGY_STATE_PRE_CHECK = "pre check"
STRATEGY_STATE_INSTALLING_LICENSE = "installing license"
STRATEGY_STATE_IMPORTING_LOAD = "importing load"
STRATEGY_STATE_UPDATING_PATCHES = "updating patches"
STRATEGY_STATE_FINISHING_PATCH_STRATEGY = "finishing patch strategy"
STRATEGY_STATE_STARTING_UPGRADE = "starting upgrade"
STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE = "transferring CA certificate"
STRATEGY_STATE_LOCKING_CONTROLLER_0 = "locking controller-0"
STRATEGY_STATE_LOCKING_CONTROLLER_1 = "locking controller-1"
STRATEGY_STATE_UPGRADING_SIMPLEX = "upgrading simplex"
STRATEGY_STATE_UPGRADING_DUPLEX = "upgrading duplex"
STRATEGY_STATE_MIGRATING_DATA = "migrating data"
STRATEGY_STATE_UNLOCKING_CONTROLLER_0 = "unlocking controller-0"
STRATEGY_STATE_UNLOCKING_CONTROLLER_1 = "unlocking controller-1"
STRATEGY_STATE_SWACTING_TO_CONTROLLER_0 = "swacting to controller-0"
STRATEGY_STATE_SWACTING_TO_CONTROLLER_1 = "swacting to controller-1"
STRATEGY_STATE_ACTIVATING_UPGRADE = "activating upgrade"
STRATEGY_STATE_COMPLETING_UPGRADE = "completing upgrade"
STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY = "creating VIM upgrade strategy"
STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY = "applying VIM upgrade strategy"
STRATEGY_STATE_DELETING_LOAD = "deleting load"
# Software orchestration states
STRATEGY_STATE_SW_PRE_CHECK = "software pre check"
@ -314,15 +293,6 @@ BACKUP_STATE_UNKNOWN = 'unknown'
BACKUP_STATE_COMPLETE_LOCAL = 'complete-local'
BACKUP_STATE_COMPLETE_CENTRAL = 'complete-central'
# Upgrades States
UPGRADE_STATE_DATA_MIGRATION = 'data-migration'
UPGRADE_STATE_DATA_MIGRATION_COMPLETE = 'data-migration-complete'
UPGRADE_STATE_DATA_MIGRATION_FAILED = 'data-migration-failed'
UPGRADE_STATE_UPGRADING_CONTROLLERS = 'upgrading-controllers'
UPGRADE_STATE_UPGRADING_HOSTS = 'upgrading-hosts'
UPGRADE_STATE_ACTIVATION_FAILED = 'activation-failed'
UPGRADE_STATE_ACTIVATION_COMPLETE = 'activation-complete'
# Prestage States
PRESTAGE_STATE_PACKAGES = STRATEGY_STATE_PRESTAGE_PACKAGES
PRESTAGE_STATE_IMAGES = STRATEGY_STATE_PRESTAGE_IMAGES

@ -37,7 +37,7 @@ class SoftwareOrchThread(OrchThread):
database as it goes, with state and progress information.
"""
# every state in sw upgrade orchestration should have an operator
# every state in sw deploy orchestration should have an operator
STATE_OPERATORS = {
consts.STRATEGY_STATE_SW_PRE_CHECK: PreCheckState,
consts.STRATEGY_STATE_SW_INSTALL_LICENSE: InstallLicenseState,
@ -51,7 +51,7 @@ class SoftwareOrchThread(OrchThread):
strategy_lock,
audit_rpc_client,
consts.SW_UPDATE_TYPE_SOFTWARE, # software update strategy type
vim.STRATEGY_NAME_SW_UPGRADE, # strategy type used by vim
vim.STRATEGY_NAME_SW_USM, # strategy type used by vim
consts.STRATEGY_STATE_SW_PRE_CHECK) # starting state
# Initialize shared cache instances for the states that require them

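For context, `STATE_OPERATORS` drives a plain state-machine dispatch: the
current state of each strategy step selects an operator whose
`perform_state_action()` returns the next state. A condensed, illustrative
sketch with stand-in names (not the actual OrchThread internals):

class PreCheckState:
    def perform_state_action(self, step):
        # ... perform the pre-check against the subcloud ...
        return "installing license"    # next state on success

STATE_OPERATORS = {"software pre check": PreCheckState()}

def run_step(step, state, operators):
    """Advance one strategy step until no operator handles the state."""
    while state in operators:
        try:
            state = operators[state].perform_state_action(step)
        except Exception:
            return "failed"            # any exception fails the strategy
    return state

print(run_step(None, "software pre check", STATE_OPERATORS))
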
@ -1,77 +0,0 @@
#
# Copyright (c) 2020-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# Max time: 10 minutes = 60 queries x 10 seconds
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10
class LockHostState(BaseState):
"""Orchestration state for locking a host"""
def __init__(self, next_state, region_name, hostname):
super(LockHostState, self).__init__(
next_state=next_state, region_name=region_name)
self.target_hostname = hostname
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
def perform_state_action(self, strategy_step):
"""Locks a host on the subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# Create a sysinv client on the subcloud
sysinv_client = self.get_sysinv_client(strategy_step.subcloud.region_name)
host = sysinv_client.get_host(self.target_hostname)
# if the host is already in the desired state, no need for action
if host.administrative == consts.ADMIN_LOCKED:
msg = "Host: %s already: %s." % (self.target_hostname,
host.administrative)
self.info_log(strategy_step, msg)
return self.next_state
# Invoke the action
# ihost_action is 'lock' and task is set to 'Locking'
response = sysinv_client.lock_host(host.id)
if (response.ihost_action != 'lock' or response.task != 'Locking'):
raise Exception("Unable to lock host %s" % self.target_hostname)
# this action is asynchronous, query until it completes or times out
counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_host(self.target_hostname)
if host.administrative == consts.ADMIN_LOCKED:
msg = "Host: %s is now: %s" % (self.target_hostname,
host.administrative)
self.info_log(strategy_step, msg)
break
counter += 1
if counter >= self.max_queries:
raise Exception("Timeout waiting for lock to complete. "
"Please check sysinv.log on the subcloud "
"for details.")
time.sleep(self.sleep_duration)
# If we are here, the loop broke out cleanly and the action succeeded
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.next_state

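LockHostState above, and the unlock/swact states removed below, all share
one poll-until-timeout shape. Distilled into a generic helper (hypothetical,
for illustration only):

import time

def wait_for(check, stopped, max_queries=60, sleep_duration=10):
    """Poll check() until it succeeds, the strategy is stopped, or
    sleep_duration * max_queries seconds elapse (10 minutes by default)."""
    for _ in range(max_queries):
        if stopped():
            # stands in for StrategyStoppedException
            raise RuntimeError("strategy stopped")
        if check():
            return
        time.sleep(sleep_duration)
    raise TimeoutError("timeout waiting for condition; check sysinv.log")

# e.g.:
# wait_for(lambda: sysinv.get_host("controller-0").administrative == "locked",
#          strategy.stopped)
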
@ -1,5 +1,5 @@
#
# Copyright (c) 2023 Wind River Systems, Inc.
# Copyright (c) 2023-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -30,7 +30,7 @@ class SharedClientCache(object):
:type cache_type: str
:param cache_specification: specifications on how the cache should
operate
:type cache_specification: dcmanager.orchestrator.states.upgrade.cache
:type cache_specification: dcmanager.orchestrator.states.software.cache
.cache_specifications.CacheSpecification
"""
self._client_lock = lockutils.ReaderWriterLock()

@ -1,109 +0,0 @@
#
# Copyright (c) 2020-2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# When a swact occurs, services become unavailable and API calls may fail.
# The max time allowed here is 20 minutes (ie: 120 queries with 10 secs sleep)
DEFAULT_MAX_FAILED_QUERIES = 120
DEFAULT_FAILED_SLEEP = 10
# Max time: 10 minutes = 60 queries x 10 seconds
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10
# After a swact, there is a sleep before proceeding to the next state
# Added another minute to ensure controller is stable
DEFAULT_SWACT_SLEEP = 180
class SwactHostState(BaseState):
"""Orchestration state for host swact"""
def __init__(self, next_state, region_name, active, standby):
super(SwactHostState, self).__init__(
next_state=next_state, region_name=region_name)
self.active = active
self.standby = standby
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def perform_state_action(self, strategy_step):
"""Swact host on the subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
region = self.get_region_name(strategy_step)
active_host = self.get_sysinv_client(region).get_host(self.active)
standby_host = self.get_sysinv_client(region).get_host(self.standby)
# if the desired active host is already the Active Controller, no need for
# action
if active_host.capabilities.get('Personality') == \
consts.PERSONALITY_CONTROLLER_ACTIVE:
msg = "Host: %s already the active controller." % (self.active)
self.info_log(strategy_step, msg)
return self.next_state
# Perform swact action
response = self.get_sysinv_client(region).swact_host(standby_host.id)
if response.task != 'Swacting':
raise Exception("Unable to swact to host %s" % self.active)
# Allow separate durations for failures and api retries
fail_counter = 0
api_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(region).get_host(self.active)
if host.capabilities.get('Personality') == \
consts.PERSONALITY_CONTROLLER_ACTIVE:
msg = "Host: %s is now the active controller." % (self.active)
self.info_log(strategy_step, msg)
break
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact
fail_counter += 1
if fail_counter >= self.max_failed_queries:
raise Exception("Timeout waiting for swact to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
api_counter += 1
if api_counter >= self.max_queries:
raise Exception("Timeout waiting for swact to complete. "
"Please check sysinv.log on the subcloud "
"for details.")
time.sleep(self.sleep_duration)
# If we are here, the loop broke out cleanly and the action succeeded
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
# Adding a 3 minute delay (DEFAULT_SWACT_SLEEP) before moving to the
# next state
self.info_log(strategy_step,
"Waiting %s seconds before proceeding"
% DEFAULT_SWACT_SLEEP)
time.sleep(DEFAULT_SWACT_SLEEP)
return self.next_state

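SwactHostState layers a second budget on top of that polling pattern so that
brief API outages during the swact are tolerated separately from ordinary
negative polls; the pattern in isolation (illustrative):

import time

def poll_tolerating_outages(check, max_queries=60, max_failed_queries=120,
                            sleep=10, failed_sleep=10):
    """Poll check(); API exceptions consume the failure budget, while
    successful-but-negative checks consume the query budget."""
    fail_counter = 0
    api_counter = 0
    while True:
        try:
            if check():
                return
            fail_counter = 0           # a successful call resets failures
        except Exception:
            fail_counter += 1
            if fail_counter >= max_failed_queries:
                raise TimeoutError("timeout waiting through API outage")
            time.sleep(failed_sleep)
            continue                   # outages do not consume api_counter
        api_counter += 1
        if api_counter >= max_queries:
            raise TimeoutError("timeout waiting for condition")
        time.sleep(sleep)
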
@ -1,154 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
import retrying
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# When an unlock occurs, a reboot is triggered. During reboot, API calls fail.
# The max time allowed here is 30 minutes (ie: 30 queries with 1 minute sleep)
DEFAULT_MAX_FAILED_QUERIES = 30
DEFAULT_FAILED_SLEEP = 60
# Before and after reboot, the unlock needs to prepare for shutdown and
# do post-reboot activities during which time the API will succeed, but the
# expected states will not yet be set.
# The max time allowed here is 30 minutes (ie: 30 queries with 1 minute sleep)
DEFAULT_MAX_API_QUERIES = 30
DEFAULT_API_SLEEP = 60
# The unlock command sometimes fails; for instance, on SR-IOV VF changes
# the runtime manifest can take a while to apply the changes while the
# host is still locked
# The unlock shall retry up to 20 minutes (ie: 10 retries with 2 minutes sleep)
DEFAULT_MAX_UNLOCK_RETRIES = 10
DEFAULT_UNLOCK_SLEEP = 120
class UnlockHostState(BaseState):
"""Orchestration state for unlocking a host."""
def __init__(self, next_state, region_name, hostname):
super(UnlockHostState, self).__init__(
next_state=next_state, region_name=region_name)
self.target_hostname = hostname
self.max_api_queries = DEFAULT_MAX_API_QUERIES
self.api_sleep_duration = DEFAULT_API_SLEEP
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
self.max_unlock_retries = DEFAULT_MAX_UNLOCK_RETRIES
self.unlock_sleep_duration = DEFAULT_UNLOCK_SLEEP
def check_host_ready(self, host):
"""Returns True if host is unlocked, enabled and available."""
return (host.administrative == consts.ADMIN_UNLOCKED and
host.operational == consts.OPERATIONAL_ENABLED and
host.availability == consts.AVAILABILITY_AVAILABLE)
def perform_state_action(self, strategy_step):
"""Unlocks a host on the subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# Retrieve host from sysinv client on the subcloud
host = self._get_host_with_retry(strategy_step.subcloud.region_name)
# if the host is already in the desired state, no need for action
if self.check_host_ready(host):
msg = "Host: %s is already: %s %s %s" % (self.target_hostname,
host.administrative,
host.operational,
host.availability)
self.info_log(strategy_step, msg)
return self.next_state
# Invoke the action
# ihost_action is 'unlock' and task is set to 'Unlocking'
# handle possible unlock failures that can occur in corner cases
unlock_counter = 0
# For simplex subcloud upgrade, the host unlock is already done
# in data migration step. If it gets here, the host is still
# in degraded state, skip the unlock and proceed to the wait loop
# below.
if host.administrative != consts.ADMIN_UNLOCKED:
while True:
try:
response = self.get_sysinv_client(
strategy_step.subcloud.region_name).unlock_host(host.id)
if (response.ihost_action != 'unlock' or
response.task != 'Unlocking'):
raise Exception("Unable to unlock host %s"
% self.target_hostname)
break
except Exception as e:
if unlock_counter >= self.max_unlock_retries:
raise
unlock_counter += 1
self.error_log(strategy_step, str(e))
time.sleep(self.unlock_sleep_duration)
# unlock triggers a reboot.
# must ignore certain errors until the system completes the reboot
# or a timeout occurs
# Allow separate durations for failures (ie: reboot) and api retries
api_counter = 0
fail_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(
strategy_step.subcloud.region_name
).get_host(self.target_hostname)
if self.check_host_ready(host):
# Success. Break out of the loop.
msg = "Host: %s is now: %s %s %s" % (self.target_hostname,
host.administrative,
host.operational,
host.availability)
self.info_log(strategy_step, msg)
break
# no exception was raised so reset fail checks
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact, or in the case of AIO-SX,
# when the controller reboots.
fail_counter += 1
if fail_counter >= self.max_failed_queries:
raise Exception("Timeout waiting for reboot to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
# If the max counter is exceeded, raise a timeout exception
api_counter += 1
if api_counter >= self.max_api_queries:
raise Exception("Timeout waiting for unlock to complete")
time.sleep(self.api_sleep_duration)
# If we are here, the loop broke out cleanly and the action succeeded
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.next_state
@retrying.retry(stop_max_attempt_number=consts.PLATFORM_RETRY_MAX_ATTEMPTS,
wait_fixed=consts.PLATFORM_RETRY_SLEEP_MILLIS)
def _get_host_with_retry(self, subcloud_name):
return self.get_sysinv_client(subcloud_name).get_host(self.target_hostname)

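The `@retrying.retry` decorator used by `_get_host_with_retry` is the other
transient-failure tool these states rely on; a self-contained example of the
same usage (the constants are stand-ins for the `consts.PLATFORM_RETRY_*`
values):

import retrying

RETRY_MAX_ATTEMPTS = 3     # stand-in for consts.PLATFORM_RETRY_MAX_ATTEMPTS
RETRY_SLEEP_MILLIS = 500   # stand-in for consts.PLATFORM_RETRY_SLEEP_MILLIS

@retrying.retry(stop_max_attempt_number=RETRY_MAX_ATTEMPTS,
                wait_fixed=RETRY_SLEEP_MILLIS)
def get_host(client, hostname):
    """Each call is transparently retried on any exception, with a fixed
    sleep between attempts, before the final exception propagates."""
    return client.get_host(hostname)
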
@ -1,167 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
ACTIVATING_COMPLETED_STATES = ['activation-complete',
'aborting']
ACTIVATING_RETRY_STATES = ['activation-failed', ]
ACTIVATING_IN_PROGRESS_STATES = ['activating', 'activating-hosts', ]
# Max time: 60 minutes = 60 queries x 60 seconds sleep between queries
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 60
MAX_FAILED_RETRIES = 3
class ActivatingUpgradeState(BaseState):
"""Upgrade state actions for activating an upgrade"""
def __init__(self, region_name):
super(ActivatingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_COMPLETING_UPGRADE,
region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_retries = MAX_FAILED_RETRIES
def get_upgrade_state(self, strategy_step):
try:
upgrades = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_upgrades()
except Exception as exception:
self.warn_log(strategy_step,
"Encountered exception: %s, "
"retry upgrade activation for subcloud %s."
% (str(exception), strategy_step.subcloud.name))
return ACTIVATING_RETRY_STATES[0]
if len(upgrades) == 0:
raise Exception("No upgrades were found to activate")
# The list of upgrades will never contain more than one entry.
return upgrades[0].state
def perform_state_action(self, strategy_step):
"""Activate an upgrade on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
try:
upgrade_state = self.get_upgrade_state(strategy_step)
except Exception as ex:
self.info_log(strategy_step, "%s for %s."
% (str(ex), strategy_step.subcloud.name))
return self.next_state
# Check if an existing upgrade is already activated
if upgrade_state in ACTIVATING_COMPLETED_STATES:
self.info_log(strategy_step,
"Already in an activating state:%s" % upgrade_state)
return self.next_state
# Need to loop
# - attempt an initial activate one or more times
# - loop until state changed to a activating completed state
# - re-attempt activate if activation fails
audit_counter = 0
activate_retry_counter = 0
first_activate = True
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
# if max retries have occurred, fail the state
if activate_retry_counter >= self.max_failed_retries:
error_msg = utils.get_failure_msg(strategy_step.subcloud.region_name)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
details = ("Failed to activate upgrade. Please check "
"sysinv.log on the subcloud or "
"%s on central for details." %
(consts.ERROR_DESC_CMD))
raise Exception(details)
# We may need multiple attempts to issue the first activate
# if keystone is down, impacting the ability to send the activate
if first_activate:
# invoke the API 'upgrade-activate'.
# Normally only auth failures deserve retry
# (no upgrade found, bad host state, auth)
try:
self.get_sysinv_client(
strategy_step.subcloud.region_name).upgrade_activate()
first_activate = False # clear first activation flag
activate_retry_counter = 0 # reset activation retries
except Exception as exception:
# increment the retry counter on failure
activate_retry_counter += 1
self.warn_log(strategy_step,
"Encountered exception: %s, "
"retry upgrade activation for subcloud %s."
% (str(exception),
strategy_step.subcloud.name))
# cannot flow into the remaining code. sleep / continue
time.sleep(self.sleep_duration)
continue
upgrade_state = self.get_upgrade_state(strategy_step)
if upgrade_state in ACTIVATING_RETRY_STATES:
# We failed. Better try again
time.sleep(self.sleep_duration * activate_retry_counter)
activate_retry_counter += 1
self.info_log(strategy_step,
"Activation failed, retrying... State=%s"
% upgrade_state)
try:
self.get_sysinv_client(
strategy_step.subcloud.region_name).upgrade_activate()
except Exception as exception:
self.warn_log(strategy_step,
"Encountered exception: %s, "
"retry upgrade activation for subcloud %s."
% (str(exception),
strategy_step.subcloud.name))
elif upgrade_state in ACTIVATING_IN_PROGRESS_STATES:
self.info_log(strategy_step,
"Activation in progress, waiting... State=%s"
% upgrade_state)
elif upgrade_state in ACTIVATING_COMPLETED_STATES:
self.info_log(strategy_step,
"Activation completed. State=%s"
% upgrade_state)
break
audit_counter += 1
if audit_counter >= self.max_queries:
error_msg = utils.get_failure_msg(strategy_step.subcloud.region_name)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
details = ("Timeout waiting for activation to complete. "
"Please check sysinv.log on the subcloud or "
"%s on central for details." %
(consts.ERROR_DESC_CMD))
raise Exception(details)
time.sleep(self.sleep_duration)
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
db_api.subcloud_update(self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_UPGRADE_ACTIVATED)
return self.next_state

@ -1,19 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.applying_vim_strategy \
import ApplyingVIMStrategyState
class ApplyingVIMUpgradeStrategyState(ApplyingVIMStrategyState):
"""State for applying the VIM upgrade strategy."""
def __init__(self, region_name):
super(ApplyingVIMUpgradeStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0,
region_name=region_name,
strategy_name=vim.STRATEGY_NAME_SW_UPGRADE)

@ -1,98 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade.cache import clients
from dcmanager.orchestrator.states.upgrade.cache.clients import \
CLIENT_READ_EXCEPTIONS
from dcmanager.orchestrator.states.upgrade.cache.clients import \
CLIENT_READ_MAX_ATTEMPTS
from dcmanager.orchestrator.states.upgrade.cache.clients import \
CLIENT_READ_TIMEOUT_SECONDS
class CacheSpecification(object):
def __init__(self, fetch_implementation,
post_filter_implementation=None, valid_filters=frozenset(),
retry_on_exception=CLIENT_READ_EXCEPTIONS,
max_attempts=CLIENT_READ_MAX_ATTEMPTS,
retry_sleep_msecs=consts.PLATFORM_RETRY_SLEEP_MILLIS):
"""Create cache specification.
:param fetch_implementation: implementation on how to retrieve data from
client
:type fetch_implementation: function
:param post_filter_implementation: implementation on how to post-filter
cached data, if any
:type post_filter_implementation: function
:param valid_filters: valid post-filter parameters
:type valid_filters: set
:param retry_on_exception: exceptions to be retried on client read
:type retry_on_exception: type|tuple
:param max_attempts: Maximum number of client read attempts if retryable
exceptions occur
:param retry_sleep_msecs: Fixed backoff interval
"""
self.fetch_implementation = fetch_implementation
self.post_filter_implementation = post_filter_implementation
self.valid_filters = valid_filters
# Retry configurations
self.retry_on_exception = retry_on_exception
self.max_attempts = max_attempts
self.retry_sleep_msecs = retry_sleep_msecs
"""Cache types"""
REGION_ONE_LICENSE_CACHE_TYPE = 'RegionOne system license'
REGION_ONE_SYSTEM_INFO_CACHE_TYPE = 'RegionOne system info'
REGION_ONE_SYSTEM_LOAD_CACHE_TYPE = 'RegionOne system load'
REGION_ONE_PATCHING_CACHE_TYPE = 'RegionOne patching'
"""Cache specifications"""
REGION_ONE_LICENSE_CACHE_SPECIFICATION = CacheSpecification(
lambda: clients.get_sysinv_client().get_license())
REGION_ONE_SYSTEM_INFO_CACHE_SPECIFICATION = CacheSpecification(
lambda: clients.get_sysinv_client().get_system())
REGION_ONE_SYSTEM_LOAD_CACHE_SPECIFICATION = CacheSpecification(
lambda: clients.get_sysinv_client().get_loads())
REGION_ONE_PATCHING_CACHE_SPECIFICATION = CacheSpecification(
lambda: clients.get_patching_client().query(timeout=CLIENT_READ_TIMEOUT_SECONDS),
# Filter results by patching state, if any is given
lambda patches, **filter_params: {
patch_id: patch for patch_id, patch in patches.items()
if filter_params.get('state') is None
or patch.get('repostate') == filter_params.get('state')
},
{'state'}
)
# Map each expected operation type to its required cache types
CACHE_TYPES_BY_OPERATION_TYPE = {
consts.SW_UPDATE_TYPE_UPGRADE: {REGION_ONE_LICENSE_CACHE_TYPE,
REGION_ONE_SYSTEM_INFO_CACHE_TYPE,
REGION_ONE_SYSTEM_LOAD_CACHE_TYPE,
REGION_ONE_PATCHING_CACHE_TYPE}
}
# Map each cache type to its corresponding cache specification
SPECIFICATION_BY_CACHE_TYPE = {
REGION_ONE_LICENSE_CACHE_TYPE: REGION_ONE_LICENSE_CACHE_SPECIFICATION,
REGION_ONE_SYSTEM_INFO_CACHE_TYPE: REGION_ONE_SYSTEM_INFO_CACHE_SPECIFICATION,
REGION_ONE_SYSTEM_LOAD_CACHE_TYPE: REGION_ONE_SYSTEM_LOAD_CACHE_SPECIFICATION,
REGION_ONE_PATCHING_CACHE_TYPE: REGION_ONE_PATCHING_CACHE_SPECIFICATION
}
def get_specifications_for_operation(operation_type):
# Retrieve all cache specifications required by a given operation type
# Return a mapping between each required type to its corresponding specification
return {cache_type: SPECIFICATION_BY_CACHE_TYPE.get(cache_type)
for cache_type in CACHE_TYPES_BY_OPERATION_TYPE.get(operation_type)}

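Taken together, the removed cache modules formed a small read-through cache
layer: an operation type maps to required cache types, each type to a
specification, and a repository hands out one shared cache per type. An
illustrative usage sketch of the removed interfaces (the SharedCacheRepository
import path is an assumption; the other names appear in the diffs):

# Historical usage of interfaces this commit removes; these imports no
# longer resolve after the change.
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import (
    REGION_ONE_LICENSE_CACHE_TYPE,
    REGION_ONE_PATCHING_CACHE_TYPE,
)
# Assumed module path for the repository class shown further below:
from dcmanager.orchestrator.states.upgrade.cache.shared_cache_repository import (
    SharedCacheRepository,
)

repo = SharedCacheRepository(operation_type=consts.SW_UPDATE_TYPE_UPGRADE)
repo.initialize_caches()

# The first read fetches from the RegionOne client and populates the cache;
# later reads are served from the cache. Filters are applied post-read and
# never stored.
committed = repo.read(REGION_ONE_PATCHING_CACHE_TYPE,
                      state=patching_v1.PATCH_STATE_COMMITTED)
license_info = repo.read(REGION_ONE_LICENSE_CACHE_TYPE)
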
@ -1,56 +0,0 @@
#
# Copyright (c) 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import socket
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.patching_v1 import PatchingClient
from dccommon.drivers.openstack.sdk_platform import (
OptimizedOpenStackDriver as OpenStackDriver
)
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dcmanager.common import utils
LOG = logging.getLogger(__name__)
""" Default timeout configurations for client reads """
CLIENT_READ_TIMEOUT_SECONDS = 60
CLIENT_READ_EXCEPTIONS = (socket.timeout, keystone_exceptions.ServiceUnavailable)
CLIENT_READ_MAX_ATTEMPTS = 2
""" Helper functions to retrieve clients for caching """
def get_sysinv_client():
ks_client = get_keystone_client()
return SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, ks_client.session,
endpoint=ks_client.endpoint_cache.get_endpoint('sysinv'),
timeout=CLIENT_READ_TIMEOUT_SECONDS)
def get_patching_client():
ks_client = get_keystone_client()
return PatchingClient(dccommon_consts.DEFAULT_REGION_NAME, ks_client.session,
endpoint=ks_client.endpoint_cache.get_endpoint('patching'))
def get_keystone_client(region_name=dccommon_consts.DEFAULT_REGION_NAME):
"""Construct a (cached) keystone client (and token)"""
try:
os_client = OpenStackDriver(
region_name=region_name,
region_clients=["sysinv"],
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
)
return os_client.keystone_client
except Exception:
LOG.warning('Failure initializing KeystoneClient for region: %s'
% region_name)
raise

@ -1,39 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log
from dcmanager.common.exceptions import InvalidParameterValue
from dcmanager.orchestrator.states.upgrade.cache import cache_specifications
from dcmanager.orchestrator.states.upgrade.cache.shared_client_cache import \
SharedClientCache
LOG = log.getLogger(__name__)
class SharedCacheRepository(object):
def __init__(self, operation_type):
self._shared_caches = {}
self._operation_type = operation_type
def initialize_caches(self):
# Retrieve specifications for each cache type required by the operation
# Return mapping between each required type to a single cache instance of it
self._shared_caches = {
cache_type: SharedClientCache(cache_type, cache_specification)
for cache_type, cache_specification in
cache_specifications.get_specifications_for_operation(
self._operation_type).items()
}
def read(self, cache_type, **filter_params):
cache = self._shared_caches.get(cache_type)
if cache:
return cache.read(**filter_params)
else:
raise InvalidParameterValue(err="Specified cache type '%s' not "
"present" % cache_type)

@ -1,125 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import retrying
from oslo_concurrency import lockutils
from oslo_log import log
from dcmanager.common.exceptions import InvalidParameterValue
LOG = log.getLogger(__name__)
class SharedClientCache(object):
"""Data cache for sharing client or API data between concurrent threads
Used to avoid repeated requests and prevent client overload.
Cache is not self refreshing. User of the cache is responsible for triggering
the refresh.
"""
def __init__(self, cache_type, cache_specification):
"""Create cache instance.
:param cache_type: type of data being cached, for logging
:type cache_type: str
:param cache_specification: specifications on how the cache should
operate
:type cache_specification: dcmanager.orchestrator.states.upgrade.cache
.cache_specifications.CacheSpecification
"""
self._client_lock = lockutils.ReaderWriterLock()
self._cache = None
# Cache configurations
self._cache_type = cache_type
self._valid_filters = cache_specification.valid_filters
# Retry configurations
self._max_attempts = cache_specification.max_attempts
self._retry_sleep_msecs = cache_specification.retry_sleep_msecs
# Add retry to client read if any retryable exception is provided
self._load_data_from_client = cache_specification.fetch_implementation
retry_on_exception = cache_specification.retry_on_exception
if retry_on_exception:
retry = retrying.retry(retry_on_exception=lambda
ex: isinstance(ex, retry_on_exception),
stop_max_attempt_number=self._max_attempts,
wait_fixed=self._retry_sleep_msecs,
wait_func=self._retry_client_read)
self._load_data_from_client = \
retry(cache_specification.fetch_implementation)
# Use default implementation with no filtering if none is provided
self._post_filter_impl = cache_specification.post_filter_implementation\
or (lambda data, **filter_params: data)
def read(self, **filter_params):
"""Retrieve data from cache, if available.
Read from client and (re)populate cache, if not.
Only one thread may access the client at a time to prevent overload.
Concurrent reads are blocked until client read completes/fails. A recheck
for updates on the cache is performed afterwards.
Post-filtering can be applied to the results before returning. Data saved to
the cache will not include any filtering applied to returned data.
:param filter_params: parameters to be used for post-filtering
:type filter_params: string
:return: Cached data, filtered according to parameters given
:raises RuntimeError: If cache read fails due to concurrent client read error
:raises InvalidParameterError: If invalid filter parameters are given
"""
# Use data stored in the cache, if present. Otherwise, read and cache
# data from client
if self._cache is None:
self._cache_data_from_client()
# Filter cached data and return results
return self._post_filter(self._cache, **filter_params)
def _cache_data_from_client(self):
# Read from the client and update cache if no concurrent write is in progress
if self._client_lock.owner != lockutils.ReaderWriterLock.WRITER:
with self._client_lock.write_lock():
# Atomically fetch data from client and update the cache
LOG.info("Reading data from %s client for caching" %
self._cache_type)
self._cache = self._load_data_from_client()
else:
# If a concurrent write is in progress, wait for it and recheck cache
with self._client_lock.read_lock():
if self._cache is None:
raise RuntimeError("Failed to retrieve data from %s cache. "
"Possible failure on concurrent client "
"read." % self._cache_type)
def _retry_client_read(self, attempt, _):
# To be called when a client read operation fails with a retryable error
# After this, read operation should be retried
LOG.warn("Retryable error occurred while reading from %s client "
"(Attempt %s/%s)" % (self._cache_type, attempt, self._max_attempts))
return self._retry_sleep_msecs
def _post_filter(self, data, **filter_params):
# Validate the parameters and apply specified filter implementation
self._validate_filter_params(**filter_params)
return self._post_filter_impl(data, **filter_params)
def _validate_filter_params(self, **filter_params):
# Compare each passed parameter against the specified valid parameters
# Raise an exception if any unexpected parameter is found
if filter_params:
invalid_params = set(filter_params.keys()) - self._valid_filters
if invalid_params:
raise InvalidParameterValue(err="Invalid filter parameters: %s" %
invalid_params)

@ -1,133 +0,0 @@
#
# Copyright (c) 2020-2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
import retrying
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
# Max time: 10 minutes = 60 queries x 10 seconds between each query
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10
class CompletingUpgradeState(BaseState):
"""Upgrade state actions for completing an upgrade"""
def __init__(self, region_name):
super(CompletingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_DELETING_LOAD, region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
@retrying.retry(stop_max_attempt_number=consts.PLATFORM_RETRY_MAX_ATTEMPTS,
wait_fixed=consts.PLATFORM_RETRY_SLEEP_MILLIS)
def _get_software_version(self, strategy_step):
"""Internal utility method to query software version from a subcloud
This method is 'retry' wrapped to attempt multiple times with a
small wait period between attempts if any exception is raised
"""
region = self.get_region_name(strategy_step)
return self.get_sysinv_client(region).get_system().software_version
@retrying.retry(stop_max_attempt_number=consts.PLATFORM_RETRY_MAX_ATTEMPTS,
wait_fixed=consts.PLATFORM_RETRY_SLEEP_MILLIS)
def _get_upgrades(self, strategy_step):
"""Internal utility method to query a subcloud for its upgrades
This method is 'retry' wrapped to attempt multiple times with a
small wait period between attempts if any exception is raised
"""
region = self.get_region_name(strategy_step)
return self.get_sysinv_client(region).get_upgrades()
@retrying.retry(stop_max_attempt_number=consts.PLATFORM_RETRY_MAX_ATTEMPTS,
wait_fixed=consts.PLATFORM_RETRY_SLEEP_MILLIS)
def _upgrade_complete(self, strategy_step):
"""Internal utility method to complete an upgrade in a subcloud
This method is 'retry' wrapped to attempt multiple times with a
small wait period between attempts if any exception is raised
returns None
"""
region = self.get_region_name(strategy_step)
return self.get_sysinv_client(region).upgrade_complete()
def finalize_upgrade(self, strategy_step):
software_version = self._get_software_version(strategy_step)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_DONE,
software_version=software_version)
return self.next_state
# todo(abailey): determine if service restarts can be made predictable
# todo(abailey): other states should have similar retry decorators and
# this may also be reasonable to add within the client API calls.
def perform_state_action(self, strategy_step):
"""Complete an upgrade on a subcloud
We should never cache the client. re-query it.
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
This state runs during a time when manifests are applying and services
are restarting, and therefore any API call in this method can randomly
fail. To accommodate this, every call is wrapped with retries.
"""
# upgrade-complete causes the upgrade to be deleted.
# if no upgrade exists, there is no need to call it.
# The API should always return a list
upgrades = self._get_upgrades(strategy_step)
if len(upgrades) == 0:
self.info_log(strategy_step,
"No upgrades exist. Nothing needs completing")
return self.finalize_upgrade(strategy_step)
# invoke the API 'upgrade-complete'
# This is a partially blocking call that raises exception on failure.
# We will re-attempt even if that failure is encountered
try:
message = self._upgrade_complete(strategy_step)
except Exception as e:
msg = ("Failed to complete upgrade. %s" %
str(e))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
raise
# 'completion' deletes the upgrade. Need to loop until it is deleted
counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
upgrades = self._get_upgrades(strategy_step)
if len(upgrades) == 0:
self.info_log(strategy_step, "Upgrade completed.")
break
counter += 1
if counter >= self.max_queries:
msg = ("Timeout waiting for completion to complete: %s:" %
message)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
raise Exception(msg)
time.sleep(self.sleep_duration)
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.finalize_upgrade(strategy_step)

@ -1,19 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.creating_vim_strategy \
import CreatingVIMStrategyState
class CreatingVIMUpgradeStrategyState(CreatingVIMStrategyState):
"""State for creating the VIM upgrade strategy."""
def __init__(self, region_name):
super(CreatingVIMUpgradeStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY,
region_name=region_name,
strategy_name=vim.STRATEGY_NAME_SW_UPGRADE)

@ -1,71 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# Max time: 10 minutes = 60 queries x 10 seconds between each query
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10
class DeletingLoadState(BaseState):
"""Upgrade state actions for deleting the N load after N+1 upgrade"""
def __init__(self, region_name):
super(DeletingLoadState, self).__init__(
next_state=consts.STRATEGY_STATE_COMPLETE, region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
def perform_state_action(self, strategy_step):
"""Delete the N load on the subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# get the sysinv client for the subcloud
sysinv_client = self.get_sysinv_client(strategy_step.subcloud.region_name)
current_loads = sysinv_client.get_loads()
load_id = None
for load in current_loads:
if load.state == 'imported':
load_id = load.id
load_version = load.software_version
break
if load_id:
sysinv_client.delete_load(load_id)
counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
# Get a sysinv client each time. It will automatically renew the
# token if it is about to expire.
sysinv_client = \
self.get_sysinv_client(strategy_step.subcloud.region_name)
if len(sysinv_client.get_loads()) == 1:
msg = "Load %s deleted." % load_version
self.info_log(strategy_step, msg)
break
counter += 1
if counter >= self.max_queries:
raise Exception("Timeout waiting for load delete to complete")
time.sleep(self.sleep_duration)
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.next_state

@ -1,101 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_PATCHING_CACHE_TYPE
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
# todo(jcasteli): Refactor instead of duplicating code from patch_orch_thread.py
class FinishingPatchStrategyState(BaseState):
"""Upgrade state for finishing patch strategy"""
def __init__(self, region_name):
super(FinishingPatchStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_STARTING_UPGRADE,
region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
def perform_state_action(self, strategy_step):
"""Clean up patches in this subcloud (commit, delete).
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
self.info_log(strategy_step, "Finishing patch strategy")
if strategy_step.subcloud_id is None:
# This is the SystemController. No cleanup is required.
self.info_log(strategy_step, "Skipping finish for SystemController")
return self.next_state
regionone_committed_patches = self._read_from_cache(
REGION_ONE_PATCHING_CACHE_TYPE,
state=patching_v1.PATCH_STATE_COMMITTED
)
self.debug_log(
strategy_step,
"regionone_committed_patches: %s" % regionone_committed_patches)
committed_patch_ids = list()
for patch_id in regionone_committed_patches.keys():
committed_patch_ids.append(patch_id)
self.debug_log(strategy_step,
"RegionOne committed_patch_ids: %s" % committed_patch_ids)
subcloud_patches = self.get_patching_client(
strategy_step.subcloud.region_name).query()
self.debug_log(strategy_step,
"Patches for subcloud: %s" % subcloud_patches)
patches_to_commit = list()
patches_to_delete = list()
# For this subcloud, determine which patches should be committed and
# which should be deleted. We check the patchstate here because
# patches cannot be deleted or committed if they are in a partial
# state (e.g. Partial-Apply or Partial-Remove).
subcloud_patch_ids = list(subcloud_patches.keys())
for patch_id in subcloud_patch_ids:
if subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_AVAILABLE:
self.info_log(strategy_step,
"Patch %s will be deleted from subcloud" % patch_id)
patches_to_delete.append(patch_id)
elif subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_APPLIED:
if patch_id in committed_patch_ids:
self.info_log(
strategy_step,
"Patch %s will be committed in subcloud" % patch_id)
patches_to_commit.append(patch_id)
if patches_to_delete:
self.info_log(strategy_step, "Deleting patches %s" % patches_to_delete)
self.get_patching_client(
strategy_step.subcloud.name).delete(patches_to_delete)
if self.stopped():
raise StrategyStoppedException()
if patches_to_commit:
self.info_log(strategy_step,
"Committing patches %s in subcloud" % patches_to_commit)
self.get_patching_client(
strategy_step.subcloud.region_name).commit(patches_to_commit)
return self.next_state

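The commit/delete partitioning in `FinishingPatchStrategyState` is a pure
dict computation and easy to check in isolation; a reduced sketch with
made-up patch data:

# Reduced version of the partitioning logic, with sample data.
AVAILABLE, APPLIED = "Available", "Applied"

subcloud_patches = {
    "PATCH_0001": {"patchstate": APPLIED},     # committed in RegionOne
    "PATCH_0002": {"patchstate": APPLIED},     # applied but not committed
    "PATCH_0003": {"patchstate": AVAILABLE},   # unused: delete it
}
regionone_committed_ids = ["PATCH_0001"]

patches_to_delete = [p for p, info in subcloud_patches.items()
                     if info["patchstate"] == AVAILABLE]
patches_to_commit = [p for p, info in subcloud_patches.items()
                     if info["patchstate"] == APPLIED
                     and p in regionone_committed_ids]

assert patches_to_delete == ["PATCH_0003"]
assert patches_to_commit == ["PATCH_0001"]
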
@ -1,263 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dccommon.exceptions import LoadMaxReached
from dcmanager.common import consts
from dcmanager.common import utils
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common.exceptions import VaultLoadMissingError
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_SYSTEM_INFO_CACHE_TYPE
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_SYSTEM_LOAD_CACHE_TYPE
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
MAX_FAILED_RETRIES = 5
LOAD_IMPORT_REQUEST_TYPE = 'import'
LOAD_DELETE_REQUEST_TYPE = 'delete'
class ImportingLoadState(BaseState):
"""Upgrade state for importing a load"""
def __init__(self, region_name):
super(ImportingLoadState, self).__init__(
next_state=consts.STRATEGY_STATE_UPDATING_PATCHES,
region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_load_import_retries = MAX_FAILED_RETRIES
def get_load(self, strategy_step, request_info):
self.info_log(strategy_step, "Checking load state...")
load_id = request_info.get('load_id')
load_version = request_info.get('load_version')
request_type = request_info.get('type')
load = None
try:
if request_type == LOAD_DELETE_REQUEST_TYPE:
self.info_log(strategy_step, "Retrieving load list from subcloud...")
# success when only one load, the active load, remains
if len(self.get_sysinv_client(
strategy_step.subcloud.region_name).get_loads()) == 1:
msg = "Load: %s has been removed." % load_version
self.info_log(strategy_step, msg)
return True
else:
load = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_load(load_id)
if load.state == consts.IMPORTED_LOAD_STATE:
# success when load is imported
msg = "Load: %s is now: %s" % (load_version,
load.state)
self.info_log(strategy_step, msg)
return True
except Exception as exception:
self.warn_log(strategy_step,
"Encountered exception: %s, "
"retry load operation %s."
% (str(exception), request_type))
if load and load.state == consts.ERROR_LOAD_STATE:
self.error_log(strategy_step,
"Load %s failed import" % load_version)
raise Exception("Failed to import load. Please check sysinv.log "
"on the subcloud for details.")
# return False to allow for retry if not at limit
return False
def _wait_for_request_to_complete(self, strategy_step, request_info):
counter = 0
request_type = request_info.get('type')
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
# query for load operation success
if self.get_load(strategy_step, request_info):
break
counter += 1
self.debug_log(
strategy_step,
"Waiting for load %s to complete, iter=%d" % (request_type, counter))
if counter >= self.max_queries:
raise Exception("Timeout waiting for %s to complete"
% request_type)
time.sleep(self.sleep_duration)
def _get_subcloud_load_info(self, strategy_step, target_version):
load_info = {}
# Check if the load is already imported by checking the version
current_loads = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_loads()
for load in current_loads:
if load.software_version == target_version:
load_info['load_id'] = load.id
load_info['load_version'] = load.software_version
self.info_log(strategy_step,
"Load:%s already found" % target_version)
return True, load_info
elif load.state == consts.IMPORTED_LOAD_STATE or \
load.state == consts.ERROR_LOAD_STATE:
load_info['load_id'] = load.id
load_info['load_version'] = load.software_version
return False, load_info
def perform_state_action(self, strategy_step):
"""Import a load on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# determine the version of the system controller in region one
target_version = self._read_from_cache(REGION_ONE_SYSTEM_INFO_CACHE_TYPE)\
.software_version
load_applied, req_info =\
self._get_subcloud_load_info(strategy_step, target_version)
if load_applied:
return self.next_state
load_id_to_be_deleted = req_info.get('load_id')
if load_id_to_be_deleted is not None:
self.info_log(strategy_step,
"Deleting load %s..." % load_id_to_be_deleted)
self.get_sysinv_client(strategy_step.subcloud.region_name).\
delete_load(load_id_to_be_deleted)
req_info['type'] = LOAD_DELETE_REQUEST_TYPE
self._wait_for_request_to_complete(strategy_step, req_info)
subcloud_type = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_system().system_mode
load_import_retry_counter = 0
load = None
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
# For simplex we only import the load record, not the entire ISO
loads = self._read_from_cache(REGION_ONE_SYSTEM_LOAD_CACHE_TYPE)
matches = [
load for load in loads if load.software_version == target_version]
target_load = matches[0].to_dict()
# Send only the required fields
creation_keys = ['software_version',
'compatible_version',
'required_patches']
target_load = {key: target_load[key] for key in creation_keys}
try:
load = self.get_sysinv_client(strategy_step.subcloud.region_name).\
import_load_metadata(target_load)
self.info_log(strategy_step,
"Load: %s is now: %s" % (
load.software_version, load.state))
except Exception as e:
msg = ("Failed to import load metadata. %s" %
str(e))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
self.error_log(strategy_step, msg)
raise
else:
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
load_import_retry_counter += 1
try:
# ISO and SIG files are found in the vault under a version
# directory
self.info_log(strategy_step, "Getting vault load files...")
iso_path, sig_path = utils.get_vault_load_files(target_version)
if not iso_path:
message = (
"Failed to get upgrade load info for subcloud %s" %
strategy_step.subcloud.name)
raise Exception(message)
# Call the API. import_load blocks until the load state is
# 'importing'
self.info_log(strategy_step, "Sending load import request...")
load = self.get_sysinv_client(
strategy_step.subcloud.region_name
).import_load(iso_path, sig_path)
break
except VaultLoadMissingError:
raise
except LoadMaxReached:
# A prior import request may have encountered an exception but
# the request actually continued with the import operation in the
# subcloud. This has been observed when performing multiple
# parallel upgrade in which resource/link may be saturated.
# In such case allow continue for further checks
# (i.e. at wait_for_request_to_complete)
self.info_log(strategy_step,
"Load at max number of loads")
break
except Exception as e:
self.warn_log(strategy_step,
"load import retry required due to %s iter: %d" %
(e, load_import_retry_counter))
if load_import_retry_counter >= self.max_load_import_retries:
self.error_log(strategy_step, str(e))
raise Exception("Failed to import load. Please check "
"sysinv.log on the subcloud for details.")
time.sleep(self.sleep_duration)
if load is None:
_, load_info = self._get_subcloud_load_info(
strategy_step, target_version)
load_id = load_info.get('load_id')
software_version = load_info['load_version']
else:
load_id = load.id
software_version = load.software_version
if not load_id:
raise Exception("The subcloud load was not found.")
if software_version != target_version:
raise Exception("The imported load was not the expected version.")
try:
self.info_log(
strategy_step,
"Load import request accepted, load software version = %s"
% software_version)
req_info['load_id'] = load_id
req_info['load_version'] = target_version
req_info['type'] = LOAD_IMPORT_REQUEST_TYPE
self.info_log(
strategy_step,
"Waiting for state to change from importing to imported...")
self._wait_for_request_to_complete(strategy_step, req_info)
except Exception as e:
self.error_log(strategy_step, str(e))
raise Exception("Failed to import load. Please check sysinv.log on "
"the subcloud for details.")
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.next_state

@ -1,103 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon import consts as dccommon_consts
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_LICENSE_CACHE_TYPE
# When a license is not installed, this will be part of the API error string
LICENSE_FILE_NOT_FOUND_SUBSTRING = "License file not found"
class InstallingLicenseState(BaseState):
"""Upgrade state action for installing a license"""
def __init__(self, region_name):
super(InstallingLicenseState, self).__init__(
next_state=consts.STRATEGY_STATE_IMPORTING_LOAD, region_name=region_name)
@staticmethod
def license_up_to_date(target_license, existing_license):
return target_license == existing_license
def perform_state_action(self, strategy_step):
"""Install the License for a software upgrade in this subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# check if the system controller has a license
system_controller_license = self._read_from_cache(
REGION_ONE_LICENSE_CACHE_TYPE)
# get_license returns a dictionary with keys: content and error
# 'content' can be an empty string in success or failure case.
# 'error' is an empty string only in success case.
target_license = system_controller_license.get('content')
target_error = system_controller_license.get('error')
# If the system controller does not have a license, do not attempt
# to install licenses on subclouds, simply proceed to the next stage
if len(target_error) != 0:
if LICENSE_FILE_NOT_FOUND_SUBSTRING in target_error:
self.info_log(strategy_step,
"System Controller License missing: %s."
% target_error)
return self.next_state
else:
# An unexpected error occurred querying the license
message = (
'An unexpected error occurred querying the license %s. '
'Detail: %s' % (dccommon_consts.SYSTEM_CONTROLLER_NAME,
target_error))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=message[0:consts.ERROR_DESCRIPTION_LENGTH])
raise exceptions.LicenseInstallError(
subcloud_id=dccommon_consts.SYSTEM_CONTROLLER_NAME,
error_message=target_error)
# retrieve the keystone session for the subcloud and query its license
subcloud_sysinv_client = \
self.get_sysinv_client(strategy_step.subcloud.region_name)
subcloud_license_response = subcloud_sysinv_client.get_license()
subcloud_license = subcloud_license_response.get('content')
subcloud_error = subcloud_license_response.get('error')
# Skip license install if the license is already up to date
# If there was not an error, there might be a license
if len(subcloud_error) == 0:
if self.license_up_to_date(target_license, subcloud_license):
self.info_log(strategy_step, "License up to date.")
return self.next_state
else:
self.debug_log(strategy_step, "License mismatch. Updating.")
else:
self.debug_log(strategy_step, "License missing. Installing.")
# Install the license
install_rc = subcloud_sysinv_client.install_license(target_license)
install_error = install_rc.get('error')
if len(install_error) != 0:
# Save error response from sysinv into subcloud error description.
# Provide exception with sysinv error response to strategy_step details
message = ('Error installing license on subcloud %s. Detail: %s' %
(strategy_step.subcloud.name,
install_error))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=message[0:consts.ERROR_DESCRIPTION_LENGTH])
raise exceptions.LicenseInstallError(
subcloud_id=strategy_step.subcloud_id,
error_message=install_error)
# The license has been successfully installed. Move to the next stage
self.info_log(strategy_step, "License installed.")
return self.next_state
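
The license flow above is an idempotent ensure step: read the current value, return early when it already matches the target, install otherwise. A distilled sketch of that shape, with caller-supplied callables standing in for the sysinv client:

def ensure_license(target, read_current, install):
    """Idempotently install a license: skip when already up to date.

    read_current() returns the currently installed license (or None);
    install(target) applies the new one. Both are caller-supplied.
    """
    if read_current() == target:
        return "up-to-date"
    install(target)
    return "installed"

# Usage with an in-memory stand-in for the subcloud's license store.
store = {"license": None}
print(ensure_license("LIC-v2", lambda: store["license"],
                     lambda lic: store.update(license=lic)))  # installed
print(ensure_license("LIC-v2", lambda: store["license"],
                     lambda lic: store.update(license=lic)))  # up-to-date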

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.lock_host import LockHostState
class LockDuplexState(LockHostState):
"""Orchestration state for locking controller-1 host"""
def __init__(self, region_name):
super(LockDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UPGRADING_DUPLEX,
region_name=region_name,
hostname="controller-1")

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.lock_host import LockHostState
class LockSimplexState(LockHostState):
"""Orchestration state for locking controller-0 host"""
def __init__(self, region_name):
super(LockSimplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UPGRADING_SIMPLEX,
region_name=region_name,
hostname="controller-0",)

View File

@ -1,183 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import time
from dccommon import consts as dccommon_consts
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon.utils import AnsiblePlaybook
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
ANSIBLE_UPGRADE_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/upgrade_platform.yml'
# When an unlock occurs, a reboot is triggered. During reboot, API calls fail.
# The max time allowed here is 30 minutes (ie: 30 queries with 1 minute sleep)
DEFAULT_MAX_FAILED_QUERIES = 30
DEFAULT_FAILED_SLEEP = 60
# after reboot, the unlock needs to do post-reboot activities during which
# time the API will succeed, but the expected states will not yet be set.
# The max time allowed here is 30 minutes (ie: 30 queries with 1 minute sleep)
DEFAULT_MAX_API_QUERIES = 30
DEFAULT_API_SLEEP = 60
def migrate_subcloud_data(migrate_command, log_file, subcloud_name):
try:
ansible = AnsiblePlaybook(subcloud_name)
ansible.run_playbook(log_file, migrate_command)
except PlaybookExecutionFailed:
msg_orch = ("Failed to migrate data, check individual "
"log at %s or run %s for details"
% (log_file, consts.ERROR_DESC_CMD))
raise Exception(msg_orch)
class MigratingDataState(BaseState):
"""Upgrade step for migrating data"""
def __init__(self, region_name):
super(MigratingDataState, self).__init__(
next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0,
region_name=region_name)
self.max_api_queries = DEFAULT_MAX_API_QUERIES
self.api_sleep_duration = DEFAULT_API_SLEEP
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def wait_for_unlock(self, strategy_step):
"""This method returns successfully when the unlock completes.
An exception is raised if it does not recover on time.
"""
# This code is 'borrowed' from the unlock_host state
# Allow separate durations for failures (ie: reboot) and api retries
api_counter = 0
fail_counter = 0
# todo(abailey): only supports AIO-SX here
target_hostname = 'controller-0'
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_host(target_hostname)
if (host.administrative == consts.ADMIN_UNLOCKED and
host.operational == consts.OPERATIONAL_ENABLED):
# Success. Break out of the loop.
msg = "Host: %s is now: %s %s" % (target_hostname,
host.administrative,
host.operational)
self.info_log(strategy_step, msg)
break
# no exception was raised so reset fail and auth checks
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact, or in the case of AIO-SX,
# when the controller reboots.
fail_counter += 1
if fail_counter >= self.max_failed_queries:
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED)
raise Exception("Timeout waiting on reboot to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
# If the max counter is exceeded, raise a timeout exception
api_counter += 1
if api_counter >= self.max_api_queries:
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED)
raise Exception("Timeout waiting for unlock to complete")
time.sleep(self.api_sleep_duration)
def perform_state_action(self, strategy_step):
"""Migrate data for an upgrade on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# To account for abrupt termination of dcmanager, check the last known
# subcloud deploy status. If it is migrated/complete, advance to the next
# stage. If it is 'migrating', fail the strategy. The user will need to
# delete the existing strategy, create a new one and apply. Pre-check will
# set the appropriate next step for this subcloud.
subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id)
if (subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATED or
subcloud.deploy_status == consts.DEPLOY_STATE_DONE):
return self.next_state
elif subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATING_DATA:
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED)
raise Exception("Previous data migration was abruptly terminated. "
"Please try again with a new upgrade strategy.")
# If it gets here, the subcloud deploy status must be 'installed'.
self.info_log(strategy_step, "Start migrating data...")
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_MIGRATING_DATA)
ansible_subcloud_inventory_file = os.path.join(
dccommon_consts.ANSIBLE_OVERRIDES_PATH,
strategy_step.subcloud.name + consts.INVENTORY_FILE_POSTFIX)
log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
'_playbook_output.log'
# Send skip_patching=true to prevent the playbook from applying any patches
# present in the upgrade_data. All the required patches will be included in
# the generated install iso.
data_migrating_cmd = [
"ansible-playbook", ANSIBLE_UPGRADE_PLAYBOOK,
"-i", ansible_subcloud_inventory_file, "-e",
"ansible_ssh_pass=%s ansible_become_pass=%s skip_patching=true"
% (consts.TEMP_SYSADMIN_PASSWORD, consts.TEMP_SYSADMIN_PASSWORD)]
try:
migrate_subcloud_data(data_migrating_cmd, log_file,
strategy_step.subcloud.name)
except Exception as e:
# Two error messages: one for subcloud error description and logs and
# one for orchestrator strategy_step detail (shorter than the previous).
msg_subcloud = utils.find_ansible_error_msg(
strategy_step.subcloud.name, log_file,
consts.DEPLOY_STATE_MIGRATING_DATA)
# Get script output in case it is available
error_msg = utils.get_failure_msg(strategy_step.subcloud.region_name)
failure = ('%s \n%s' % (error_msg, msg_subcloud))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
error_description=failure[0:consts.ERROR_DESCRIPTION_LENGTH])
self.error_log(strategy_step, msg_subcloud)
self.error_log(strategy_step, str(e))
raise
# wait up to 60 minutes for reboot to complete
self.wait_for_unlock(strategy_step)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_MIGRATED)
self.info_log(strategy_step, "Data migration completed.")
return self.next_state
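
wait_for_unlock above keeps two independent budgets: consecutive failed queries (the API is down while the host reboots) and successful queries that have not yet reached the expected state; only a successful query resets the failure counter. A runnable condensation of that dual-budget loop, with a fake host query for illustration:

import time

def wait_for_unlock(get_host, max_failed=30, max_api=30, sleep_secs=60):
    """Wait until get_host() reports unlocked/enabled, with separate
    budgets for query failures and for not-yet-ready responses."""
    fail_counter = api_counter = 0
    while True:
        try:
            host = get_host()
            if (host["administrative"] == "unlocked" and
                    host["operational"] == "enabled"):
                return host
            fail_counter = 0  # API reachable again: reset failure budget
        except Exception:
            fail_counter += 1
            if fail_counter >= max_failed:
                raise Exception("Timeout waiting on reboot to complete")
            time.sleep(sleep_secs)
            continue  # a failure does not consume the api budget
        api_counter += 1
        if api_counter >= max_api:
            raise Exception("Timeout waiting for unlock to complete")
        time.sleep(sleep_secs)

# Usage: one failed query (reboot), then locked, then unlocked/enabled.
answers = iter([None, {"administrative": "locked", "operational": "disabled"},
                {"administrative": "unlocked", "operational": "enabled"}])

def fake_host():
    value = next(answers)
    if value is None:
        raise IOError("host rebooting")
    return value

print(wait_for_unlock(fake_host, sleep_secs=0))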

View File

@ -1,417 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import copy
import re
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sysinv_v1 import HOST_FS_NAME_SCRATCH
from dcmanager.common import consts
from dcmanager.common.exceptions import ManualRecoveryRequiredException
from dcmanager.common.exceptions import PreCheckFailedException
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_SYSTEM_INFO_CACHE_TYPE
# These deploy states should transition to the 'upgrading' state
VALID_UPGRADE_STATES = [consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED, ]
# These deploy states should transition to the 'migrating_data' state
VALID_MIGRATE_DATA_STATES = [consts.DEPLOY_STATE_INSTALLED, ]
# These deploy states should transition to the 'activating_upgrade' state
VALID_ACTIVATION_STATES = [consts.DEPLOY_STATE_MIGRATED, ]
MIN_SCRATCH_SIZE_REQUIRED_GB = 16
UPGRADE_IN_PROGRESS_ALARM = '900.005'
HOST_ADMINISTRATIVELY_LOCKED_ALARM = '200.001'
ALARM_IGNORE_LIST = [UPGRADE_IN_PROGRESS_ALARM, ]
class PreCheckState(BaseState):
"""This State performs entry checks and skips to the appropriate state"""
def __init__(self, region_name):
super(PreCheckState, self).__init__(
next_state=consts.STRATEGY_STATE_INSTALLING_LICENSE,
region_name=region_name)
def _check_health(self, strategy_step, subcloud_sysinv_client,
subcloud_fm_client, host, upgrades):
# Check system upgrade health
#
# Sample output #1
# ================
# Some non-management affecting alarms, all other checks passed
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [OK]
# No alarms: [Fail]
# [1] alarms found, [0] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
# Active kubernetes version is the latest supported version: [OK]
# No imported load found. Unable to test further
#
# Sample output #2
# ================
# Multiple failed checks, management affecting alarms
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [Fail]
# No alarms: [Fail]
# [7] alarms found, [2] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
# Active kubernetes version is the latest supported version: [OK]
# No imported load found. Unable to test further
# TODO(teewrs): Update the sysinv API to allow a list of ignored alarms
# to be passed to the health check API. This would be much more efficient
# than having to retrieve the alarms in a separate step.
system_health = subcloud_sysinv_client.get_system_health_upgrade()
fails = re.findall(r"\[Fail\]", system_health)
failed_alarm_check = re.findall(r"No alarms: \[Fail\]", system_health)
no_mgmt_alarms = re.findall(r"\[0\] of which are management affecting",
system_health)
alarm_ignore_list = copy.copy(ALARM_IGNORE_LIST)
if (host.administrative == consts.ADMIN_LOCKED and upgrades):
alarm_ignore_list.append(HOST_ADMINISTRATIVELY_LOCKED_ALARM)
# Clean old error messages
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=consts.ERROR_DESC_EMPTY)
# The health conditions acceptable for upgrade are:
# a) subcloud is completely healthy (i.e. no failed checks)
# b) subcloud only fails alarm check and it only has non-management
# affecting alarm(s)
# c) the management alarm(s) that subcloud has once upgrade has started
# are upgrade alarm itself and host locked alarm
if ((len(fails) == 0) or
(len(fails) == 1 and failed_alarm_check and no_mgmt_alarms)):
self.info_log(strategy_step, "Health check passed.")
return
if not failed_alarm_check:
# Health check failure: no alarms involved
#
# These could be Kubernetes or other related failure(s) which have not
# been converted into an alarm condition.
error_desc_msg = ("System upgrade health check failed. \n %s" %
fails)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_desc_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
details = (
"System upgrade health check failed. Please run "
"'system health-query-upgrade' command on the subcloud or %s "
"on central for details" % (consts.ERROR_DESC_CMD))
self.error_log(strategy_step, "\n" + system_health)
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
else:
# Health check failure: one or more alarms
if (upgrades and (len(fails) == len(alarm_ignore_list))):
# Upgrade has started, previous try failed either before or after
# host lock.
return
elif len(fails) == 1:
# Health check failure: exclusively alarm related
alarms = subcloud_fm_client.get_alarms()
for alarm in alarms:
if alarm.alarm_id not in alarm_ignore_list:
if alarm.mgmt_affecting == "True":
error_desc_msg = (
"System upgrade health check failed due to "
"alarm %s. System upgrade health: \n %s" %
(alarm.alarm_id, system_health))
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_desc_msg[
0:consts.ERROR_DESCRIPTION_LENGTH])
details = (
"System upgrade health check failed due to "
"alarm %s. Please run 'system health-query-upgrade' "
"command on the subcloud or %s on central for "
"details." % (alarm.alarm_id, consts.ERROR_DESC_CMD))
self.error_log(strategy_step, "\n" + system_health)
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
else:
# Multiple failures
error_desc_msg = (
"System upgrade health check failed due to multiple failures. "
"Health: \n %s" % system_health)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_desc_msg[
0:consts.ERROR_DESCRIPTION_LENGTH])
details = (
"System upgrade health check failed due to multiple failures. "
"Please run 'system health-query-upgrade' command on the "
"subcloud or %s on central for details." % consts.ERROR_DESC_CMD)
self.error_log(strategy_step, "\n" + system_health)
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
def _check_scratch(self, strategy_step, subcloud_sysinv_client, host):
scratch_fs = subcloud_sysinv_client.get_host_filesystem(
host.uuid, HOST_FS_NAME_SCRATCH)
if scratch_fs.size < MIN_SCRATCH_SIZE_REQUIRED_GB:
details = ("Scratch filesystem size of %s does not meet "
"minimum required %s" %
(scratch_fs.size, MIN_SCRATCH_SIZE_REQUIRED_GB))
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
def _perform_subcloud_online_checks(self, strategy_step, subcloud_sysinv_client,
subcloud_fm_client, host, upgrades):
self._check_health(strategy_step, subcloud_sysinv_client, subcloud_fm_client,
host, upgrades)
self._check_scratch(strategy_step, subcloud_sysinv_client, host)
def perform_state_action(self, strategy_step):
"""This state will check if the subcloud is offline:
Check the deploy_status and transfer to the correct state.
If an unsupported deploy_status is encountered, fail the upgrade
"""
subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id)
if subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE:
subcloud_sysinv_client = None
try:
subcloud_sysinv_client = \
self.get_sysinv_client(strategy_step.subcloud.region_name)
subcloud_fm_client = \
self.get_fm_client(strategy_step.subcloud.region_name)
except Exception:
# if getting the token times out, the orchestrator may have
# restarted and subcloud may be offline; so will attempt
# to use the persisted values
message = ("Subcloud %s failed to get subcloud client" %
strategy_step.subcloud.name)
self.error_log(strategy_step, message)
error_message = "deploy state: %s" % subcloud.deploy_status
raise ManualRecoveryRequiredException(
subcloud=strategy_step.subcloud.name,
error_message=error_message)
host = subcloud_sysinv_client.get_host("controller-0")
subcloud_type = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_system().system_mode
upgrades = subcloud_sysinv_client.get_upgrades()
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
# Check presence of data_install values. These are managed
# semantically on subcloud add or update
if not subcloud.data_install:
details = ("Data install values are missing and must be updated "
"via dcmanager subcloud update")
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details)
sc_status = subcloud.deploy_status
if (host.administrative == consts.ADMIN_LOCKED and
(sc_status == consts.DEPLOY_STATE_INSTALL_FAILED or
sc_status == consts.DEPLOY_STATE_PRE_INSTALL_FAILED)):
# If the subcloud is online but its deploy state is
# pre-install-failed or install-failed and the subcloud host is
# locked, the upgrading simplex step must have failed early in
# the previous upgrade attempt. The pre-check should transition
# directly to upgrading simplex step in the retry.
self.override_next_state(consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
return self.next_state
# Skip subcloud online checks if the subcloud deploy status is
# either "migrated" or "upgrade-activated".
if subcloud.deploy_status in [consts.DEPLOY_STATE_MIGRATED,
consts.DEPLOY_STATE_UPGRADE_ACTIVATED]:
self.info_log(strategy_step, "Online subcloud checks skipped.")
else:
self._perform_subcloud_online_checks(strategy_step,
subcloud_sysinv_client,
subcloud_fm_client,
host, upgrades)
if subcloud.deploy_status == consts.DEPLOY_STATE_UPGRADE_ACTIVATED:
# If the subcloud has completed upgrade activation,
# advance directly to completing step.
self.override_next_state(
consts.STRATEGY_STATE_COMPLETING_UPGRADE
)
elif subcloud.deploy_status == \
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED:
# If the subcloud deploy status is data-migration-failed but
# it is online and has passed subcloud online checks, it must
# have timed out while waiting for the subcloud to unlock
# previously and has successfully been unlocked since. Update
# the subcloud deploy status and advance to activating upgrade
# step.
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_MIGRATED)
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE
)
elif subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATED:
# If the subcloud deploy status is migrated but it is online, it
# must have undergone 2 upgrade attempts:
# - in 1st upgrade attempt: strategy timed out while waiting
# for the subcloud to unlock
# - in 2nd upgrade attempt: the subcloud was unlocked
# successfully (with or without manual interventions) but
# failed to activate.
# Advance to activating upgrade step so activation can be retried
# after the manual intervention.
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE
)
else:
# Duplex case
if upgrades:
# If upgrade has started, skip subcloud online checks
self.info_log(strategy_step, "Online subcloud checks skipped.")
upgrade_state = upgrades[0].state
controllers_state = consts.UPGRADE_STATE_UPGRADING_CONTROLLERS
migration_complete = consts.UPGRADE_STATE_DATA_MIGRATION_COMPLETE
if (upgrade_state == consts.UPGRADE_STATE_DATA_MIGRATION_FAILED
or upgrade_state == consts.UPGRADE_STATE_DATA_MIGRATION):
error_message = "upgrade state: %s" % upgrade_state
raise ManualRecoveryRequiredException(
subcloud=strategy_step.subcloud.name,
error_message=error_message)
elif (upgrade_state == controllers_state or
upgrade_state == migration_complete):
# At this point the subcloud is duplex, deploy state is
# complete and "system upgrade-show" on the subcloud indicates
# that the upgrade state is "upgrading-controllers".
# If controller-1 is locked then we unlock it,
# if controller-0 is active we need to swact
# else we can proceed to create the VIM strategy.
controller_1_host = subcloud_sysinv_client.get_host(
"controller-1")
if controller_1_host.administrative == consts.ADMIN_LOCKED:
self.override_next_state(
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1)
elif host.capabilities.get('Personality') == \
consts.PERSONALITY_CONTROLLER_ACTIVE:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1)
else:
self.override_next_state(
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
elif upgrade_state == consts.UPGRADE_STATE_UPGRADING_HOSTS:
# At this point the subcloud is duplex, deploy state is
# complete and "system upgrade-show" on the subcloud
# indicates that the upgrade state is "upgrading-hosts".
# If both subcloud hosts are upgraded to the newer load,
# we resume the state machine from activate upgrade state.
# Otherwise, we resume from create the VIM strategy state.
# determine the version of the system controller in regionone
target_version = self._read_from_cache(
REGION_ONE_SYSTEM_INFO_CACHE_TYPE).software_version
all_hosts_upgraded = True
subcloud_hosts = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_hosts()
for subcloud_host in subcloud_hosts:
is_locked = (subcloud_host.administrative ==
consts.ADMIN_LOCKED)
is_disabled = (subcloud_host.operational ==
consts.OPERATIONAL_DISABLED)
create_vim_state = \
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY
if (subcloud_host.software_load != target_version or
is_locked or is_disabled):
all_hosts_upgraded = False
self.override_next_state(create_vim_state)
if all_hosts_upgraded:
if host.capabilities.get('Personality') == \
consts.PERSONALITY_CONTROLLER_ACTIVE:
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
else:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
elif upgrade_state == consts.UPGRADE_STATE_ACTIVATION_FAILED:
if (host.capabilities.get('Personality') ==
consts.PERSONALITY_CONTROLLER_ACTIVE):
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
else:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
elif upgrade_state == consts.UPGRADE_STATE_ACTIVATION_COMPLETE:
self.override_next_state(
consts.STRATEGY_STATE_COMPLETING_UPGRADE)
else:
# Perform subcloud online check for duplex and proceed to the
# next step (i.e. installing license)
self._perform_subcloud_online_checks(strategy_step,
subcloud_sysinv_client,
subcloud_fm_client,
host, upgrades)
return self.next_state
# If it gets here, the subcloud must be offline and is a simplex
if subcloud.deploy_status in VALID_UPGRADE_STATES:
if not subcloud.data_install:
details = ("Data install values are missing and must be updated "
"via dcmanager subcloud update")
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details)
self.override_next_state(consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
return self.next_state
elif subcloud.deploy_status in VALID_MIGRATE_DATA_STATES:
self.override_next_state(consts.STRATEGY_STATE_MIGRATING_DATA)
return self.next_state
elif subcloud.deploy_status in VALID_ACTIVATION_STATES:
self.override_next_state(consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
return self.next_state
# FAIL: We are offline and encountered an un-recoverable deploy status
self.info_log(strategy_step,
"Un-handled deploy_status: %s" % subcloud.deploy_status)
error_message = "deploy state: %s" % subcloud.deploy_status
raise ManualRecoveryRequiredException(
subcloud=strategy_step.subcloud.name,
error_message=error_message)
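
_check_health accepts a subcloud in exactly two cases: the parsed report has no failed checks, or its single failure is the alarm check and no alarm is management affecting. A runnable condensation of that acceptance rule (the sample report is illustrative, not captured sysinv output):

import re

SAMPLE = """System Health:
All hosts are provisioned: [OK]
Ceph Storage Healthy: [OK]
No alarms: [Fail]
[1] alarms found, [0] of which are management affecting
All kubernetes nodes are ready: [OK]"""

def health_acceptable(report):
    """Pass when no check failed, or the only failure is the alarm
    check and no alarm is management affecting."""
    fails = re.findall(r"\[Fail\]", report)
    alarm_fail = re.search(r"No alarms: \[Fail\]", report)
    no_mgmt = re.search(r"\[0\] of which are management affecting", report)
    return len(fails) == 0 or (len(fails) == 1 and alarm_fail and no_mgmt)

print(health_acceptable(SAMPLE))  # True: one non-mgmt-affecting alarm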

View File

@ -1,131 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dccommon.drivers.openstack.vim import ALARM_RESTRICTIONS_RELAXED
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
DEFAULT_FORCE_FLAG = False
# Max time 30 minutes = 180 attempts, with 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
MAX_FAILED_RETRIES = 10
UPGRADE_FAILED = 'upgrade-failed'
UPGRADE_STARTED_STATES = ['started', ]
UPGRADE_RETRY_STATES = [UPGRADE_FAILED, ]
class StartingUpgradeState(BaseState):
"""Upgrade state for starting an upgrade on a subcloud"""
def __init__(self, region_name):
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE,
region_name=region_name)
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_retries = MAX_FAILED_RETRIES
def get_upgrade_state(self, strategy_step):
try:
upgrades = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_upgrades()
except Exception as exception:
self.warn_log(strategy_step,
"Encountered exception: %s, "
"retry upgrade start for subcloud %s."
% (str(exception), strategy_step.subcloud.name))
return UPGRADE_FAILED
if len(upgrades) == 0:
raise Exception("Failed to generate upgrade data. Please "
"check sysinv.log on the subcloud for details.")
# The list of upgrades will never contain more than one entry.
return upgrades[0].state
def perform_state_action(self, strategy_step):
"""Start an upgrade on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
# Check if an existing upgrade is already in progress.
# The list of upgrades will never contain more than one entry.
upgrades = self.get_sysinv_client(
strategy_step.subcloud.region_name).get_upgrades()
if upgrades is not None and len(upgrades) > 0:
for upgrade in upgrades:
# If a previous upgrade exists (even one that failed) skip
self.info_log(strategy_step,
"An upgrade already exists: %s" % upgrade)
return self.next_state
else:
# invoke the API 'upgrade-start'.
# query the alarm_restriction_type from DB SwUpdateOpts
force_flag = DEFAULT_FORCE_FLAG
opts_dict = \
utils.get_sw_update_opts(self.context,
for_sw_update=True,
subcloud_id=strategy_step.subcloud_id)
if opts_dict is not None:
force_flag = (opts_dict.get('alarm-restriction-type')
== ALARM_RESTRICTIONS_RELAXED)
# This call is asynchronous and throws an exception on failure.
self.get_sysinv_client(
strategy_step.subcloud.region_name).upgrade_start(force=force_flag)
# Do not move to the next state until the upgrade state is correct
counter = 0
retry_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
upgrade_state = self.get_upgrade_state(strategy_step)
if upgrade_state in UPGRADE_STARTED_STATES:
self.info_log(strategy_step,
"Upgrade started. State=%s" % upgrade_state)
break
if upgrade_state in UPGRADE_RETRY_STATES:
retry_counter += 1
if retry_counter >= self.max_failed_retries:
error_msg = utils.get_failure_msg(
strategy_step.subcloud.region_name)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
error_description=error_msg[
0:consts.ERROR_DESCRIPTION_LENGTH])
details = ("Failed to start upgrade. Please "
"check sysinv.log on the subcloud or "
"%s on central for details." %
(consts.ERROR_DESC_CMD))
raise Exception(details)
self.warn_log(strategy_step,
"Upgrade start failed, retrying... State=%s"
% upgrade_state)
try:
self.get_sysinv_client(
strategy_step.subcloud.region_name).upgrade_start(
force=force_flag)
except Exception as exception:
self.warn_log(strategy_step,
"Encountered exception: %s, "
"during upgrade start for subcloud %s."
% (str(exception),
strategy_step.subcloud.name))
counter += 1
if counter >= self.max_queries:
raise Exception("Timeout waiting for upgrade to start")
time.sleep(self.sleep_duration)
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
return self.next_state
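
The start loop above combines two limits: an overall polling budget and a smaller retry budget that re-issues upgrade-start while the subcloud keeps reporting a failed state. The same control flow in isolation, with injected callables in place of the sysinv client:

import time

def start_with_retry(start, get_state, max_queries=180, max_retries=10,
                     sleep_secs=10):
    """Kick off an async operation and re-issue it while it reports
    'failed', within both a polling budget and a retry budget."""
    start()
    retries = 0
    for _ in range(max_queries):
        state = get_state()
        if state == "started":
            return state
        if state == "failed":
            retries += 1
            if retries >= max_retries:
                raise Exception("Giving up after %d failed starts" % retries)
            start()  # re-issue the start request
        time.sleep(sleep_secs)
    raise Exception("Timeout waiting for operation to start")

# Usage: the operation fails once, then comes up.
seq = iter(["failed", "starting", "started"])
print(start_with_retry(lambda: None, lambda: next(seq), sleep_secs=0))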

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.swact_host import SwactHostState
class SwactToController0State(SwactHostState):
"""Orchestration state for swacting to controller-0 host"""
def __init__(self, region_name):
super(SwactToController0State, self).__init__(
next_state=consts.STRATEGY_STATE_ACTIVATING_UPGRADE,
region_name=region_name,
active="controller-0",
standby="controller-1",)

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.swact_host import SwactHostState
class SwactToController1State(SwactHostState):
"""Orchestration state for swacting to controller-1 host"""
def __init__(self, region_name):
super(SwactToController1State, self).__init__(
next_state=consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY,
region_name=region_name,
active="controller-1",
standby="controller-0")

View File

@ -1,80 +0,0 @@
#
# Copyright (c) 2022-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common import utils
from dcmanager.orchestrator.states.base import BaseState
# Max time: 1.5 minutes = 3 retries x 30 seconds between each
DEFAULT_MAX_RETRIES = 3
DEFAULT_SLEEP_DURATION = 30
class TransferCACertificateState(BaseState):
"""Upgrade step for transfering CA certificate"""
def __init__(self, region_name):
self.subcloud_type = self.get_sysinv_client(
region_name).get_system().system_mode
if self.subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
super(TransferCACertificateState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_0,
region_name=region_name)
else:
super(TransferCACertificateState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_1,
region_name=region_name)
self.max_retries = DEFAULT_MAX_RETRIES
self.sleep_duration = DEFAULT_SLEEP_DURATION
def perform_state_action(self, strategy_step):
"""Transfer CA certificate for an upgrade to a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
if self.subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
return self.next_state
self.info_log(strategy_step, "Start transferring CA certificate...")
retry_counter = 0
while True:
try:
sysinv_client = \
self.get_sysinv_client(strategy_step.subcloud.region_name)
data = {'mode': 'openldap_ca'}
ldap_ca_cert, ldap_ca_key, rca_crt = \
utils.get_certificate_from_secret(
consts.OPENLDAP_CA_CERT_SECRET_NAME,
consts.CERT_NAMESPACE_PLATFORM_CA_CERTS)
sysinv_client.update_certificate(
'', ldap_ca_cert + rca_crt + ldap_ca_key, data)
break
except Exception as e:
self.warn_log(strategy_step,
"Encountered exception: %s" % str(e))
retry_counter += 1
if retry_counter > self.max_retries:
raise Exception(
"Failed to transfer CA certificate for subcloud %s."
% strategy_step.subcloud.name)
self.warn_log(strategy_step,
"Retry (%i/%i) in %i secs."
% (retry_counter,
self.max_retries,
self.sleep_duration))
time.sleep(self.sleep_duration)
self.info_log(strategy_step, "CA certificate transfer completed.")
return self.next_state

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.unlock_host import UnlockHostState
class UnlockDuplexState(UnlockHostState):
"""Orchestration state for unlocking controller-1 host"""
def __init__(self, region_name):
super(UnlockDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1,
region_name=region_name,
hostname="controller-1")

View File

@ -1,18 +0,0 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.unlock_host import UnlockHostState
class UnlockSimplexState(UnlockHostState):
"""Orchestration state for unlocking controller-0 host"""
def __init__(self, region_name):
super(UnlockSimplexState, self).__init__(
next_state=consts.STRATEGY_STATE_ACTIVATING_UPGRADE,
region_name=region_name,
hostname="controller-0")

View File

@ -1,209 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import time
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.common import utils
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_PATCHING_CACHE_TYPE
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
class UpdatingPatchesState(BaseState):
"""Upgrade state for updating patches"""
def __init__(self, region_name):
super(UpdatingPatchesState, self).__init__(
next_state=consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY,
region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
# todo(jcasteli): Refactor instead of duplicating code from patch_orch_thread.py
def perform_state_action(self, strategy_step):
"""Update patches in this subcloud that need to be applied and
removed to match the applied patches in RegionOne
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
This code is based on patch orchestration.
"""
self.info_log(strategy_step, "Updating patches")
if strategy_step.subcloud_id is None:
# This is the SystemController. It is the master so no update
# is necessary.
self.info_log(strategy_step,
"Skipping update patches for SystemController")
return self.next_state
# First query RegionOne to determine what patches should be applied.
regionone_patches = self._read_from_cache(REGION_ONE_PATCHING_CACHE_TYPE)
self.debug_log(strategy_step, "regionone_patches: %s" % regionone_patches)
# Build lists of patches that should be applied in this subcloud,
# based on their state in RegionOne. Check repostate (not patchstate)
# as we only care if the patch has been applied to the repo (not
# whether it is installed on the hosts). If we were to check the
# patchstate, we could end up removing patches from this subcloud
# just because a single host in RegionOne reported that it was not
# patch current.
applied_patch_ids = list()
for patch_id in regionone_patches.keys():
if regionone_patches[patch_id]['repostate'] in [
patching_v1.PATCH_STATE_APPLIED,
patching_v1.PATCH_STATE_COMMITTED]:
applied_patch_ids.append(patch_id)
self.debug_log(strategy_step,
"RegionOne applied_patch_ids: %s" % applied_patch_ids)
region = self.get_region_name(strategy_step)
# Retrieve all the patches that are present in this subcloud.
subcloud_patches = self.get_patching_client(region).query()
self.debug_log(strategy_step, "Patches for subcloud: %s" %
(subcloud_patches))
# Determine which loads are present in this subcloud. During an
# upgrade, there will be more than one load installed.
loads = self.get_sysinv_client(region).get_loads()
installed_loads = utils.get_loads_for_patching(loads)
patches_to_upload = list()
patches_to_apply = list()
patches_to_remove = list()
# Figure out which patches in this subcloud need to be applied and
# removed to match the applied patches in RegionOne. Check the
# repostate, which indicates whether it is applied or removed in
# the repo.
subcloud_patch_ids = list(subcloud_patches.keys())
for patch_id in subcloud_patch_ids:
if subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_APPLIED:
if subcloud_patches[patch_id]['patchstate'] != \
patching_v1.PATCH_STATE_APPLIED:
self.info_log(strategy_step,
"Patch %s will be removed from subcloud" %
(patch_id))
patches_to_remove.append(patch_id)
elif subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_COMMITTED:
if patch_id not in applied_patch_ids:
message = ("Patch %s is committed in subcloud but "
"not applied in SystemController" %
patch_id)
self.warn_log(strategy_step, message)
raise Exception(message)
elif subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_AVAILABLE:
if patch_id in applied_patch_ids:
self.info_log(strategy_step,
"Patch %s will be applied to subcloud" %
(patch_id))
patches_to_apply.append(patch_id)
else:
# This patch is in an invalid state
message = ('Patch %s in subcloud in unexpected state %s' %
(patch_id,
subcloud_patches[patch_id]['repostate']))
self.warn_log(strategy_step, message)
raise Exception(message)
# Check that all applied patches in RegionOne are present in the
# subcloud.
for patch_id in applied_patch_ids:
if regionone_patches[patch_id]['sw_version'] in \
installed_loads and patch_id not in subcloud_patch_ids:
self.info_log(strategy_step,
"Patch %s missing from subcloud" %
(patch_id))
patches_to_upload.append(patch_id)
patches_to_apply.append(patch_id)
if patches_to_remove:
self.info_log(strategy_step,
"Removing patches %s from subcloud" %
(patches_to_remove))
self.get_patching_client(region).remove(patches_to_remove)
if patches_to_upload:
self.info_log(strategy_step,
"Uploading patches %s to subcloud" %
(patches_to_upload))
for patch in patches_to_upload:
patch_sw_version = regionone_patches[patch]['sw_version']
patch_file = "%s/%s/%s.patch" % (consts.PATCH_VAULT_DIR,
patch_sw_version,
patch)
if not os.path.isfile(patch_file):
message = ('Patch file %s is missing' % patch_file)
self.error_log(strategy_step, message)
raise Exception(message)
self.get_patching_client(region).upload([patch_file])
if self.stopped():
self.info_log(strategy_step,
"Exiting because task is stopped")
raise StrategyStoppedException()
if patches_to_apply:
self.info_log(strategy_step,
"Applying patches %s to subcloud" %
(patches_to_apply))
self.get_patching_client(region).apply(patches_to_apply)
# Now that we have applied/removed/uploaded patches, we need to give
# the patch controller on this subcloud time to determine whether
# each host on that subcloud is patch current.
wait_count = 0
while True:
subcloud_hosts = self.get_patching_client(
region).query_hosts()
self.debug_log(strategy_step,
"query_hosts for subcloud: %s" % subcloud_hosts)
for host in subcloud_hosts:
if host['interim_state']:
# This host is not yet ready.
self.debug_log(strategy_step,
"Host %s in subcloud in interim state" %
(host["hostname"]))
break
else:
# All hosts in the subcloud are updated
break
wait_count += 1
if wait_count >= 6:
# We have waited at least 60 seconds. This is too long. We
# will just log it and move on without failing the step.
message = ("Too much time expired after applying patches to "
"subcloud - continuing.")
self.warn_log(strategy_step, message)
break
if self.stopped():
self.info_log(strategy_step, "Exiting because task is stopped")
raise StrategyStoppedException()
# Wait 10 seconds before doing another query.
time.sleep(10)
return self.next_state
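
The reconciliation above reduces to set arithmetic over patch states: remove what the subcloud applied but RegionOne did not, apply what RegionOne applied but the subcloud still has available, and upload-then-apply what the subcloud lacks entirely. A simplified sketch keyed purely on repostate (the real code also consults patchstate, committed handling, and installed loads):

def plan_patch_sync(region_one, subcloud):
    """Compute upload/apply/remove lists from repostate.

    Both inputs map patch_id -> {'repostate': ...}; 'Applied' and
    'Committed' in RegionOne count as applied.
    """
    applied = {p for p, v in region_one.items()
               if v["repostate"] in ("Applied", "Committed")}
    to_upload, to_apply, to_remove = [], [], []
    for patch_id, info in subcloud.items():
        if info["repostate"] == "Applied" and patch_id not in applied:
            to_remove.append(patch_id)
        elif info["repostate"] == "Available" and patch_id in applied:
            to_apply.append(patch_id)
    for patch_id in sorted(applied):
        if patch_id not in subcloud:
            to_upload.append(patch_id)  # missing entirely: upload first
            to_apply.append(patch_id)
    return to_upload, to_apply, to_remove

# P1 needs applying, P2 is missing from the subcloud, P3 must be removed.
r1 = {"P1": {"repostate": "Applied"}, "P2": {"repostate": "Committed"}}
sc = {"P1": {"repostate": "Available"}, "P3": {"repostate": "Applied"}}
print(plan_patch_sync(r1, sc))  # (['P2'], ['P1', 'P2'], ['P3'])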

View File

@ -1,111 +0,0 @@
#
# Copyright (c) 2020-2021, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# When an unlock occurs, a reboot is triggered. During reboot, API calls fail.
# The max time allowed here is 30 minutes (ie: 180 queries with 10 secs sleep)
DEFAULT_MAX_FAILED_QUERIES = 180
DEFAULT_FAILED_SLEEP = 10
# Max time: 30 minutes = 180 queries x 10 seconds
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
class UpgradingDuplexState(BaseState):
"""Update state for upgrading a non-simplex subcloud host"""
def __init__(self, region_name):
super(UpgradingDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1,
region_name=region_name
)
self.target_hostname = "controller-1"
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def perform_state_action(self, strategy_step):
"""Upgrade a duplex host on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
self.info_log(strategy_step, "Performing duplex upgrade for subcloud")
region = self.get_region_name(strategy_step)
host = self.get_sysinv_client(
region).get_host(self.target_hostname)
self.get_sysinv_client(region).upgrade_host(host.id)
# Wait for controller-1 to reinstall with the load N+1
# and become locked-disabled-online state.
# this action is asynchronous, query until it completes or times out
# Allow separate durations for failures (ie: reboot) and api retries
fail_counter = 0
api_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
upgrades = self.get_sysinv_client(region).get_upgrades()
if len(upgrades) != 0:
if (
upgrades[0].state ==
consts.UPGRADE_STATE_DATA_MIGRATION_FAILED or
upgrades[0].state ==
consts.UPGRADE_STATE_DATA_MIGRATION_COMPLETE
):
msg = "Upgrade state is %s now" % (upgrades[0].state)
self.info_log(strategy_step, msg)
break
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact
fail_counter += 1
if fail_counter >= self.max_failed_queries:
raise Exception("Timeout waiting for reboot to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
api_counter += 1
if api_counter >= self.max_queries:
raise Exception(
"Timeout waiting for update state to be updated to "
"'data-migration-failed' or 'data-migration-complete'. "
"Please check sysinv.log on the subcloud for details."
)
time.sleep(self.sleep_duration)
# If the upgrade state is 'data-migration-complete' we move to the
# next state, else if it is 'data-migration-failed' we go to the failed
# state.
upgrades = self.get_sysinv_client(region).get_upgrades()
if len(upgrades) == 0:
raise Exception("No upgrades were found")
# The list of upgrades will never contain more than one entry.
if upgrades[0].state == consts.UPGRADE_STATE_DATA_MIGRATION_FAILED:
raise Exception(
"Data migration failed on host %s" % self.target_hostname
)
# If we reach this point, the upgrade state is 'data-migration-complete'
# and we can move to the next state.
return self.next_state

View File

@ -1,399 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import json
import os
import keyring
from oslo_serialization import base64
from tsconfig.tsconfig import SW_VERSION
from dccommon import consts as dccommon_consts
from dccommon.subcloud_install import SubcloudInstall
from dcmanager.common import consts
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
class UpgradingSimplexState(BaseState):
"""Upgrade state for upgrading a simplex subcloud host"""
def __init__(self, region_name):
super(UpgradingSimplexState, self).__init__(
next_state=consts.STRATEGY_STATE_MIGRATING_DATA, region_name=region_name)
def perform_state_action(self, strategy_step):
"""Upgrade a simplex host on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
self.info_log(strategy_step, "Performing simplex upgrade for subcloud")
subcloud_sysinv_client = None
subcloud_barbican_client = None
try:
subcloud_sysinv_client = self.get_sysinv_client(
strategy_step.subcloud.region_name)
subcloud_barbican_client = self.get_barbican_client(
strategy_step.subcloud.region_name)
except Exception:
# if getting the token times out, the orchestrator may have
# restarted and subcloud may be offline; so will attempt
# to use the persisted values
message = ("Simplex upgrade perform_subcloud_install "
"subcloud %s failed to get subcloud client" %
strategy_step.subcloud.name)
self.error_log(strategy_step, message)
# Check whether subcloud is already re-installed with N+1 load
target_version = SW_VERSION
if self._check_load_already_active(
target_version, subcloud_sysinv_client):
self.info_log(strategy_step,
"Load:%s already active" % target_version)
return self.next_state
# Check whether subcloud supports redfish, and if not, fail.
# This needs to be inferred from absence of install_values as
# there is currently no external api to query.
install_values = self.get_subcloud_upgrade_install_values(
strategy_step, subcloud_sysinv_client, subcloud_barbican_client)
# Upgrade the subcloud to the install_values image
self.perform_subcloud_install(strategy_step, install_values)
return self.next_state
def _check_load_already_active(self, target_version, subcloud_sysinv_client):
"""Check if the target_version is already active in subcloud"""
if subcloud_sysinv_client:
current_loads = subcloud_sysinv_client.get_loads()
for load in current_loads:
if (load.software_version == target_version and
load.state == 'active'):
return True
return False
def get_subcloud_upgrade_install_values(
self, strategy_step,
subcloud_sysinv_client, subcloud_barbican_client):
"""Get the data required for the remote subcloud install.
subcloud data_install are obtained from:
dcmanager database:
subcloud.subcloud_install_initial::for values which are persisted at
subcloud_add time
INSTALL: (needed for upgrade install)
bootstrap_interface
bootstrap_vlan
bootstrap_address
bootstrap_address_prefix
install_type # could also be from host-show
# This option can be set to extend the installing stage timeout value
# wait_for_timeout: 3600
# Set this options for https with self-signed certificate
# no_check_certificate
# Override default filesystem device: also from host-show, but is static.
# rootfs_device: "/dev/disk/by-path/pci-0000:00:1f.2-ata-1.0"
# boot_device: "/dev/disk/by-path/pci-0000:00:1f.2-ata-1.0"
# Set rd.net.timeout.ipv6dad to increase timeout on IPv6 NIC up
# rd.net.timeout.ipv6dad: 300
BOOTSTRAP: (also needed for bootstrap)
# If the subcloud's bootstrap IP interface and the system controller
# are not on the same network then the customer must configure a
# default route or static route so that the Central Cloud can login
# bootstrap the newly installed subcloud. If nexthop_gateway is
# specified and the network_address is not specified then a default
# route will be configured. Otherwise, if a network_address is
# specified then a static route will be configured.
nexthop_gateway: default_route_address
network_address: static_route_address
network_mask: static_route_mask
subcloud.data_upgrade - persist for upgrade duration
for values from subcloud online sysinv host-show
(persist since upgrade-start)
bmc_address # sysinv_v1 host-show
bmc_username # sysinv_v1 host-show
for values from barbican_client (as barbican user),
or from upgrade-start:
bmc_password --- obtain from barbican_client as barbican user
"""
install_values = {'name': strategy_step.subcloud.name}
install_values.update(
self._get_subcloud_upgrade_load_info(strategy_step))
upgrade_data_install_values = self._get_subcloud_upgrade_data_install(
strategy_step)
install_values.update(upgrade_data_install_values)
install_values.update(
self._get_subcloud_upgrade_data(
strategy_step, subcloud_sysinv_client, subcloud_barbican_client))
# Check bmc values
if not self._bmc_data_available(install_values):
if self._bmc_data_available(upgrade_data_install_values):
# It is possible the bmc data is only latched on install if it
# was not part of the deployment configuration
install_values.update({
'bmc_address':
upgrade_data_install_values.get('bmc_address'),
'bmc_username':
upgrade_data_install_values.get('bmc_username'),
'bmc_password':
upgrade_data_install_values.get('bmc_password'),
})
else:
message = ("Failed to get bmc credentials for subcloud %s" %
strategy_step.subcloud.name)
raise Exception(message)
self.info_log(strategy_step,
"get_subcloud_upgrade_data_install %s" % install_values)
return install_values
@staticmethod
def _bmc_data_available(bmc_values):
if (not bmc_values.get('bmc_username') or
not bmc_values.get('bmc_address') or
not bmc_values.get('bmc_password')):
return False
return True
def _get_subcloud_upgrade_load_info(self, strategy_step):
"""Get the subcloud upgrade load information"""
# The 'software_version' is the active running load on SystemController
matching_iso, _ = utils.get_vault_load_files(SW_VERSION)
if not matching_iso:
message = ("Failed to get upgrade load info for subcloud %s" %
strategy_step.subcloud.name)
raise Exception(message)
load_info = {'software_version': SW_VERSION,
'image': matching_iso}
return load_info
def _get_subcloud_upgrade_data_install(self, strategy_step):
"""Get subcloud upgrade data_install from persisted values"""
upgrade_data_install = {}
subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud_id)
if not subcloud.data_install:
# Set the deploy status to pre-install-failed so it can be
# handled accordingly in pre check step.
message = ("Failed to get upgrade data from install")
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
error_description=message)
self.warn_log(strategy_step, message)
raise Exception(message)
data_install = json.loads(subcloud.data_install)
# base64 encoded sysadmin_password is default
upgrade_data_install.update({
'ansible_become_pass': consts.TEMP_SYSADMIN_PASSWORD,
'ansible_ssh_pass': consts.TEMP_SYSADMIN_PASSWORD,
})
# Get mandatory bootstrap info from data_install
# bootstrap_address is referenced in SubcloudInstall
# bootstrap-address is referenced in create_subcloud_inventory and
# subcloud manager.
# todo(jkung): refactor to just use one bootstrap address index
upgrade_data_install.update({
'bootstrap_interface': data_install.get('bootstrap_interface'),
'bootstrap-address': data_install.get('bootstrap_address'),
'bootstrap_address': data_install.get('bootstrap_address'),
'bootstrap_address_prefix': data_install.get('bootstrap_address_prefix'),
'bmc_username': data_install.get('bmc_username'),
'bmc_address': data_install.get('bmc_address'),
'bmc_password': data_install.get('bmc_password'),
})
persistent_size = data_install.get('persistent_size')
if persistent_size is not None:
upgrade_data_install.update({'persistent_size': persistent_size})
for p in dccommon_consts.OPTIONAL_INSTALL_VALUES:
if p in data_install:
upgrade_data_install.update({p: data_install.get(p)})
return upgrade_data_install
def _get_subcloud_upgrade_data(self, strategy_step, subcloud_sysinv_client,
subcloud_barbican_client):
"""Get the subcloud data required for upgrades.
In case the subcloud is no longer reachable, get upgrade_data from
persisted database values. For example, this may be required in
the scenario where the subcloud experiences an unexpected error
(e.g. loss of power) and this step needs to be rerun.
"""
volatile_data_install = {}
if subcloud_sysinv_client is None:
# subcloud is not reachable, use previously saved values
subcloud = db_api.subcloud_get(
self.context, strategy_step.subcloud_id)
if subcloud.data_upgrade:
return json.loads(subcloud.data_upgrade)
else:
message = ('Cannot retrieve upgrade data install '
'for subcloud: %s' %
strategy_step.subcloud.name)
raise Exception(message)
subcloud_system = subcloud_sysinv_client.get_system()
if subcloud_system.system_type != 'All-in-one':
message = ('subcloud %s install unsupported for system type: %s' %
(strategy_step.subcloud.name,
subcloud_system.system_type))
raise Exception(message)
host = subcloud_sysinv_client.get_host('controller-0')
install_type = self._get_install_type(host)
bmc_password = None
if subcloud_barbican_client:
bmc_password = subcloud_barbican_client.get_host_bmc_password(host.uuid)
if bmc_password:
# If the host is configured to store bmc in its barbican database,
# encode the password. Otherwise leave it as None and it will be
# replaced with the value retrieved from the dcmanager database.
bmc_password = base64.encode_as_text(bmc_password)
volatile_data_install.update({
'bmc_address': host.bm_ip,
'bmc_username': host.bm_username,
'bmc_password': bmc_password,
'install_type': install_type,
'boot_device': host.boot_device,
'rootfs_device': host.rootfs_device,
})
# Persist the volatile data
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
data_upgrade=json.dumps(volatile_data_install))
admin_password = str(keyring.get_password('CGCS', 'admin'))
volatile_data_install.update({'admin_password': admin_password})
return volatile_data_install
@staticmethod
def _get_install_type(host):
if 'lowlatency' in host.subfunctions.split(','):
lowlatency = True
else:
lowlatency = False
if 'graphical' in host.console.split(','): # graphical console
if lowlatency:
install_type = 5
else:
install_type = 3
else: # serial console
if lowlatency:
install_type = 4
else:
install_type = 2
return install_type
def perform_subcloud_install(self, strategy_step, install_values):
log_file = os.path.join(
consts.DC_ANSIBLE_LOG_DIR,
strategy_step.subcloud.name) + '_playbook_output.log'
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL)
# Initialize before the try so the cleanup check in the except-branch
# below cannot hit an unbound name if the SubcloudInstall constructor
# itself raises.
install = None
try:
install = SubcloudInstall(strategy_step.subcloud.name)
install.prep(dccommon_consts.ANSIBLE_OVERRIDES_PATH,
install_values)
except Exception as e:
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
error_description=str(e)[0:consts.ERROR_DESCRIPTION_LENGTH])
self.error_log(strategy_step, str(e))
# TODO(jkung): cleanup to be implemented within SubcloudInstall
if install:
install.cleanup()
raise
ansible_subcloud_inventory_file = os.path.join(
dccommon_consts.ANSIBLE_OVERRIDES_PATH,
strategy_step.subcloud.name + consts.INVENTORY_FILE_POSTFIX)
# Create the ansible inventory for the upgrade subcloud
utils.create_subcloud_inventory(install_values,
ansible_subcloud_inventory_file)
rvmc_config_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH,
strategy_step.subcloud.name,
dccommon_consts.RVMC_CONFIG_FILE_NAME)
# SubcloudInstall.prep creates data_install.yml (install overrides)
install_command = [
"ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK,
"-i", ansible_subcloud_inventory_file,
"-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
strategy_step.subcloud.name + '/' + "install_values.yml",
"-e", "rvmc_config_file=%s" % rvmc_config_file
]
# Run the remote install playbook
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
availability_status=dccommon_consts.AVAILABILITY_OFFLINE,
deploy_status=consts.DEPLOY_STATE_INSTALLING)
try:
install.install(consts.DC_ANSIBLE_LOG_DIR, install_command)
except Exception as e:
# Detailed error message for subcloud error description field.
# Exception message for strategy_step detail.
msg = utils.find_ansible_error_msg(
strategy_step.subcloud.name, log_file,
consts.DEPLOY_STATE_INSTALLING)
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED,
error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
self.error_log(strategy_step, msg)
self.error_log(strategy_step, str(e))
install.cleanup()
raise
db_api.subcloud_update(
self.context, strategy_step.subcloud_id,
deploy_status=consts.DEPLOY_STATE_INSTALLED)
install.cleanup()
self.info_log(strategy_step, "Successfully installed subcloud")
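
_get_install_type above encodes a two-by-two table: console type (serial vs graphical) crossed with the lowlatency subfunction selects install types 2 through 5. The same mapping as a standalone function, taking plain strings that mirror host.subfunctions and host.console:

def get_install_type(subfunctions, console):
    """serial/standard=2, serial/lowlatency=4,
    graphical/standard=3, graphical/lowlatency=5."""
    lowlatency = "lowlatency" in subfunctions.split(",")
    graphical = "graphical" in console.split(",")
    if graphical:
        return 5 if lowlatency else 3
    return 4 if lowlatency else 2

assert get_install_type("controller,worker", "ttyS0,115200") == 2
assert get_install_type("controller,worker,lowlatency", "graphical") == 5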

View File

@ -38,7 +38,6 @@ from dcmanager.orchestrator.kube_upgrade_orch_thread \
from dcmanager.orchestrator.patch_orch_thread import PatchOrchThread
from dcmanager.orchestrator.prestage_orch_thread import PrestageOrchThread
from dcmanager.orchestrator.software_orch_thread import SoftwareOrchThread
from dcmanager.orchestrator.sw_upgrade_orch_thread import SwUpgradeOrchThread
LOG = logging.getLogger(__name__)
@ -70,11 +69,6 @@ class SwUpdateManager(manager.Manager):
self.strategy_lock, self.audit_rpc_client)
self.patch_orch_thread.start()
# - sw upgrade orchestration thread
self.sw_upgrade_orch_thread = SwUpgradeOrchThread(
self.strategy_lock, self.audit_rpc_client)
self.sw_upgrade_orch_thread.start()
# - fw update orchestration thread
self.fw_update_orch_thread = FwUpdateOrchThread(
self.strategy_lock, self.audit_rpc_client)
@ -104,9 +98,6 @@ class SwUpdateManager(manager.Manager):
# - patch orchestration thread
self.patch_orch_thread.stop()
self.patch_orch_thread.join()
# - sw upgrade orchestration thread
self.sw_upgrade_orch_thread.stop()
self.sw_upgrade_orch_thread.join()
# - fw update orchestration thread
self.fw_update_orch_thread.stop()
self.fw_update_orch_thread.join()
@ -150,18 +141,6 @@ class SwUpdateManager(manager.Manager):
dccommon_consts.ENDPOINT_TYPE_PATCHING and
subcloud_status.sync_status ==
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
elif strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
# force option only has an effect in offline case for upgrade
if force and availability_status != dccommon_consts.AVAILABILITY_ONLINE:
return (subcloud_status.endpoint_type ==
dccommon_consts.ENDPOINT_TYPE_LOAD and
subcloud_status.sync_status !=
dccommon_consts.SYNC_STATUS_IN_SYNC)
else:
return (subcloud_status.endpoint_type ==
dccommon_consts.ENDPOINT_TYPE_LOAD and
subcloud_status.sync_status ==
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
elif strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
if force and availability_status != dccommon_consts.AVAILABILITY_ONLINE:
return (subcloud_status.endpoint_type ==
@ -203,11 +182,12 @@ class SwUpdateManager(manager.Manager):
subcloud_status.sync_status ==
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
elif strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
# For prestage we reuse the ENDPOINT_TYPE_LOAD.
# For prestage we reuse the ENDPOINT_TYPE_SOFTWARE.
# We just need to key off a unique endpoint,
# so that the strategy is created only once.
return (subcloud_status.endpoint_type ==
dccommon_consts.ENDPOINT_TYPE_LOAD)
return (
subcloud_status.endpoint_type == dccommon_consts.ENDPOINT_TYPE_SOFTWARE
)
# Unimplemented strategy_type status check. Log an error
LOG.error("_validate_subcloud_status_sync for %s not implemented" %
strategy_type)
@ -364,16 +344,7 @@ class SwUpdateManager(manager.Manager):
resource='strategy',
msg='Subcloud %s does not exist' % cloud_name)
if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
# Make sure subcloud requires upgrade
subcloud_status = db_api.subcloud_status_get(
context, subcloud.id, dccommon_consts.ENDPOINT_TYPE_LOAD)
if subcloud_status.sync_status == \
dccommon_consts.SYNC_STATUS_IN_SYNC:
raise exceptions.BadRequest(
resource='strategy',
msg='Subcloud %s does not require upgrade' % cloud_name)
elif strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
if strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
subcloud_status = db_api.subcloud_status_get(
context, subcloud.id, dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
if subcloud_status.sync_status == \
@ -492,7 +463,7 @@ class SwUpdateManager(manager.Manager):
# Don't create a strategy if any of the subclouds is online and the
# relevant sync status is unknown. Offline subcloud is skipped unless
# --force option is specified and strategy type is upgrade.
# --force option is specified and strategy type is sw-deploy.
if single_group:
subclouds = []
for sb in db_api.subcloud_get_for_group(context, single_group.id):
@ -512,23 +483,7 @@ class SwUpdateManager(manager.Manager):
# We are not updating this subcloud
continue
if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
if (
subcloud.availability_status !=
dccommon_consts.AVAILABILITY_ONLINE
):
if not force:
continue
elif (
endpoint_type == dccommon_consts.ENDPOINT_TYPE_LOAD
and sync_status == dccommon_consts.SYNC_STATUS_UNKNOWN
):
raise exceptions.BadRequest(
resource="strategy",
msg="Upgrade sync status is unknown for one or more "
"subclouds",
)
elif strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
if strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
if (
subcloud.availability_status !=
dccommon_consts.AVAILABILITY_ONLINE
@ -541,8 +496,7 @@ class SwUpdateManager(manager.Manager):
):
raise exceptions.BadRequest(
resource="strategy",
msg="Software sync status is unknown for one or more "
"subclouds",
msg="Software sync status is unknown for one or more subclouds",
)
elif strategy_type == consts.SW_UPDATE_TYPE_PATCH:
if (
@ -659,14 +613,13 @@ class SwUpdateManager(manager.Manager):
continue
if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE:
if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
if strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
if not force:
continue
else:
continue
subcloud_status = db_api.subcloud_status_get_all(context,
subcloud.id)
subcloud_status = db_api.subcloud_status_get_all(context, subcloud.id)
for status in subcloud_status:
if self._validate_subcloud_status_sync(strategy_type,
status,
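For reference, the retained sw-deploy branch reduces to the predicate sketched below. This is illustrative only: the constant values are assumptions, and the force branch is inferred by analogy with the removed upgrade branch.

import collections

AVAILABILITY_ONLINE = "online"
ENDPOINT_TYPE_SOFTWARE = "software"
SYNC_STATUS_IN_SYNC = "in-sync"
SYNC_STATUS_OUT_OF_SYNC = "out-of-sync"

def software_needs_update(endpoint_type, sync_status, availability, force):
    # With --force, offline subclouds are selected unless already in-sync.
    if force and availability != AVAILABILITY_ONLINE:
        return (endpoint_type == ENDPOINT_TYPE_SOFTWARE
                and sync_status != SYNC_STATUS_IN_SYNC)
    # Otherwise only explicitly out-of-sync subclouds are selected.
    return (endpoint_type == ENDPOINT_TYPE_SOFTWARE
            and sync_status == SYNC_STATUS_OUT_OF_SYNC)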

View File

@ -1,139 +0,0 @@
# Copyright 2017 Ericsson AB.
# Copyright (c) 2017-2022, 2024 Wind River Systems, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.orch_thread import OrchThread
from dcmanager.orchestrator.states.upgrade.activating \
import ActivatingUpgradeState
from dcmanager.orchestrator.states.upgrade.applying_vim_upgrade_strategy \
import ApplyingVIMUpgradeStrategyState
from dcmanager.orchestrator.states.upgrade.cache.shared_cache_repository import \
SharedCacheRepository
from dcmanager.orchestrator.states.upgrade.completing \
import CompletingUpgradeState
from dcmanager.orchestrator.states.upgrade.creating_vim_upgrade_strategy \
import CreatingVIMUpgradeStrategyState
from dcmanager.orchestrator.states.upgrade.deleting_load \
import DeletingLoadState
from dcmanager.orchestrator.states.upgrade.finishing_patch_strategy \
import FinishingPatchStrategyState
from dcmanager.orchestrator.states.upgrade.importing_load \
import ImportingLoadState
from dcmanager.orchestrator.states.upgrade.installing_license \
import InstallingLicenseState
from dcmanager.orchestrator.states.upgrade.lock_duplex \
import LockDuplexState
from dcmanager.orchestrator.states.upgrade.lock_simplex \
import LockSimplexState
from dcmanager.orchestrator.states.upgrade.migrating_data \
import MigratingDataState
from dcmanager.orchestrator.states.upgrade.pre_check \
import PreCheckState
from dcmanager.orchestrator.states.upgrade.starting_upgrade \
import StartingUpgradeState
from dcmanager.orchestrator.states.upgrade.swact_to_controller_0 \
import SwactToController0State
from dcmanager.orchestrator.states.upgrade.swact_to_controller_1 \
import SwactToController1State
from dcmanager.orchestrator.states.upgrade.transfer_ca_certificate \
import TransferCACertificateState
from dcmanager.orchestrator.states.upgrade.unlock_duplex \
import UnlockDuplexState
from dcmanager.orchestrator.states.upgrade.unlock_simplex \
import UnlockSimplexState
from dcmanager.orchestrator.states.upgrade.updating_patches \
import UpdatingPatchesState
from dcmanager.orchestrator.states.upgrade.upgrading_duplex \
import UpgradingDuplexState
from dcmanager.orchestrator.states.upgrade.upgrading_simplex \
import UpgradingSimplexState
class SwUpgradeOrchThread(OrchThread):
"""SwUpgrade Orchestration Thread
This thread is responsible for executing the upgrade orchestration strategy.
Here is how it works:
- The user creates an update strategy of type 'upgrade' from the CLI (or REST API)
- This ends up being handled by the SwUpdateManager class, which
runs under the main dcmanager thread. The strategy is created and stored
in the database.
- The user then applies the strategy from the CLI (or REST API). The
SwUpdateManager code updates the state of the strategy in the database.
- The SwUpgradeOrchThread wakes up periodically and checks the database for
a strategy that is in an active state (applying, aborting, etc...). If
so, it executes the strategy, updating the strategy and steps in the
database as it goes, with state and progress information.
"""
# every state in sw upgrade orchestration should have an operator
STATE_OPERATORS = {
consts.STRATEGY_STATE_PRE_CHECK: PreCheckState,
consts.STRATEGY_STATE_INSTALLING_LICENSE: InstallingLicenseState,
consts.STRATEGY_STATE_IMPORTING_LOAD: ImportingLoadState,
consts.STRATEGY_STATE_UPDATING_PATCHES: UpdatingPatchesState,
consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY:
FinishingPatchStrategyState,
consts.STRATEGY_STATE_STARTING_UPGRADE: StartingUpgradeState,
consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE:
TransferCACertificateState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_0: LockSimplexState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_1: LockDuplexState,
consts.STRATEGY_STATE_UPGRADING_SIMPLEX: UpgradingSimplexState,
consts.STRATEGY_STATE_UPGRADING_DUPLEX: UpgradingDuplexState,
consts.STRATEGY_STATE_MIGRATING_DATA: MigratingDataState,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0:
SwactToController0State,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1:
SwactToController1State,
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0: UnlockSimplexState,
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1: UnlockDuplexState,
consts.STRATEGY_STATE_ACTIVATING_UPGRADE: ActivatingUpgradeState,
consts.STRATEGY_STATE_COMPLETING_UPGRADE: CompletingUpgradeState,
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY:
CreatingVIMUpgradeStrategyState,
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY:
ApplyingVIMUpgradeStrategyState,
consts.STRATEGY_STATE_DELETING_LOAD: DeletingLoadState,
}
def __init__(self, strategy_lock, audit_rpc_client):
super(SwUpgradeOrchThread, self).__init__(
strategy_lock,
audit_rpc_client,
consts.SW_UPDATE_TYPE_UPGRADE, # software update strategy type
vim.STRATEGY_NAME_SW_UPGRADE, # strategy type used by vim
consts.STRATEGY_STATE_PRE_CHECK) # starting state
# Initialize shared cache instances for the states that require them
self._shared_caches = SharedCacheRepository(consts.SW_UPDATE_TYPE_UPGRADE)
self._shared_caches.initialize_caches()
def trigger_audit(self):
"""Trigger an audit for upgrade (which is combined with patch audit)"""
self.audit_rpc_client.trigger_patch_audit(self.context)
def delete(self, sw_update_strategy):
# Restart caches for next strategy
self._shared_caches.initialize_caches()
super(SwUpgradeOrchThread, self).delete(sw_update_strategy)
def determine_state_operator(self, strategy_step):
state = super(SwUpgradeOrchThread, self).determine_state_operator(
strategy_step)
state.add_shared_caches(self._shared_caches)
return state
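As a reader's aid, the polling lifecycle described in the class docstring above can be reduced to a minimal sketch. The real OrchThread internals are not part of this change, so get_strategy and run_step are illustrative stand-ins for the database query and the per-step state machine.

import threading
import time

class MiniOrchThread(threading.Thread):
    """Poll for an active strategy and advance it (illustrative only)."""

    def __init__(self, get_strategy, run_step, interval=10):
        super().__init__(daemon=True)
        self._get_strategy = get_strategy  # stand-in for the DB query
        self._run_step = run_step          # stand-in for the state machine
        self._interval = interval
        self._halt = threading.Event()

    def run(self):
        while not self._halt.is_set():
            strategy = self._get_strategy()
            if strategy is not None and strategy.state in ("applying", "aborting"):
                self._run_step(strategy)   # updates state/progress as it goes
            time.sleep(self._interval)

    def stop(self):
        self._halt.set()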

View File

@ -572,7 +572,7 @@ class DBAPISubcloudTest(base.DCManagerTestCase):
def test_create_sw_update_strategy(self):
sw_update_strategy = self.create_sw_update_strategy(
self.ctx,
type=consts.SW_UPDATE_TYPE_UPGRADE,
type=consts.SW_UPDATE_TYPE_SOFTWARE,
subcloud_apply_type=consts.SUBCLOUD_APPLY_TYPE_SERIAL,
max_parallel_subclouds=42,
stop_on_failure=False,
@ -582,7 +582,7 @@ class DBAPISubcloudTest(base.DCManagerTestCase):
new_sw_update_strategy = db_api.sw_update_strategy_get(self.ctx)
self.assertIsNotNone(new_sw_update_strategy)
self.assertEqual(consts.SW_UPDATE_TYPE_UPGRADE, new_sw_update_strategy.type)
self.assertEqual(consts.SW_UPDATE_TYPE_SOFTWARE, new_sw_update_strategy.type)
self.assertEqual(
consts.SUBCLOUD_APPLY_TYPE_SERIAL,
new_sw_update_strategy.subcloud_apply_type,

View File

@ -12,8 +12,6 @@ from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states import applying_vim_strategy
from dcmanager.tests.unit.fakes import FakeVimStrategy
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
STRATEGY_READY_TO_APPLY = FakeVimStrategy(state=vim.STATE_READY_TO_APPLY)
STRATEGY_APPLYING = FakeVimStrategy(state=vim.STATE_APPLYING)
@ -207,14 +205,3 @@ class ApplyingVIMStrategyMixin(object):
self.assert_step_updated(
self.strategy_step.subcloud_id, consts.STRATEGY_STATE_FAILED
)
class TestSwUpgradeApplyingVIMStrategyStage(
ApplyingVIMStrategyMixin, TestSwUpgradeState
):
def setUp(self):
super(TestSwUpgradeApplyingVIMStrategyStage, self).setUp()
self.set_state(
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0,
)

View File

@ -11,8 +11,6 @@ from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states import creating_vim_strategy
from dcmanager.tests.unit.fakes import FakeVimStrategy
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
STRATEGY_BUILDING = FakeVimStrategy(state=vim.STATE_BUILDING)
STRATEGY_DONE_BUILDING = FakeVimStrategy(state=vim.STATE_READY_TO_APPLY)
@ -180,13 +178,3 @@ class CreatingVIMStrategyStageMixin(object):
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeCreatingVIMStrategyStage(CreatingVIMStrategyStageMixin,
TestSwUpgradeState):
"""Test Creating Vim Strategy for a SW upgrade"""
def setUp(self):
super(TestSwUpgradeCreatingVIMStrategyStage, self).setUp()
self.set_state(consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY)

View File

@ -1,55 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.common.exceptions import InvalidParameterValue
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_PATCHING_CACHE_TYPE
from dcmanager.orchestrator.states.upgrade.cache.shared_cache_repository import \
SharedCacheRepository
from dcmanager.tests import base
CACHE_CLASS_PATH = 'dcmanager.orchestrator.states.upgrade.cache.' \
'shared_client_cache.SharedClientCache'
MOCK_REGION_ONE_PATCHES = {'applied_patch': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'}
}
class TestSharedCacheRepo(base.DCManagerTestCase):
def setUp(self):
super(TestSharedCacheRepo, self).setUp()
# Return the same mock patches when reading from cache and from client
self.read_patch = mock.patch('%s.read' % CACHE_CLASS_PATH,
return_value=MOCK_REGION_ONE_PATCHES)
self.mock_read = self.read_patch.start()
# Initialize repository and instantiate caches
self.shared_cache = SharedCacheRepository(
operation_type=consts.SW_UPDATE_TYPE_UPGRADE)
self.shared_cache.initialize_caches()
def test_read_from_cache_should_use_shared_cache_if_present(self):
patches = self.shared_cache.read(REGION_ONE_PATCHING_CACHE_TYPE)
# Verify that cache instance was used to retrieve patches
assert set(patches.keys()) == set(MOCK_REGION_ONE_PATCHES.keys())
assert self.mock_read.call_count == 1
def test_read_from_cache_should_raise_exception_if_cache_type_invalid(self):
# Verify that an exception is raised if the cache type does not
# correspond to an existing one
self.assertRaises(InvalidParameterValue, self.shared_cache.read,
"Invalid type")
def tearDown(self):
super(TestSharedCacheRepo, self).tearDown()
self.read_patch.stop()

View File

@ -1,220 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import copy
import socket
import threading
import time
import mock
from dccommon.drivers.openstack.patching_v1 import PatchingClient
from dcmanager.common.exceptions import InvalidParameterValue
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_PATCHING_CACHE_SPECIFICATION
from dcmanager.orchestrator.states.upgrade.cache.cache_specifications import \
REGION_ONE_PATCHING_CACHE_TYPE
from dcmanager.orchestrator.states.upgrade.cache.shared_client_cache import \
SharedClientCache
from dcmanager.tests import base
CACHE_CLASS_PATH = 'dcmanager.orchestrator.states.upgrade.cache.' \
'shared_client_cache.SharedClientCache'
CACHE_CLIENT_PATH = 'dcmanager.orchestrator.states.upgrade.cache.clients'
BLOCKING_TIME_SECONDS = 0.01
WAIT_TIME_SECONDS = 0.005
MOCK_REGION_ONE_PATCHES = {'committed_patch': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'applied_patch': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'available_patch': {'sw_version': '17.07',
'repostate': 'Available',
'patchstate': 'Available'}
}
class TestRegionOnePatchingCache(base.DCManagerTestCase):
def setUp(self):
super(TestRegionOnePatchingCache, self).setUp()
# Mock patching client used by cache
self.mock_client = mock.MagicMock(spec=PatchingClient)
self.mock_client.query.side_effect = lambda timeout: MOCK_REGION_ONE_PATCHES
self.region_one_client_patch = mock.patch(
'%s.get_patching_client' % CACHE_CLIENT_PATH,
return_value=self.mock_client)
self.region_one_client_patch.start()
def test_read_should_use_cache(self):
# Retrieve patches from cache for the first time
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
REGION_ONE_PATCHING_CACHE_SPECIFICATION)
patches = patching_cache.read()
# Check if mocked client was used to retrieve the patches
assert set(patches.keys()) == set(MOCK_REGION_ONE_PATCHES.keys())
assert self.mock_client.query.call_count == 1
# Retrieve patches from cache another time
patches = patching_cache.read()
# Check if cached patches were used instead of the client (client is not
# called again)
assert set(patches.keys()) == set(MOCK_REGION_ONE_PATCHES.keys())
assert self.mock_client.query.call_count == 1
def test_read_should_block_concurrent_calls(self):
# Alter mock client to block the query and hold the cache lock for a while
self.mock_client.query.side_effect = \
lambda timeout: self._blocking_mock_client_query()
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
REGION_ONE_PATCHING_CACHE_SPECIFICATION)
# Start thread to retrieve patches from cache for the first time
# Call from a separate method to add the client mock to the thread context
thread = threading.Thread(target=self._retrieve_patches_from_thread,
args=(patching_cache,))
thread.start()
# Wait for a short while before second call, so that the first call acquires
# lock
time.sleep(WAIT_TIME_SECONDS)
# By this point, the first call should have reached the blocking query
assert self.mock_client.query.call_count == 1
# Check that a second call retrieves patches directly (does not call client
# again)
patches = patching_cache.read()
assert set(patches.keys()) == set(MOCK_REGION_ONE_PATCHES.keys())
assert self.mock_client.query.call_count == 1
def test_read_should_retry_if_specified(self):
# First attempt to read from client should fail, second one should succeed
attempt_sequence = [self._blocking_mock_client_error,
self._blocking_mock_client_query]
self.mock_client.query.side_effect = \
lambda timeout: attempt_sequence.pop(0)()
# Specify two attempts for the patching client read
cache_specification = copy.deepcopy(REGION_ONE_PATCHING_CACHE_SPECIFICATION)
cache_specification.max_attempts = 2
cache_specification.retry_sleep_msecs = WAIT_TIME_SECONDS * 1000
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
cache_specification)
# Start thread to retrieve patches from cache for the first time
# Call from a separate method to add the client mock to the thread context
thread = threading.Thread(target=self._retrieve_patches_from_thread,
args=(patching_cache,))
thread.start()
# After a while, the first call should try to retrieve data for the first
# time
time.sleep(WAIT_TIME_SECONDS)
assert self.mock_client.query.call_count == 1
# After some more time, the first attempt should fail, and the call should
# try a second time
time.sleep(BLOCKING_TIME_SECONDS + WAIT_TIME_SECONDS)
assert self.mock_client.query.call_count == 2
# Start a second read while the first one is retrying
patches = patching_cache.read()
# Second read should retrieve data directly from the cache (client should
# not be called)
assert set(patches.keys()) == set(MOCK_REGION_ONE_PATCHES.keys())
assert self.mock_client.query.call_count == 2
def test_read_should_fail_concurrent_calls_if_client_fails(self):
# Read from client should only succeed on third attempt
attempt_sequence = [self._blocking_mock_client_error,
self._blocking_mock_client_error,
self._blocking_mock_client_query]
self.mock_client.query.side_effect = \
lambda timeout: attempt_sequence.pop(0)()
# Specify only two attempts for the patching client read
# Since client read should only succeed on third attempt, fetch should fail
cache_specification = copy.deepcopy(REGION_ONE_PATCHING_CACHE_SPECIFICATION)
cache_specification.max_attempts = 2
cache_specification.retry_sleep_msecs = WAIT_TIME_SECONDS * 1000
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
cache_specification)
# Start thread to retrieve patches from cache for the first time
# Call from a separate method to add the client mock to the thread context
thread = threading.Thread(target=self._retrieve_patches_from_thread,
args=(patching_cache,))
thread.start()
# Wait for first call to have the client lock
time.sleep(WAIT_TIME_SECONDS)
assert self.mock_client.query.call_count == 1
# Start a second read while the first one is reading from client
# Since client read should fail, reading from cache should fail too
self.assertRaises(RuntimeError, patching_cache.read)
# Check that second call did not read from client (number of calls did not
# change)
assert self.mock_client.query.call_count == 2
def test_read_should_filter_by_given_state(self):
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
REGION_ONE_PATCHING_CACHE_SPECIFICATION)
# Retrieve available patches from cache and verify results
assert set(patching_cache.read(state='Available').keys()) == {
'available_patch'}
assert self.mock_client.query.call_count == 1
# Retrieve applied patches from cache and verify results
assert set(patching_cache.read(state='Applied').keys()) == {'applied_patch'}
assert self.mock_client.query.call_count == 1
# Retrieve committed patches from cache and verify results
assert set(patching_cache.read(state='Committed').keys()) == {
'committed_patch'}
assert self.mock_client.query.call_count == 1
def test_read_should_reject_invalid_filter_parameter(self):
patching_cache = SharedClientCache(REGION_ONE_PATCHING_CACHE_TYPE,
REGION_ONE_PATCHING_CACHE_SPECIFICATION)
self.assertRaises(InvalidParameterValue, patching_cache.read,
invalid_param='test')
def _retrieve_patches_from_thread(self, patching_cache):
patcher = mock.patch('%s.get_patching_client' % CACHE_CLIENT_PATH,
return_value=self.mock_client)
patcher.start()
patching_cache.read()
patcher.stop()
@staticmethod
def _blocking_mock_client_query():
time.sleep(BLOCKING_TIME_SECONDS)
return MOCK_REGION_ONE_PATCHES
@staticmethod
def _blocking_mock_client_error():
time.sleep(BLOCKING_TIME_SECONDS)
raise socket.timeout
def tearDown(self):
super(TestRegionOnePatchingCache, self).tearDown()
self.region_one_client_patch.stop()
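Taken together, these tests pin down a simple contract: the first read fetches from the client under a lock, concurrent readers then reuse the cached value, and a bounded retry loop surfaces persistent client failures as RuntimeError. The sketch below is illustrative only; fetch, max_attempts, and retry_sleep stand in for the real cache specification fields.

import threading
import time

class MiniSharedClientCache:
    """Cache one expensive client read behind a lock (illustrative only)."""

    def __init__(self, fetch, max_attempts=1, retry_sleep=0.0):
        self._fetch = fetch            # queries the real client
        self._lock = threading.Lock()  # serializes the first read
        self._data = None
        self._max_attempts = max_attempts
        self._retry_sleep = retry_sleep

    def read(self):
        if self._data is None:
            with self._lock:            # concurrent callers block here
                if self._data is None:  # re-check once the lock is held
                    self._data = self._fetch_with_retry()
        return self._data

    def _fetch_with_retry(self):
        for attempt in range(1, self._max_attempts + 1):
            try:
                return self._fetch()
            except Exception:
                if attempt == self._max_attempts:
                    raise RuntimeError("client read failed; cache not populated")
                time.sleep(self._retry_sleep)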

View File

@ -1,239 +0,0 @@
#
# Copyright (c) 2020-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.orchestrator.states.upgrade import activating
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
VALID_UPGRADE = FakeUpgrade(state='imported')
ACTIVATING_UPGRADE = FakeUpgrade(state='activation-requested')
ACTIVATING_FAILED = FakeUpgrade(state='activation-failed')
ALREADY_ACTIVATED_UPGRADE = FakeUpgrade(state='activation-complete')
@mock.patch(
"dcmanager.orchestrator.states.upgrade.activating.DEFAULT_MAX_QUERIES", 5)
@mock.patch(
"dcmanager.orchestrator.states.upgrade.activating.DEFAULT_SLEEP_DURATION", 1)
@mock.patch(
"dcmanager.orchestrator.states.upgrade.activating.MAX_FAILED_RETRIES", 3)
class TestSwUpgradeActivatingStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeActivatingStage, self).setUp()
# next state after activating an upgrade is 'completing'
self.on_success_state = consts.STRATEGY_STATE_COMPLETING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.upgrade_activate = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
def test_upgrade_subcloud_activating_upgrade_failure(self):
"""Test the activating upgrade API call fails."""
# upgrade_activate will only be called if an appropriate upgrade exists
self.sysinv_client.get_upgrades.return_value = [VALID_UPGRADE, ]
# API call raises an exception when it is rejected
self.sysinv_client.upgrade_activate.side_effect = \
Exception("upgrade activate failed for some reason")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the expected API call was invoked
self.sysinv_client.upgrade_activate.assert_called()
# Verify the state moves to 'failed'
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_activating_upgrade_success(self):
"""Test the activating upgrade step succeeds."""
# upgrade_activate will only be called if an appropriate upgrade exists
# first call is before the API call
# loops once waiting for the activation to complete
# final query is the activation having completed
self.sysinv_client.get_upgrades.side_effect = [
[VALID_UPGRADE, ],
[ACTIVATING_UPGRADE, ],
[ALREADY_ACTIVATED_UPGRADE], ]
# API call will not raise an exception, and will return an upgrade
self.sysinv_client.upgrade_activate.return_value = ACTIVATING_UPGRADE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked
self.sysinv_client.upgrade_activate.assert_called()
# verify the DB update was invoked
updated_subcloud = db_api.subcloud_get(self.ctx,
self.subcloud.id)
self.assertEqual(
updated_subcloud.deploy_status, consts.DEPLOY_STATE_UPGRADE_ACTIVATED
)
# On success, the state should be updated to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_activating_upgrade_skip_already_activated(self):
"""Test the activating upgrade step skipped if already activated."""
# upgrade_activate will only be called if an appropriate upgrade exists
self.sysinv_client.get_upgrades.return_value = \
[ALREADY_ACTIVATED_UPGRADE, ]
# API call will not be invoked, so no need to mock it
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# upgrade is already in one of the activating states so skip activating
self.sysinv_client.upgrade_activate.assert_not_called()
# On success, the state is set to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_activating_upgrade_times_out(self):
"""Test the activating upgrade step should succeed but times out."""
# upgrade_activate will only be called if an appropriate upgrade exists
# first call is before the API call
# remaining loops are waiting for the activation to complete
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
[[VALID_UPGRADE, ], ],
itertools.repeat([ACTIVATING_UPGRADE, ]))
# API call will not raise an exception, and will return an upgrade
self.sysinv_client.upgrade_activate.return_value = ACTIVATING_UPGRADE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked
self.sysinv_client.upgrade_activate.assert_called()
# verify the get_upgrades query was invoked: 1 + max_attempts times
self.assertEqual(activating.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Times out. state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_activating_upgrade_retries(self):
"""Test the activating upgrade step fails but succeeds on retry."""
# upgrade_activate will only be called if an appropriate upgrade exists
# first call is before the API call
# then goes to activating
# then activating fails which triggers a retry
# then goes to activating
# then goes to success
self.sysinv_client.get_upgrades.side_effect = [
[VALID_UPGRADE, ],
[ACTIVATING_UPGRADE, ],
[ACTIVATING_FAILED, ],
[ACTIVATING_UPGRADE, ],
[ALREADY_ACTIVATED_UPGRADE, ]
]
# API call will not raise an exception, and will return an upgrade
self.sysinv_client.upgrade_activate.return_value = ACTIVATING_UPGRADE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked twice
self.assertEqual(2, self.sysinv_client.upgrade_activate.call_count)
# Even though it failed once, the retry passed
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_activating_upgrade_exceed_max_retries(self):
"""Test the activating upgrade step should succeed but times out."""
# upgrade_activate will only be called if an appropriate upgrade exists
# first call is before the API call
# remaining loops keep retrying because the activation fails
# the get_upgrades query is invoked less than max query limit
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
[[VALID_UPGRADE, ], ],
itertools.repeat([ACTIVATING_FAILED, ]))
# API call will not raise an exception, and will return an upgrade
self.sysinv_client.upgrade_activate.return_value = ACTIVATING_UPGRADE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked: 1 + MAX_FAILED_RETRIES times
self.assertEqual(activating.MAX_FAILED_RETRIES + 1,
self.sysinv_client.upgrade_activate.call_count)
# verify get_upgrades query was invoked: 1 + MAX_FAILED_RETRIES times
self.assertEqual(activating.MAX_FAILED_RETRIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Exceeds maximum retries. state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_activating_retries_until_times_out(self):
"""Test the activating upgrade fails due to reaches the max retries."""
# upgrade_activate will only be called if an appropriate upgrade exists
# first call is before the API call
# then goes to three times of activating
# remaining loops are activate failed
# the API call is invoked fewer than the maximum retries + 1 times
# the state goes to failed due to a timeout rather than max retries
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
[[VALID_UPGRADE, ],
[ACTIVATING_UPGRADE, ],
[ACTIVATING_UPGRADE, ],
[ACTIVATING_UPGRADE, ], ],
itertools.repeat([ACTIVATING_FAILED, ]))
# API call will not raise an exception, and will return an upgrade
self.sysinv_client.upgrade_activate.return_value = ACTIVATING_UPGRADE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked:
# max_attempts + 1 - 3 times, 3 times for the in-progress status
self.assertEqual(activating.DEFAULT_MAX_QUERIES - 2,
self.sysinv_client.upgrade_activate.call_count)
# verify the get_upgrades query was invoked: 1 + max_attempts times
self.assertEqual(activating.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Times out. state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
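The retry/timeout behavior these tests exercise follows the pattern sketched below. This is illustrative only; the exact call-count bookkeeping in the real activating state differs slightly, and the state strings come from the fakes above.

import time

def activate_and_wait(sysinv_client, max_queries, sleep_s, max_failed_retries):
    failures = 0
    for _ in range(max_queries + 1):
        state = sysinv_client.get_upgrades()[0].state
        if state == "activation-complete":
            return True                       # success: move to the next state
        if state in ("imported", "activation-failed"):
            if failures > max_failed_retries:
                return False                  # retries exhausted: fail the step
            failures += 1
            sysinv_client.upgrade_activate()  # (re)request the activation
        time.sleep(sleep_s)                   # still activating; keep polling
    return False                              # timed out: fail the step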

View File

@ -1,35 +0,0 @@
#
# Copyright (c) 2020, 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.test_base import TestSwUpdate
CACHE_CLIENT_PATH = 'dcmanager.orchestrator.states.upgrade.cache.clients'
class TestSwUpgradeState(TestSwUpdate):
# Setting DEFAULT_STRATEGY_TYPE to upgrade will set up the upgrade
# orchestration worker, and will mock away the other orch threads
DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_UPGRADE
def setUp(self):
super(TestSwUpgradeState, self).setUp()
# Modify cache helpers to return client mocks
self.patch_cache_client_mock = mock.patch('%s.get_patching_client' %
CACHE_CLIENT_PATH,
return_value=self.patching_client)
self.sysinv_cache_client_mock = mock.patch('%s.get_sysinv_client' %
CACHE_CLIENT_PATH,
return_value=self.sysinv_client)
self.patch_cache_client_mock.start()
self.sysinv_cache_client_mock.start()
def tearDown(self):
self.patch_cache_client_mock.stop()
self.sysinv_cache_client_mock.stop()
super(TestSwUpgradeState, self).tearDown()
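An equivalent setup, sketched below, uses addCleanup so the patches are undone even if setUp fails partway; the importing-load tests later in this change follow that pattern. The class name is illustrative.

class TestSwUpgradeStateAlt(TestSwUpdate):
    DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_UPGRADE

    def setUp(self):
        super().setUp()
        for helper, client in (('get_patching_client', self.patching_client),
                               ('get_sysinv_client', self.sysinv_client)):
            p = mock.patch('%s.%s' % (CACHE_CLIENT_PATH, helper),
                           return_value=client)
            p.start()
            self.addCleanup(p.stop)  # unpatched automatically, in reverse order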

View File

@ -1,145 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import completing
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.fakes import UPGRADED_VERSION
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
VALID_UPGRADE = FakeUpgrade(state='activation-complete')
INVALID_UPGRADE = FakeUpgrade(state='aborting')
UPGRADE_COMPLETING = FakeUpgrade(state='completing')
@mock.patch(
"dcmanager.orchestrator.states.upgrade.completing.DEFAULT_MAX_QUERIES", 3)
@mock.patch(
"dcmanager.orchestrator.states.upgrade.completing.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeCompletingStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeCompletingStage, self).setUp()
# next state after completing an upgrade is 'deleting load'
self.on_success_state = consts.STRATEGY_STATE_DELETING_LOAD
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_COMPLETING_UPGRADE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.upgrade_complete = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
self.sysinv_client.get_system.return_value = FakeSystem()
def test_upgrade_subcloud_completing_upgrade_failure(self):
"""Test the completing upgrade API call fails."""
# upgrade_complete will only be called if an appropriate upgrade exists
self.sysinv_client.get_upgrades.return_value = [VALID_UPGRADE, ]
# API call raises an exception when it is rejected
self.sysinv_client.upgrade_complete.side_effect = \
Exception("upgrade complete failed for some reason")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the expected API call was invoked
self.sysinv_client.upgrade_complete.assert_called()
# Verify the state moves to 'failed'
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_completing_upgrade_success(self):
"""Test the completing upgrade step succeeds."""
# upgrade_complete will only be called if an appropriate upgrade exists
# it will be re-queried until no upgrade exists
self.sysinv_client.get_upgrades.side_effect = [
[VALID_UPGRADE, ],
[]
]
# API call will not raise an exception. It will delete the upgrade
self.sysinv_client.upgrade_complete.return_value = UPGRADE_COMPLETING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked
self.sysinv_client.upgrade_complete.assert_called()
# verify deploy_status of subcloud is 'complete'
self.assert_subcloud_deploy_status(self.subcloud.id,
consts.DEPLOY_STATE_DONE)
# verify software_version of subcloud is the upgraded version
self.assert_subcloud_software_version(self.subcloud.id,
UPGRADED_VERSION)
# On success, the state should be updated to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_completing_upgrade_skip_already_completed(self):
"""Test the completing upgrade step skipped if already completed."""
# upgrade_complete will only be called if an appropriate upgrade exists
# If the upgrade has been deleted, there is nothing to complete
self.sysinv_client.get_upgrades.return_value = []
# API call will not be invoked, so no need to mock it
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# upgrade is already in one of the completing states so skip completing
self.sysinv_client.upgrade_complete.assert_not_called()
# verify deploy_status of subcloud is 'complete'
self.assert_subcloud_deploy_status(self.subcloud.id,
consts.DEPLOY_STATE_DONE)
# verify software_version of subcloud is the upgraded version
self.assert_subcloud_software_version(self.subcloud.id,
UPGRADED_VERSION)
# On success, the state is set to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_completing_upgrade_timeout(self):
"""Test the completing upgrade step succeeds but times out."""
# upgrade_complete will only be called if an appropriate upgrade exists
self.sysinv_client.get_upgrades.return_value = [VALID_UPGRADE, ]
# API call will not raise an exception. It will delete the upgrade
self.sysinv_client.upgrade_complete.return_value = UPGRADE_COMPLETING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was invoked
self.sysinv_client.upgrade_complete.assert_called()
# verify the get_upgrades query was invoked: 1 + max_attempts times
self.assertEqual(completing.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Verify the state moves to 'failed' due to the timeout
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@ -1,120 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import deleting_load
from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad
from dcmanager.tests.unit.orchestrator.states.fakes import PREVIOUS_PREVIOUS_VERSION
from dcmanager.tests.unit.orchestrator.states.fakes import PREVIOUS_VERSION
from dcmanager.tests.unit.orchestrator.states.fakes import UPGRADED_VERSION
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
PREVIOUS_LOAD = FakeLoad(1, software_version=PREVIOUS_VERSION,
state='imported')
UPGRADED_LOAD = FakeLoad(2, software_version=UPGRADED_VERSION,
state='active')
ONE_LOAD_RESPONSE = [UPGRADED_LOAD, ]
TWO_LOAD_RESPONSE = [PREVIOUS_LOAD, UPGRADED_LOAD, ]
SUCCESS_DELETE_RESPONSE = {
'id': 0,
'uuid': 'aaa4b4c6-8536-41f6-87ea-211d208a723b',
'compatible_version': PREVIOUS_PREVIOUS_VERSION,
'required_patches': '',
'software_version': PREVIOUS_VERSION,
'state': 'deleting',
'created_at': '2020-06-01 12:12:12+00:00',
'updated_at': None
}
@mock.patch("dcmanager.orchestrator.states.upgrade."
"deleting_load.DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade."
"deleting_load.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeDeletingLoadStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeDeletingLoadStage, self).setUp()
# next state after 'deleting load' is 'complete'
self.on_success_state = consts.STRATEGY_STATE_COMPLETE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_DELETING_LOAD)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_loads = mock.MagicMock()
self.sysinv_client.delete_load = mock.MagicMock()
def test_upgrade_subcloud_deleting_N_load(self):
"""Test the deleting load step succeeds with deleting N load
After the subcloud upgrade to N+1 release is complete, the N release
load is removed.
"""
# Mock get_loads API to return 2 loads in the first call and 1 load
# in the subsequent call.
self.sysinv_client.get_loads.side_effect = [
TWO_LOAD_RESPONSE, ONE_LOAD_RESPONSE, ]
# Simulate a delete_load API success on the subcloud.
self.sysinv_client.delete_load.return_value = \
SUCCESS_DELETE_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the delete load API call was invoked
self.sysinv_client.delete_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_deleting_load_timeout(self):
"""Test delete_load invoked and fails if times out
The subcloud still has an N-1 load that needs to be removed before
the N+1 load can be imported. The API call to delete this old load
succeeds, however the state times out waiting for the load to be
removed.
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = TWO_LOAD_RESPONSE
# Simulate a delete_load API success on the subcloud.
self.sysinv_client.delete_load.return_value = \
SUCCESS_DELETE_RESPONSE
# mock the get_loads queries to return 2 loads
self.sysinv_client.get_loads.side_effect = \
itertools.repeat(TWO_LOAD_RESPONSE)
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.delete_load.assert_called()
# verify the get_loads query was invoked 1 + max_attempts times
self.assertEqual(deleting_load.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_loads.call_count)
# verify that state failed due to the delete load never finishing
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@ -1,88 +0,0 @@
#
# Copyright (c) 2020, 2023-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
REGION_ONE_PATCHES = {'DC.2': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.3': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.4': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
}
SUBCLOUD_PATCHES = {'DC.1': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.3': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.4': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.5': {'sw_version': '17.07',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.6': {'sw_version': '17.07',
'repostate': 'Available',
'patchstate': 'Available'},
}
@mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeFinishingPatchStrategyStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeFinishingPatchStrategyStage, self).setUp()
# next state after 'finishing patch strategy' is 'starting upgrade'
self.on_success_state = consts.STRATEGY_STATE_STARTING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY)
# Add mock API endpoints for patching client calls invoked by this state
self.patching_client.query = mock.MagicMock()
self.patching_client.delete = mock.MagicMock()
self.patching_client.commit = mock.MagicMock()
def test_finishing_patch_strategy_success(self):
"""Test finishing_patch_strategy where the API call succeeds."""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
call_args, _ = self.patching_client.delete.call_args_list[0]
self.assertItemsEqual(['DC.5', 'DC.6'], call_args[0])
call_args, _ = self.patching_client.commit.call_args_list[0]
self.assertItemsEqual(['DC.2', 'DC.3'], call_args[0])
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
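The expected behavior this test encodes: patches still 'Available' on the subcloud are deleted, and patches 'Applied' on the subcloud but already 'Committed' in RegionOne are committed. A rough sketch with illustrative names (the real implementation lives in the finishing_patch_strategy state):

def finish_patches(regionone_patches, subcloud_patches, patching_client):
    # Drop subcloud patches never applied (DC.5 and DC.6 above).
    to_delete = [pid for pid, p in subcloud_patches.items()
                 if p["repostate"] == "Available"]
    committed_in_regionone = {pid for pid, p in regionone_patches.items()
                              if p["repostate"] == "Committed"}
    # Commit subcloud patches RegionOne has already committed (DC.2, DC.3).
    to_commit = [pid for pid, p in subcloud_patches.items()
                 if p["repostate"] == "Applied" and pid in committed_in_regionone]
    if to_delete:
        patching_client.delete(to_delete)
    if to_commit:
        patching_client.commit(to_commit)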

View File

@ -1,500 +0,0 @@
#
# Copyright (c) 2020-2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dccommon.exceptions import LoadMaxReached
from dcmanager.common import consts
from dcmanager.common.exceptions import VaultLoadMissingError
from dcmanager.orchestrator.states.upgrade import importing_load
from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.fakes import PREVIOUS_PREVIOUS_VERSION
from dcmanager.tests.unit.orchestrator.states.fakes import PREVIOUS_VERSION
from dcmanager.tests.unit.orchestrator.states.fakes import UPGRADED_VERSION
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
PREVIOUS_PREVIOUS_LOAD = FakeLoad(0, software_version=PREVIOUS_PREVIOUS_VERSION,
state='imported')
PREVIOUS_LOAD = FakeLoad(1, software_version=PREVIOUS_VERSION,
state='active')
UPGRADED_LOAD = FakeLoad(2,
compatible_version=PREVIOUS_VERSION,
software_version=UPGRADED_VERSION)
# Use the same fields for the IMPORTING_LOAD as UPGRADED_LOAD
IMPORTING_LOAD = FakeLoad(UPGRADED_LOAD.id,
state='importing',
compatible_version=UPGRADED_LOAD.compatible_version,
software_version=UPGRADED_LOAD.software_version)
# Use the same fields for the IMPORTED_LOAD as UPGRADED_LOAD
IMPORTED_LOAD = FakeLoad(UPGRADED_LOAD.id,
state='imported',
compatible_version=UPGRADED_LOAD.compatible_version,
software_version=UPGRADED_LOAD.software_version)
DEST_LOAD_IMPORTED = [IMPORTED_LOAD, ]
DEST_LOAD_EXISTS = [UPGRADED_LOAD, ]
DEST_LOAD_MISSING = [PREVIOUS_LOAD, ]
DEST_LOAD_MISSING_2_LOADS = [PREVIOUS_LOAD, PREVIOUS_PREVIOUS_LOAD, ]
FAKE_ISO = '/opt/dc-vault/loads/' + UPGRADED_VERSION + '/bootimage.iso'
FAKE_SIG = '/opt/dc-vault/loads/' + UPGRADED_VERSION + '/bootimage.sig'
FAILED_IMPORT_RESPONSE = 'kaboom'
# To simulate a response where a database record has already been created
# but the state was set to 'error'.
FAILED_IMPORT_RESPONSE_PROCESSING_ERROR = FakeLoad.from_dict({
'obj_id': 2,
'compatible_version': PREVIOUS_VERSION,
'required_patches': '',
'software_version': UPGRADED_VERSION,
'state': 'error',
'created_at': '2020-06-01 12:12:12+00:00',
'updated_at': None
})
SUCCESS_IMPORTING_RESPONSE = FakeLoad.from_dict({
'obj_id': 2,
'compatible_version': PREVIOUS_VERSION,
'required_patches': '',
'software_version': UPGRADED_VERSION,
'state': 'importing',
'created_at': '2020-06-01 12:12:12+00:00',
'updated_at': None
})
SUCCESS_IMPORT_METADATA_RESPONSE = FakeLoad.from_dict({
'obj_id': 2,
'compatible_version': PREVIOUS_VERSION,
'required_patches': '',
'software_version': UPGRADED_VERSION,
'state': 'imported-metadata',
'created_at': '2020-06-01 12:12:12+00:00',
'updated_at': None
})
SUCCESS_DELETE_RESPONSE = {
'id': 0,
'uuid': 'aaa4b4c6-8536-41f6-87ea-211d208a723b',
'compatible_version': PREVIOUS_VERSION,
'required_patches': '',
'software_version': PREVIOUS_PREVIOUS_VERSION,
'state': 'deleting',
'created_at': '2020-06-01 12:12:12+00:00',
'updated_at': None
}
@mock.patch("dcmanager.orchestrator.states.upgrade.importing_load."
"DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.importing_load."
"DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeImportingLoadStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeImportingLoadStage, self).setUp()
# next state after 'importing load' is 'updating patches'
self.on_success_state = consts.STRATEGY_STATE_UPDATING_PATCHES
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_IMPORTING_LOAD)
# Mock the get_vault_load_files utility method
p = mock.patch(
'dcmanager.common.utils.get_vault_load_files')
self.mock_vault_files = p.start()
# simulate get_vault_load_files finding the iso and sig in the vault
self.mock_vault_files.return_value = (FAKE_ISO, FAKE_SIG)
self.addCleanup(p.stop)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_DUPLEX
self.sysinv_client.get_system.return_value = system_values
self.sysinv_client.get_loads = mock.MagicMock()
self.sysinv_client.get_load = mock.MagicMock()
self.sysinv_client.delete_load = mock.MagicMock()
self.sysinv_client.import_load = mock.MagicMock()
self.sysinv_client.import_load_metadata = mock.MagicMock()
def test_upgrade_subcloud_importing_load_success(self):
"""Test the importing load step succeeds.
The load will be imported on the subcloud when the subcloud does not
have the load already imported, and the API call succeeds to import it.
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# Simulate an API success on the subcloud.
self.sysinv_client.import_load.return_value = \
SUCCESS_IMPORTING_RESPONSE
# mock the get_load queries to return 'imported' and not 'importing'
self.sysinv_client.get_load.return_value = IMPORTED_LOAD
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_importing_get_load_retry(self):
"""Test importing load where HTTP error occurs after successful API call."""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# Simulate an API success on the subcloud.
self.sysinv_client.import_load.return_value = \
SUCCESS_IMPORTING_RESPONSE
# Simulate an HTTP exception thrown
self.sysinv_client.get_load.side_effect = \
[IMPORTING_LOAD,
Exception("HTTPBadRequest: this is a fake exception"),
IMPORTED_LOAD]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_sx_subcloud_import_load_vault_load_abort(self):
"""Import_load_metadata retry invoked and strategy continues as expected"""
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
# Two get_loads calls: one to the subcloud, one to the system controller
self.sysinv_client.get_loads.side_effect = [
DEST_LOAD_MISSING, DEST_LOAD_EXISTS, ]
self.sysinv_client.import_load_metadata.side_effect = \
[SUCCESS_IMPORT_METADATA_RESPONSE,
Exception("HTTPBadRequest: this is a fake exception"),
VaultLoadMissingError(file_type='.iso', vault_dir='/mock/vault/'),
SUCCESS_IMPORT_METADATA_RESPONSE]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load metadata API call was invoked
self.sysinv_client.import_load_metadata.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_dx_importing_import_load_retry(self):
"""Importing load on AIO-DX where import_load HTTP error requires retry."""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
self.sysinv_client.import_load.side_effect = \
[Exception("HTTPBadRequest: this is a fake exception"),
SUCCESS_IMPORTING_RESPONSE]
# mock the get_load queries to return 'imported' and not 'importing'
self.sysinv_client.get_load.return_value = IMPORTED_LOAD
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_dx_import_load_max_retry(self):
"""Test importing load on AIO-DX after import maximum load."""
# Get load calls to the subcloud and systemcontroller
self.sysinv_client.get_loads.side_effect = [
DEST_LOAD_MISSING, DEST_LOAD_IMPORTED, DEST_LOAD_IMPORTED]
self.sysinv_client.import_load.side_effect = \
[Exception("HTTPBadRequest: this is a fake exception"),
LoadMaxReached(region_name='subcloud1')]
# mock the get_load queries to return 'imported' and not 'importing'
self.sysinv_client.get_load.return_value = IMPORTED_LOAD
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_importing_load_with_old_load_success(self):
"""Test the importing load step succeeds with existing old load
The old (N-1) load already exists and is removed before importing the
new (N+1) load. Both the delete_load and import_load call succeed.
"""
# Simulate the target load has not been imported yet on the subcloud
# Mock get_loads API to return 2 loads in the first call and 1 load
# in subsequent call.
self.sysinv_client.get_loads.side_effect = [
DEST_LOAD_MISSING_2_LOADS, DEST_LOAD_MISSING, ]
# Simulate a delete_load API success on the subcloud.
self.sysinv_client.delete_load.return_value = \
SUCCESS_DELETE_RESPONSE
# Simulate an API success on the subcloud.
self.sysinv_client.import_load.return_value = \
SUCCESS_IMPORTING_RESPONSE
# mock the get_load queries to return 'imported' and not 'importing'
self.sysinv_client.get_load.return_value = IMPORTED_LOAD
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the delete load API call was invoked
self.sysinv_client.delete_load.assert_called()
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_importing_load_fails_missing_vault_files(self):
"""Test importing load fails when files are not in the vault."""
self.mock_vault_files.side_effect = \
VaultLoadMissingError(file_type='.iso', vault_dir='/mock/vault/')
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# import_load will not be invoked, so nothing to mock for it
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was never invoked
self.sysinv_client.import_load.assert_not_called()
# Verify a failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_importing_load_skip_existing(self):
"""Test the importing load step skipped due to load already there"""
# Simulate the target load has been previously imported on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_EXISTS
# import_load will not be invoked, so nothing to mock for it
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# The import_load should not have been attempted
self.sysinv_client.import_load.assert_not_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_importing_load_general_failure(self):
"""Test import_load invoked and fails due to general failure condition
The API call returns no new load data. This scenario can be
observed following a connection, disk space, semantic check, or load
validation failure.
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# Simulate an API failure on the subcloud.
self.sysinv_client.import_load.return_value = FAILED_IMPORT_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# Verify a failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_importing_load_processing_error(self):
"""Test import_load invoked and fails due to error state
The API call succeeds, however the subsequent get_load call
returns a load in an error state. This scenario can be observed
if sysinv conductor fails to process the import request in
the background.
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# Simulate an API success on the subcloud.
self.sysinv_client.import_load.return_value = SUCCESS_IMPORTING_RESPONSE
# mock the get_load queries to return 'error' state load data
self.sysinv_client.get_load.return_value = \
FAILED_IMPORT_RESPONSE_PROCESSING_ERROR
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# Verify a failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_importing_load_timeout(self):
"""Test import_load invoked and fails if times out before 'imported'
The API call succeeds, however the state times out waiting for
load state to transition from 'importing' to 'imported'
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING
# Simulate an API success on the subcloud.
self.sysinv_client.import_load.return_value = \
SUCCESS_IMPORTING_RESPONSE
# mock the get_load queries to return 'importing' and not 'imported'
self.sysinv_client.get_load.side_effect = \
itertools.repeat(IMPORTING_LOAD)
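# itertools.repeat yields the same 'importing' load on every poll, so the
# state can never observe 'imported' and must exhaust its query budget.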
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.import_load.assert_called()
# verify the query was invoked max_attempts times
self.assertEqual(importing_load.DEFAULT_MAX_QUERIES,
self.sysinv_client.get_load.call_count)
# verify that state failed due to the import_load never finishing
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_deleting_load_timeout(self):
"""Test delete_load invoked and fails if times out
The subcloud still has an N-1 load that needs to be removed before
the N+1 load can be imported. The API call to delete this old load
succeeds, however the state times out waiting for the load to be
removed.
"""
# Simulate the target load has not been imported yet on the subcloud
self.sysinv_client.get_loads.return_value = DEST_LOAD_MISSING_2_LOADS
# Simulate a delete_load API success on the subcloud.
self.sysinv_client.delete_load.return_value = \
SUCCESS_DELETE_RESPONSE
# mock the get_loads queries to return 2 loads
self.sysinv_client.get_loads.side_effect = \
itertools.repeat(DEST_LOAD_MISSING_2_LOADS)
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load API call was invoked
self.sysinv_client.delete_load.assert_called()
# verify the get_loads query was invoked 1 + max_attempts times
self.assertEqual(importing_load.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_loads.call_count)
# verify that state failed due to the delete load never finishing
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_sx_subcloud_import_success(self):
"""Test import_load_metadata invoked and strategy continues as expected"""
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
# Two get_loads calls: one to the subcloud, one to the system controller
self.sysinv_client.get_loads.side_effect = [
DEST_LOAD_MISSING, DEST_LOAD_EXISTS, ]
# Simulate an API success on the subcloud.
self.sysinv_client.import_load_metadata.return_value = \
SUCCESS_IMPORT_METADATA_RESPONSE
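# Note: for simplex subclouds the test exercises import_load_metadata
# rather than import_load; only the load metadata is imported up front
# (the full load is presumed to be installed later, during the upgrade).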
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load metadata API call was invoked
self.sysinv_client.import_load_metadata.assert_called()
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_sx_subcloud_import_failure(self):
"""Test when import_load_metadata fails the strategy exits"""
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
# Two get_loads calls: one to the subcloud, one to the system controller
self.sysinv_client.get_loads.side_effect = [
DEST_LOAD_MISSING, DEST_LOAD_EXISTS, ]
# Simulate an API failure on the subcloud.
self.sysinv_client.import_load_metadata.side_effect = \
Exception("Failure to create load")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the import load metadata API call was invoked
self.sysinv_client.import_load_metadata.assert_called()
# verify that strategy state is set to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)


@ -1,162 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
MISSING_LICENSE_RESPONSE = {
u'content': u'',
u'error': u'License file not found. A license may not have been installed.'
}
LICENSE_VALID_RESPONSE = {
u'content': u'A valid license',
u'error': u''
}
ALTERNATE_LICENSE_RESPONSE = {
u'content': u'A different valid license',
u'error': u''
}
class TestSwUpgradeInstallingLicenseStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeInstallingLicenseStage, self).setUp()
# next state after installing a license is 'importing load'
self.on_success_state = consts.STRATEGY_STATE_IMPORTING_LOAD
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_INSTALLING_LICENSE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_license = mock.MagicMock()
self.sysinv_client.install_license = mock.MagicMock()
def test_upgrade_subcloud_license_install_failure(self):
"""Test the installing license step where the install fails.
The system controller has a license, but the API call to install on the
subcloud fails.
"""
# Order of get_license calls:
# first license query is to system controller
# second license query is to subcloud (should be missing)
self.sysinv_client.get_license.side_effect = [LICENSE_VALID_RESPONSE,
MISSING_LICENSE_RESPONSE]
# Simulate a license install failure on the subcloud
self.sysinv_client.install_license.return_value = \
MISSING_LICENSE_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the license install was invoked
self.sysinv_client.install_license.assert_called()
# Verify an install_license failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_license_install_success(self):
"""Test the install license step succeeds.
The license will be installed on the subcloud when system controller
has a license, the subcloud does not have a license, and the API call
succeeds.
"""
# Order of get_license calls:
# first license query is to system controller
# second license query is to subcloud (should be missing)
self.sysinv_client.get_license.side_effect = [LICENSE_VALID_RESPONSE,
MISSING_LICENSE_RESPONSE]
# A license install should return a success
self.sysinv_client.install_license.return_value = \
LICENSE_VALID_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the license install was invoked
self.sysinv_client.install_license.assert_called()
# On success, the next state after installing license is importing load
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_license_skip_existing(self):
"""Test the install license step skipped due to license up to date"""
# Order of get_license calls:
# first license query is to system controller
# second license query is to subcloud
self.sysinv_client.get_license.side_effect = [LICENSE_VALID_RESPONSE,
LICENSE_VALID_RESPONSE]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# A license install should not have been attempted due to the license
# already being up to date
self.sysinv_client.install_license.assert_not_called()
# On success, the next state after installing license is importing load
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_license_overrides_mismatched_license(self):
"""Test the install license overrides a mismatched license"""
# Order of get_license calls:
# first license query is to system controller
# second license query is to subcloud (should be valid but different)
self.sysinv_client.get_license.side_effect = \
[LICENSE_VALID_RESPONSE,
ALTERNATE_LICENSE_RESPONSE]
# A license install should return a success
self.sysinv_client.install_license.return_value = \
LICENSE_VALID_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the license install was invoked
self.sysinv_client.install_license.assert_called()
# Verify it successfully moves to the next step
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_license_skip_when_no_sys_controller_lic(self):
"""Test license install skipped when no license on system controller."""
# Only makes one query: to system controller
self.sysinv_client.get_license.return_value = MISSING_LICENSE_RESPONSE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Should skip install_license API call
self.sysinv_client.install_license.assert_not_called()
# Verify it successfully moves to the next step
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)


@ -1,172 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states import lock_host
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
@mock.patch("dcmanager.orchestrator.states.lock_host.DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.lock_host.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeLockSimplexStage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeLockSimplexStage, self).setUp()
# next state after a successful lock is upgrading simplex
self.on_success_state = consts.STRATEGY_STATE_UPGRADING_SIMPLEX
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(self.subcloud.id, self.state)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.lock_host = mock.MagicMock()
self.setup_fake_controllers('controller-0')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_UNLOCKED = FakeController(
hostname=host_name, administrative=consts.ADMIN_UNLOCKED)
self.CONTROLLER_LOCKED = FakeController(
hostname=host_name, administrative=consts.ADMIN_LOCKED)
self.CONTROLLER_LOCKING = FakeController(
hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
ihost_action='lock',
task='Locking')
self.CONTROLLER_LOCKING_FAILED = FakeController(
hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
ihost_action='force-swact',
task='Swacting')
def test_lock_success(self):
"""Test the lock command returns a success"""
# mock the controller host queries
# first query is the starting state
# queries 2 and 3 are during the lock phase
# query 4 : the host is now locked
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_UNLOCKED,
self.CONTROLLER_LOCKING,
self.CONTROLLER_LOCKING,
self.CONTROLLER_LOCKED]
# mock the API call as successful; the lock is now in progress
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the lock command was actually attempted
self.sysinv_client.lock_host.assert_called()
# verify that the API moved to the next state on success
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_lock_skipped_when_already_locked(self):
"""Test the lock command skips if host is already locked"""
# mock the controller host query as being already locked
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the lock command was never attempted
self.sysinv_client.lock_host.assert_not_called()
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_lock_attempt_timeout(self):
"""Test lock invoked and fails if timeout before host becomes locked"""
# mock the get_host queries
# first query is the starting state
# all remaining queries, the host returns 'locking'
self.sysinv_client.get_host.side_effect = itertools.chain(
[self.CONTROLLER_UNLOCKED, ],
itertools.repeat(self.CONTROLLER_LOCKING))
# mock the API call as successful on the subcloud
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the lock command was actually attempted
self.sysinv_client.lock_host.assert_called()
# verify the query was invoked: 1 + max_attempts times
self.assertEqual(lock_host.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_host.call_count)
# verify that state failed due to subcloud never finishing the lock
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_lock_failure(self):
"""Test the lock command returns a failure"""
# mock the controller get_host query
self.sysinv_client.get_host.return_value = self.CONTROLLER_UNLOCKED
# mock the API call as failed on the subcloud
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING_FAILED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the lock command was actually attempted
self.sysinv_client.lock_host.assert_called()
# verify that the API error for the lock leads to a failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_lock_fails_when_host_query_fails(self):
"""Test the lock command fails when it cannot get the controllers"""
# mock the get_host query is empty and raises an exception
self.sysinv_client.get_host.side_effect = \
Exception("Unable to find host controller-0")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the lock command was never attempted
self.sysinv_client.lock_host.assert_not_called()
# verify that the state is set to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeLockDuplexStage(TestSwUpgradeLockSimplexStage):
def setUp(self):
self.state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1
super(TestSwUpgradeLockDuplexStage, self).setUp()
# next state after a successful lock is upgrading duplex
self.on_success_state = consts.STRATEGY_STATE_UPGRADING_DUPLEX
# Add mock API endpoints for sysinv client calls invoked by this state
self.setup_fake_controllers('controller-1')


@ -1,199 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.orchestrator.states.upgrade import migrating_data
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
CONTROLLER_0_LOCKED = FakeController(administrative=consts.ADMIN_LOCKED)
CONTROLLER_0_UNLOCKING = \
FakeController(administrative=consts.ADMIN_UNLOCKED,
operational=consts.OPERATIONAL_DISABLED)
CONTROLLER_0_UNLOCKED = \
FakeController(administrative=consts.ADMIN_UNLOCKED,
operational=consts.OPERATIONAL_ENABLED)
@mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"DEFAULT_MAX_API_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"DEFAULT_MAX_FAILED_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"DEFAULT_API_SLEEP", 1)
@mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"DEFAULT_FAILED_SLEEP", 1)
class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeMigratingDataStage, self).setUp()
# next state after 'migrating data' is 'unlocking controller'
self.on_success_state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_MIGRATING_DATA)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
def test_upgrade_subcloud_migrating_data_failure(self):
"""Test migrating data step where the subprocess call fails."""
# Simulate a failed subprocess call to the platform upgrade playbook
# on the subcloud.
p = mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"migrate_subcloud_data")
self.mock_platform_upgrade_call = p.start()
self.mock_platform_upgrade_call.side_effect = Exception("Bad day!")
self.addCleanup(p.stop)
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify a failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_migrating_data_success(self):
"""Test migrating data step where the subprocess call passes."""
# Simulate a successful subprocess call to the platform upgrade playbook
# on the subcloud.
p = mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"migrate_subcloud_data")
self.mock_platform_upgrade_call = p.start()
self.mock_platform_upgrade_call.return_value = 0
self.addCleanup(p.stop)
# mock the get_host queries
# first query is an exception, to emulate the host being inaccessible
# query 2: the host is locked after the reboot
# query 3: during the unlock phase
# query 4: the host is now unlocked
self.sysinv_client.get_host.side_effect = [Exception("Bad Connection"),
CONTROLLER_0_LOCKED,
CONTROLLER_0_UNLOCKING,
CONTROLLER_0_UNLOCKED, ]
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_migrating_data_skip_migration_done(self):
"""Test the migrating data step skipped (migration completed)"""
# online subcloud running N load
# Update the subcloud to have deploy state as "migrated"
db_api.subcloud_update(self.ctx,
self.subcloud.id,
deploy_status=consts.DEPLOY_STATE_MIGRATED)
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_migrating_data_skip_deployment_done(self):
"""Test the migrating data step skipped (deployment completed)"""
# online subcloud running N load
# Update the subcloud to have deploy state as "done"
db_api.subcloud_update(self.ctx,
self.subcloud.id,
deploy_status=consts.DEPLOY_STATE_DONE)
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# On success, should have moved to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_migrating_data_interrupted_migration(self):
"""Test the migrating data step skipped"""
# online subcloud running N load
# Update the subcloud to have deploy state as "migrating data"
db_api.subcloud_update(
self.ctx,
self.subcloud.id,
deploy_status=consts.DEPLOY_STATE_MIGRATING_DATA)
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Cannot resume the migration, the state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_migrating_data_reboot_timeout(self):
"""Test migrating data step times out during reboot
The subprocess call passes however the reboot times out.
"""
# Simulate a successful subprocess call to the platform upgrade playbook
# on the subcloud.
p = mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"migrate_subcloud_data")
self.mock_platform_upgrade_call = p.start()
self.mock_platform_upgrade_call.return_value = 0
self.addCleanup(p.stop)
# mock the get_host queries as never coming back from reboot
self.sysinv_client.get_host.side_effect = Exception("Bad Connection")
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# reboot failed, so the 'failed' query count should have been hit
self.assertEqual(migrating_data.DEFAULT_MAX_FAILED_QUERIES,
self.sysinv_client.get_host.call_count)
# Due to the timeout, the state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_migrating_data_recover_timeout(self):
"""Test migrating data step times out enabling after reboot
The subprocess call passes however the unlock enable times out.
"""
# Simulate a successful subprocess call to the platform upgrade playbook
# on the subcloud.
p = mock.patch("dcmanager.orchestrator.states.upgrade.migrating_data."
"migrate_subcloud_data")
self.mock_platform_upgrade_call = p.start()
self.mock_platform_upgrade_call.return_value = 0
self.addCleanup(p.stop)
# mock the get_host queries as stuck in the unlocking (disabled) state
self.sysinv_client.get_host.return_value = CONTROLLER_0_UNLOCKING
# Invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# reboot passed, so the 'api' query count should have been hit
self.assertEqual(migrating_data.DEFAULT_MAX_API_QUERIES,
self.sysinv_client.get_host.call_count)
# Due to the timeout, the state goes to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)


@ -1,225 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import starting_upgrade
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
UPGRADE_ABORTING = FakeUpgrade(state='aborting')
UPGRADE_STARTING = FakeUpgrade(state='starting')
UPGRADE_STARTED = FakeUpgrade(state='started')
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_SLEEP_DURATION", 1)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".MAX_FAILED_RETRIES", 2)
class TestSwUpgradeSimplexStartingUpgradeStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeSimplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'transferring CA certificate'
self.on_success_state = consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_STARTING_UPGRADE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.upgrade_start = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
def test_upgrade_subcloud_upgrade_start_failure(self):
"""Test the upgrade_start where the API call fails.
The upgrade_start call fails due to a validation check, such as
the health-query check.
"""
# No upgrades should yet exist in the DB / API
self.sysinv_client.get_upgrades.return_value = []
# Simulate an upgrade_start failure on the subcloud.
# The API throws an exception rather than returning an error response
self.sysinv_client.upgrade_start.side_effect = \
Exception("HTTPBadRequest: upgrade-start rejected: "
"System is not in a valid state for upgrades. "
"Run system health-query-upgrade for more details.")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call that failed was invoked
self.sysinv_client.upgrade_start.assert_called()
# Verify the API failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_upgrade_start_success(self):
"""Test upgrade_start where the API call succeeds.
This will result in an upgrade being created with the appropriate
state.
"""
# No upgrades should yet exist in the initial DB / API
# the subsequent call should indicate it is started
self.sysinv_client.get_upgrades.side_effect = [
[],
[UPGRADE_STARTING, ],
[UPGRADE_STARTED, ],
]
# Simulate an upgrade_start succeeds on the subcloud
self.sysinv_client.upgrade_start.return_value = UPGRADE_STARTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call that succeeded was actually invoked
self.sysinv_client.upgrade_start.assert_called()
# verify default alarm-restriction-type (relaxed) is treated as 'force'
self.sysinv_client.upgrade_start.assert_called_with(force=True)
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_upgrade_start_skip_already_started(self):
"""Test upgrade_start where the upgrade is already started."""
# An already started upgrade exists in the DB
self.sysinv_client.get_upgrades.return_value = [UPGRADE_STARTED, ]
# upgrade_start should not be invoked, so can be mocked as 'failed'
# by raising an exception
self.sysinv_client.upgrade_start.side_effect = \
Exception("HTTPBadRequest: this is a fake exception")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# upgrade_start API call should not have been attempted due to the
# existing upgrade already in started state.
self.sysinv_client.upgrade_start.assert_not_called()
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_upgrade_start_fails_bad_existing_upgrade(self):
"""Test the upgrade_start fails due to a bad existing upgrade."""
# An already started upgrade exists in the DB but is in bad shape.
self.sysinv_client.get_upgrades.return_value = [UPGRADE_ABORTING, ]
# upgrade_start will NOT be invoked. No need to mock it.
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# upgrade_start API call should not have been attempted due to the
# invalid existing upgrade that needs to be cleaned up.
self.sysinv_client.upgrade_start.assert_not_called()
# Verify the bad existing upgrade leads to a failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_upgrade_start_retry(self):
"""Test upgrade_start where HTTP error occurs after successful API call."""
# Simulate an HTTP exception thrown after upgrade start
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
[[], ],
itertools.repeat(Exception("HTTPBadRequest: this is a fake exception")))
self.sysinv_client.upgrade_start.return_value = UPGRADE_STARTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call that succeeded was actually invoked
self.sysinv_client.upgrade_start.assert_called()
# verify default alarm-restriction-type (relaxed) is treated as 'force'
self.sysinv_client.upgrade_start.assert_called_with(force=True)
# verify the get_upgrades query was invoked: 1 + max_failed_retries times
self.assertEqual(starting_upgrade.MAX_FAILED_RETRIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Verify the timeout leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_upgrade_start_timeout(self):
"""Test upgrade_start where the API call succeeds but times out."""
# No upgrades should yet exist in the initial DB / API
# the subsequent calls indicate 'starting' instead of 'started'
# which eventually leads to the timeout
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
[[], ],
itertools.repeat([UPGRADE_STARTING, ]))
# Simulate an upgrade_start succeeds on the subcloud
self.sysinv_client.upgrade_start.return_value = UPGRADE_STARTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call that succeeded was actually invoked
self.sysinv_client.upgrade_start.assert_called()
# verify default alarm-restriction-type (relaxed) is treated as 'force'
self.sysinv_client.upgrade_start.assert_called_with(force=True)
# verify the get_upgrades query was invoked: 1 + max_attempts times
self.assertEqual(starting_upgrade.DEFAULT_MAX_QUERIES + 1,
self.sysinv_client.get_upgrades.call_count)
# Verify the timeout leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeDuplexStartingUpgradeStage(
TestSwUpgradeSimplexStartingUpgradeStage):
def setUp(self):
super(TestSwUpgradeDuplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'transferring CA certificate'
self.on_success_state = consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE
# Add mock API endpoints for sysinv client calls invoked by this state
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_DUPLEX
self.sysinv_client.get_system.return_value = system_values


@ -1,146 +0,0 @@
#
# Copyright (c) 2021-2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states import swact_host
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
@mock.patch("dcmanager.orchestrator.states.swact_host.DEFAULT_SWACT_SLEEP", 1)
@mock.patch("dcmanager.orchestrator.states.swact_host.DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.swact_host.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeSwactToController0Stage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeSwactToController0Stage, self).setUp()
# next state after a successful swact is activating the upgrade
self.on_success_state = consts.STRATEGY_STATE_ACTIVATING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(self.subcloud.id, self.state)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.swact_host = mock.MagicMock()
# In order to swact to controller-0, we run "system host-swact controller-1"
self.setup_fake_controllers('controller-1')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_ACTIVE = FakeController(hostname=host_name)
self.CONTROLLER_STANDBY = FakeController(
hostname=host_name, capabilities={"Personality": "Controller-Standby"})
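# The standby role is modeled through the host 'capabilities' dict; a fake
# controller without the Personality entry is treated as active by these
# tests (see test_swact_skipped_when_already_active).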
self.CONTROLLER_SWACTING = FakeController(
hostname=host_name, task='Swacting')
def test_swact_success(self):
"""Test the swact command returns a success"""
# mock the controller host queries
# first query is the starting state
# query 2 is during the ongoing swact phase
# query 3 is after successful host swact
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_STANDBY,
self.CONTROLLER_STANDBY,
self.CONTROLLER_ACTIVE]
# mock the API call as successful; the swact is now in progress
self.sysinv_client.swact_host.return_value = self.CONTROLLER_SWACTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was actually attempted
self.sysinv_client.swact_host.assert_called()
# verify that the API moved to the next state on success
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_swact_skipped_when_already_active(self):
"""Test the swact command skips if host is already active controller"""
# mock the controller host query as being already Controller-Active
self.sysinv_client.get_host.return_value = self.CONTROLLER_ACTIVE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was never attempted
self.sysinv_client.swact_host.assert_not_called()
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_swact_attempt_timeout(self):
"""Test swact invoked and fails if timeout before host becomes active"""
# mock the get_host queries: every query returns 'Controller-Standby'
self.sysinv_client.get_host.side_effect = \
itertools.repeat(self.CONTROLLER_STANDBY)
# mock the API call as successful on the subcloud
self.sysinv_client.swact_host.return_value = self.CONTROLLER_SWACTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was actually attempted
self.sysinv_client.swact_host.assert_called()
# verify the query was invoked: 2 + max_attempts times
self.assertEqual(swact_host.DEFAULT_MAX_QUERIES + 2,
self.sysinv_client.get_host.call_count)
# verify that state failed due to subcloud never finishing the swact
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_swact_fails_when_host_query_fails(self):
"""Test the swact command fails when it cannot get the controllers"""
# mock the get_host query is empty and raises an exception
self.sysinv_client.get_host.side_effect = \
Exception("Unable to find host controller-0")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was never attempted
self.sysinv_client.swact_host.assert_not_called()
# verify that the state is set to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeSwactToController1Stage(TestSwUpgradeSwactToController0Stage):
def setUp(self):
self.state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1
super(TestSwUpgradeSwactToController1Stage, self).setUp()
# next state after a successful swact to controller-1 is creating VIM
# upgrade strategy
self.on_success_state = consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY
# In order to swact to controller-1, we run "system host-swact controller-0"
self.setup_fake_controllers('controller-0')


@ -1,148 +0,0 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import transfer_ca_certificate
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
FAKE_CERT = "-----BEGIN CERTIFICATE-----\nMIIDAO\n-----END CERTIFICATE-----\n"
FAKE_KEY = "-----BEGIN PRIVATE KEY-----\nMIIDAO\n-----END PRIVATE KEY-----\n"
class TestSwUpgradeSimplexTransferringCACertificateStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeSimplexTransferringCACertificateStage, self).setUp()
# next state after 'transferring CA certificate' is 'locking controller-0'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.update_certificate = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
def test_upgrade_subcloud_upgrade_transferring_ca_certificate_skipped(self):
"""Test transferring CA certificate is skipped for the Simplex."""
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call was not invoked
self.sysinv_client.update_certificate.assert_not_called()
# On simplex, the step is skipped and the state should transition to
# the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
@mock.patch("dcmanager.orchestrator.states.upgrade.transfer_ca_certificate"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeDuplexTransferringCACertificateStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeDuplexTransferringCACertificateStage, self).setUp()
# next state after 'transferring CA certificate' is 'locking controller-1'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.update_certificate = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_DUPLEX
self.sysinv_client.get_system.return_value = system_values
def test_upgrade_subcloud_upgrade_transferring_ca_certificate_success(self):
"""Test transferring_ca_certificate where the API call succeeds."""
# simulate get_certificate_from_secret finding the openldap ca certificate
p = mock.patch('dcmanager.common.utils.get_certificate_from_secret')
self.mock_cert_file = p.start()
self.mock_cert_file.return_value = (FAKE_CERT, FAKE_KEY, FAKE_CERT)
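# The mocked helper returns a (cert, key, ca_cert) tuple (assumed
# ordering); the update_certificate assertion below only depends on the
# FAKE_CERT/FAKE_KEY values being concatenated into the payload.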
self.addCleanup(p.stop)
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify update_certificate was invoked
self.sysinv_client.update_certificate.assert_called_with(
'', FAKE_CERT + FAKE_CERT + FAKE_KEY, {'mode': 'openldap_ca'})
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_upgrade_subcloud_upgrade_transferring_ca_certificate_fails_get_cert(
self):
"""Test API call fails due to failing to get certificate from secret."""
# simulate get_certificate_from_secret failing to get
# the openldap ca certificate
p = mock.patch('dcmanager.common.utils.get_certificate_from_secret')
self.mock_cert_file = p.start()
self.mock_cert_file.side_effect = Exception("Invalid certificate")
self.addCleanup(p.stop)
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify update_certificate was not invoked
self.sysinv_client.update_certificate.assert_not_called()
# Verify the failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_upgrade_subcloud_upgrade_transferring_ca_certificate_fails_update_cert(
self):
"""Test API call fails due to failing to update certificate."""
# simulate get_certificate_from_secret finding the openldap ca certificate
p = mock.patch('dcmanager.common.utils.get_certificate_from_secret')
self.mock_cert_file = p.start()
self.mock_cert_file.return_value = (FAKE_CERT, FAKE_KEY, FAKE_CERT)
self.addCleanup(p.stop)
# simulate update_certificate failing to update
self.sysinv_client.update_certificate.side_effect = Exception(
"Faile to update certificated")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify update_certificate was invoked
self.sysinv_client.update_certificate.assert_called_with(
'', FAKE_CERT + FAKE_CERT + FAKE_KEY, {'mode': 'openldap_ca'})
# verify the update_certificate was invoked: 1 + max_retries times
self.assertEqual(transfer_ca_certificate.DEFAULT_MAX_RETRIES + 1,
self.sysinv_client.update_certificate.call_count)
# Verify the failure leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)


@ -1,249 +0,0 @@
#
# Copyright (c) 2020, 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states import unlock_host
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_MAX_API_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_MAX_FAILED_QUERIES",
3)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_MAX_UNLOCK_RETRIES",
3)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_API_SLEEP", 1)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_FAILED_SLEEP", 1)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_UNLOCK_SLEEP", 1)
class TestSwUpgradeUnlockSimplexStage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeUnlockSimplexStage, self).setUp()
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.unlock_host = mock.MagicMock()
# next state after a successful unlock is 'activating'
self.on_success_state = consts.STRATEGY_STATE_ACTIVATING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(self.subcloud.id, self.state)
self.setup_fake_controllers('controller-0')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_UNLOCKED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
operational=consts.OPERATIONAL_ENABLED,
availability=consts.AVAILABILITY_AVAILABLE)
self.CONTROLLER_LOCKED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED)
self.CONTROLLER_UNLOCKING = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED,
ihost_action='unlock',
task='Unlocking')
self.CONTROLLER_UNLOCKING_FAILED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED,
ihost_action='force-swact',
task='Swacting')
def test_unlock_success(self):
"""Test the unlock command returns a success"""
# mock the get_host queries
# first query is the starting state
# queries 2 and 3 are during the unlock phase
# query 4 : the host is now unlocked
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_LOCKED,
self.CONTROLLER_UNLOCKING,
self.CONTROLLER_UNLOCKING,
self.CONTROLLER_UNLOCKED, ]
# mock the API call as successful; the unlock is now in progress
self.sysinv_client.unlock_host.return_value = self.CONTROLLER_UNLOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was actually attempted
self.sysinv_client.unlock_host.assert_called()
# verify that the API moved to the next state on success
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_unlock_skipped_when_already_unlocked(self):
"""Test the unlock command skips if host is already unlocked"""
# mock the controller host query as being already unlocked
self.sysinv_client.get_host.return_value = self.CONTROLLER_UNLOCKED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was never attempted
self.sysinv_client.unlock_host.assert_not_called()
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_unlock_attempt_timeout(self):
"""Test unlock invoked handles timeout if unlocking takes too long"""
# mock the get_host queries
# first query is the starting state
# all remaining queries, the host returns 'unlocking'
self.sysinv_client.get_host.side_effect = itertools.chain(
[self.CONTROLLER_LOCKED, ],
itertools.repeat(self.CONTROLLER_UNLOCKING))
# mock the API call as successful on the subcloud
self.sysinv_client.unlock_host.return_value = self.CONTROLLER_UNLOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was actually attempted
self.sysinv_client.unlock_host.assert_called()
# verify the query was invoked: 1 + max_attempts times
self.assertEqual(unlock_host.DEFAULT_MAX_API_QUERIES + 1,
self.sysinv_client.get_host.call_count)
# verify that state failed due to subcloud never finishing the unlock
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_unlock_failure_sriov(self):
"""Test the unlock command returns an exception because of sriov failure"""
# mock the get_host query
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# mock the API call as an unlock sriov failure
self.sysinv_client.unlock_host.side_effect = \
Exception("Expecting number of interface sriov_numvfs=32. Please"
" wait a few minutes for inventory update and retry"
" host-unlock.")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was actually attempted
self.sysinv_client.unlock_host.assert_called()
# verify the unlock was attempted: 1 + max_retries times
self.assertEqual(unlock_host.DEFAULT_MAX_UNLOCK_RETRIES + 1,
self.sysinv_client.unlock_host.call_count)
# verify that state failed due to subcloud never finishing the unlock
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_unlock_attempt_due_sriov_failure(self):
"""Test the unlock attempts after sriov failure"""
# mock the get_host query
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# mock the API call as an unlock sriov failure 2 times then a success
self.sysinv_client.unlock_host.side_effect = \
[Exception("Expecting number of interface sriov_numvfs=32. Please"
" wait a few minutes for inventory update and retry"
" host-unlock."),
Exception("Expecting number of interface sriov_numvfs=32. Please"
" wait a few minutes for inventory update and retry"
" host-unlock."),
self.CONTROLLER_UNLOCKING]
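# Two transient SR-IOV errors followed by an accepted unlock: the retry
# logic absorbs both exceptions, but get_host keeps reporting 'locked', so
# this first invocation still ends in a failed state; the second
# invocation below then takes the already-unlocked skip path.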
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was actually attempted
self.sysinv_client.unlock_host.assert_called()
# verify the unlock was called 3 times: 1st call + 2 retries
self.assertEqual(3, self.sysinv_client.unlock_host.call_count)
# verify that state failed because host did not get unlocked
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
# now move to unlock the controller
self.sysinv_client.get_host.return_value = self.CONTROLLER_UNLOCKED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_unlock_failure(self):
"""Test the unlock command returns a failure"""
# mock the get_host query
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# mock the API call as failed on the subcloud
self.sysinv_client.unlock_host.return_value = \
self.CONTROLLER_UNLOCKING_FAILED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was actually attempted
self.sysinv_client.unlock_host.assert_called()
# verify that the API error for the unlock leads to a failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_unlock_fails_when_host_query_fails(self):
"""Test the unlock command fails when it cannot get the controllers"""
# mock the get_host query fails and raises an exception
self.sysinv_client.get_host.side_effect = \
Exception("Unable to find host controller-0")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the unlock command was never attempted
self.sysinv_client.unlock_host.assert_not_called()
# verify that the state is set to failed
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeUnlockDuplexStage(TestSwUpgradeUnlockSimplexStage):
"""This subclasses Controller 0 Unlock, and overides some setup values"""
def setUp(self):
self.state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1
super(TestSwUpgradeUnlockDuplexStage, self).setUp()
# override some of the fields that were set up in the superclass
# next state after a successful unlock is 'swacting to controller-1'
self.on_success_state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1
self.setup_fake_controllers('controller-1')


@ -1,200 +0,0 @@
#
# Copyright (c) 2020, 2023-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from os import path as os_path
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
REGION_ONE_PATCHES = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.4': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.8': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
}
SUBCLOUD_PATCHES_SUCCESS = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
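# Expected reconciliation against REGION_ONE_PATCHES (asserted in
# test_update_subcloud_patches_success): DC.8 is uploaded from the patch
# vault and applied, DC.2 and DC.3 are applied, and DC.6 (Partial-Apply
# here but absent from RegionOne) is removed.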
SUBCLOUD_PATCHES_BAD_COMMIT = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
SUBCLOUD_PATCHES_BAD_STATE = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Unknown',
'patchstate': 'Unknown'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
@mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeUpdatingPatchesStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeUpdatingPatchesStage, self).setUp()
# next state after 'updating patches' is 'finishing patch strategy'
self.on_success_state = consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_UPDATING_PATCHES)
# Add mock API endpoints for patching and sysinv client calls invoked by
# this state
self.patching_client.query = mock.MagicMock()
self.sysinv_client.get_loads = mock.MagicMock()
self.patching_client.remove = mock.MagicMock()
self.patching_client.upload = mock.MagicMock()
self.patching_client.apply = mock.MagicMock()
self.patching_client.query_hosts = mock.MagicMock()
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_success(self, mock_os_path_isfile):
"""Test update_patches where the API call succeeds."""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_SUCCESS,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
self.patching_client.upload.assert_called_with(
[consts.PATCH_VAULT_DIR + '/20.12/DC.8.patch'])
call_args, _ = self.patching_client.remove.call_args_list[0]
self.assertItemsEqual(['DC.6'], call_args[0])
call_args, _ = self.patching_client.apply.call_args_list[0]
self.assertItemsEqual(['DC.2', 'DC.3', 'DC.8'], call_args[0])
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_bad_committed(self, mock_os_path_isfile):
"""Test update_patches where the API call fails.
The update_patches call fails because the patch is 'committed' in
the subcloud but not 'applied' in the System Controller.
"""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_BAD_COMMIT,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify it failed and the step transitioned to the failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_bad_state(self, mock_os_path_isfile):
"""Test update_patches where the API call succeeds.
The update_patches call fails because the patch is 'unknown' in
the subcloud which is not a valid state.
"""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_BAD_STATE,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify it failed and the step transitioned to the failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
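The three fixtures above pin down the reconciliation rules the 'updating patches' state applies when comparing a subcloud against RegionOne: propagate what RegionOne has applied or committed, and back out partial applies that RegionOne no longer carries. A minimal sketch of that planning step, written against inputs shaped like the fixtures (the PATCH_VAULT_DIR value and the function name are assumptions for illustration, not the dcmanager source):
import os

PATCH_VAULT_DIR = "/opt/dc-vault/patches"  # assumed vault location

def plan_patch_actions(regionone_patches, subcloud_patches, sw_version):
    """Return (upload, apply, remove) patch lists for one subcloud."""
    upload, to_apply, to_remove = [], [], []
    for patch_id, info in regionone_patches.items():
        # Only patches applied or committed in RegionOne are propagated.
        if info["repostate"] not in ("Applied", "Committed"):
            continue
        sc_patch = subcloud_patches.get(patch_id)
        if sc_patch is None:
            # Missing from the subcloud: upload from the vault, then apply.
            upload.append(os.path.join(
                PATCH_VAULT_DIR, sw_version, patch_id + ".patch"))
            to_apply.append(patch_id)
        elif sc_patch["repostate"] == "Available":
            to_apply.append(patch_id)
    for patch_id, info in subcloud_patches.items():
        if patch_id not in regionone_patches \
                and info["patchstate"] == "Partial-Apply":
            # Partially applied in the subcloud with no RegionOne
            # counterpart: back it out.
            to_remove.append(patch_id)
    return upload, to_apply, to_remove
Fed REGION_ONE_PATCHES and SUBCLOUD_PATCHES_SUCCESS, the sketch yields the upload of DC.8, the apply of DC.2, DC.3 and DC.8, and the removal of DC.6 that test_update_subcloud_patches_success asserts.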

View File

@ -1,96 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import upgrading_duplex
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base import \
TestSwUpgradeState
UPGRADE_ABORTING = FakeUpgrade(state='aborting')
UPGRADE_STARTED = FakeUpgrade(state='started')
UPGRADE_COMPLETE = FakeUpgrade(state='data-migration-complete')
UPGRADE_FAILED = FakeUpgrade(state='data-migration-failed')
@mock.patch("dcmanager.orchestrator.states.upgrade.upgrading_duplex"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.upgrading_duplex"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeUpgradingDuplexStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeUpgradingDuplexStage, self).setUp()
# next state after 'upgrading duplex' is 'unlocking controller-1'
self.on_success_state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_UPGRADING_DUPLEX)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.upgrade_host = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
def test_subcloud_duplex_upgrade_success(self):
"""Test upgrading_duplex where the API call succeeds."""
# A successfully completed upgrade exists in the DB
self.sysinv_client.get_upgrades.return_value = [UPGRADE_COMPLETE, ]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the get_upgrades query was invoked twice before succeeding
self.assertEqual(self.sysinv_client.get_upgrades.call_count, 2)
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_subcloud_duplex_upgrade_fails(self):
"""Test the upgrading_duplex fails as data migration fails."""
self.sysinv_client.get_upgrades.return_value = [UPGRADE_FAILED, ]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the get_upgrades query was invoked twice before the failure was seen
self.assertEqual(self.sysinv_client.get_upgrades.call_count, 2)
# Verify it failed and the step transitioned to the failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_subcloud_duplex_upgrade_timeout(self):
"""Test upgrading_duplex where the API call succeeds but times out."""
# The upgrade is stuck in the 'started' state, which eventually
# leads to the timeout
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
itertools.repeat([UPGRADE_STARTED, ]))
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the upgrade_host API call was actually invoked
self.sysinv_client.upgrade_host.assert_called()
# verify the get_upgrades query was invoked the maximum number of times
self.assertEqual(upgrading_duplex.DEFAULT_MAX_QUERIES,
self.sysinv_client.get_upgrades.call_count)
# Verify the timeout leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
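The two mock.patch overrides at the top of this class shrink the state's polling knobs so the timeout case finishes in seconds rather than hours. The loop being exercised is roughly the sketch below; the names and the exact query accounting are assumptions (the tests assert two get_upgrades calls on the success and failure paths and exactly DEFAULT_MAX_QUERIES on timeout, so the real state counts queries slightly differently than this sketch):
import time

DEFAULT_MAX_QUERIES = 3     # patched down from the production default
DEFAULT_SLEEP_DURATION = 1  # seconds; also patched down in these tests

def wait_for_duplex_upgrade(sysinv_client):
    """Poll until data migration completes, fails, or times out."""
    for _ in range(DEFAULT_MAX_QUERIES):
        upgrade = sysinv_client.get_upgrades()[0]
        if upgrade.state == "data-migration-complete":
            return  # the orch thread advances to 'unlocking controller-1'
        if upgrade.state == "data-migration-failed":
            # surfaces as STRATEGY_STATE_FAILED for the step
            raise Exception("data migration failed")
        time.sleep(DEFAULT_SLEEP_DURATION)
    # exhausting the retries also drives the step to 'failed'
    raise TimeoutError("timed out waiting for data migration")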

View File

@ -1,83 +0,0 @@
#
# Copyright (c) 2020, 2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from tsconfig.tsconfig import SW_VERSION
from dcmanager.common import consts
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad
from dcmanager.tests.unit.orchestrator.states.fakes import PREVIOUS_VERSION
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
# UpgradingSimplexState uses SW_VERSION as the upgraded version check
UPGRADED_VERSION = SW_VERSION
PREVIOUS_LOAD = FakeLoad(1, software_version=PREVIOUS_VERSION,
state='imported')
UPGRADED_LOAD = FakeLoad(2, software_version=UPGRADED_VERSION,
state='active')
FAKE_ISO = '/opt/dc-vault/loads/' + UPGRADED_VERSION + '/bootimage.iso'
FAKE_SIG = '/opt/dc-vault/loads/' + UPGRADED_VERSION + '/bootimage.sig'
class TestSwUpgradeUpgradingSimplexStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeUpgradingSimplexStage, self).setUp()
# next state after 'upgrading simplex' is 'migrating data'
self.on_success_state = consts.STRATEGY_STATE_MIGRATING_DATA
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
self.subcloud.id, consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
# simulate get_vault_load_files finding the iso and sig in the vault
p = mock.patch('dcmanager.common.utils.get_vault_load_files')
self.mock_vault_files = p.start()
self.mock_vault_files.return_value = (FAKE_ISO, FAKE_SIG)
self.addCleanup(p.stop)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_loads = mock.MagicMock()
def test_success_already_upgraded(self):
"""Test upgrading_simplex where the load already upgraded / active."""
# The state machine skips if the load is already upgraded and active
self.sysinv_client.get_loads.return_value = [
UPGRADED_LOAD,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_subcloud_simplex_upgrade_fails_no_install_data(self):
"""Test the upgrading_simplex fails due to missing install data"""
self.sysinv_client.get_loads.return_value = [
PREVIOUS_LOAD,
]
# Update the subcloud to have missing data_install
db_api.subcloud_update(self.ctx,
self.subcloud.id,
data_install="")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify it failed due to the missing data_install
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
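test_subcloud_simplex_upgrade_fails_no_install_data hinges on the state refusing to proceed when the subcloud record carries no data_install, since a simplex upgrade is effectively a remote reinstall from the vault. A guard in that spirit (names are illustrative, not the dcmanager source):
import json

def validate_simplex_install_inputs(subcloud, iso_path, sig_path):
    """Fail fast when a remote reinstall cannot be launched."""
    if not subcloud.data_install:
        # no install data recorded: the step transitions to 'failed'
        raise Exception("subcloud %s has no data_install" % subcloud.name)
    if not (iso_path and sig_path):
        # get_vault_load_files (mocked above) must locate both files
        raise Exception("bootimage.iso/.sig missing from the dc-vault")
    return json.loads(subcloud.data_install)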

View File

@ -39,7 +39,7 @@ CONF = cfg.CONF
class TestSwUpdate(base.DCManagerTestCase):
DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_UPGRADE
DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_SOFTWARE
def setUp(self):
super(TestSwUpdate, self).setUp()
@ -100,21 +100,8 @@ class TestSwUpdate(base.DCManagerTestCase):
worker = None
mock_strategy_lock = mock.Mock()
mock_dcmanager_audit_api = mock.Mock()
# There are many orch threads. Only one needs to be set up based on type
if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
sw_update_manager.SwUpgradeOrchThread.stopped = lambda x: False
worker = \
sw_update_manager.SwUpgradeOrchThread(mock_strategy_lock,
mock_dcmanager_audit_api)
else:
# mock the upgrade orch thread
self.fake_sw_upgrade_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, 'SwUpgradeOrchThread')
self.mock_sw_upgrade_orch_thread = p.start()
self.mock_sw_upgrade_orch_thread.return_value = \
self.fake_sw_upgrade_orch_thread
self.addCleanup(p.stop)
# There are many orch threads. Only one needs to be set up based on type
if strategy_type == consts.SW_UPDATE_TYPE_SOFTWARE:
sw_update_manager.SoftwareOrchThread.stopped = lambda x: False
worker = sw_update_manager.SoftwareOrchThread(

View File

@ -193,14 +193,6 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.addCleanup(p.stop)
# Note: mock where an item is used, not where it comes from
self.fake_sw_upgrade_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, "SwUpgradeOrchThread")
self.mock_sw_upgrade_orch_thread = p.start()
self.mock_sw_upgrade_orch_thread.return_value = (
self.fake_sw_upgrade_orch_thread
)
self.addCleanup(p.stop)
self.fake_software_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, "SoftwareOrchThread")
self.fake_software_orch_thread = p.start()
@ -337,7 +329,9 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud1.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud1.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE
)
fake_subcloud2 = self.create_subcloud(
@ -348,11 +342,13 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud2.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud2.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
)
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["type"] = consts.SW_UPDATE_TYPE_SOFTWARE
data["subcloud_group"] = str(self.fake_group3.id)
um = sw_update_manager.SwUpdateManager()
response = um.create_sw_update_strategy(self.ctxt, payload=data)
@ -362,7 +358,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.assertEqual(
response["subcloud-apply-type"], consts.SUBCLOUD_APPLY_TYPE_PARALLEL
)
self.assertEqual(response["type"], consts.SW_UPDATE_TYPE_UPGRADE)
self.assertEqual(response["type"], consts.SW_UPDATE_TYPE_SOFTWARE)
# Verify the strategy step list
subcloud_ids = [1, 2]
@ -390,7 +386,9 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud1.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud1.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE
)
fake_subcloud2 = self.create_subcloud(
@ -401,7 +399,9 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud2.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud2.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE
)
mock_global_prestage_validate.return_value = None
@ -450,7 +450,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_IN_SYNC,
)
@ -459,7 +459,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.ctxt, "subcloud2", 1, is_managed=True, is_online=True
)
self.update_subcloud_status(
self.ctxt, fake_subcloud2.id, dccommon_consts.ENDPOINT_TYPE_LOAD, None
self.ctxt, fake_subcloud2.id, dccommon_consts.ENDPOINT_TYPE_SOFTWARE, None
)
# Subcloud3 will be prestaged load out of sync
@ -469,7 +469,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud3.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)
@ -480,7 +480,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud4.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_UNKNOWN,
)
@ -530,7 +530,9 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud1.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud1.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
)
fake_subcloud2 = self.create_subcloud(
@ -541,7 +543,9 @@ class TestSwUpdateManager(base.DCManagerTestCase):
is_online=True,
)
self.update_subcloud_status(
self.ctxt, fake_subcloud2.id, endpoint=dccommon_consts.ENDPOINT_TYPE_LOAD
self.ctxt,
fake_subcloud2.id,
endpoint=dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
)
mock_initial_subcloud_validate.return_value = None
@ -1383,7 +1387,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)
@ -1398,7 +1402,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud2.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)
@ -1413,12 +1417,12 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud3.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_IN_SYNC,
)
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["type"] = consts.SW_UPDATE_TYPE_SOFTWARE
data["force"] = "true"
data["subcloud_group"] = str(self.fake_group3.id)
@ -1429,7 +1433,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.assertEqual(
strategy_dict["subcloud-apply-type"], consts.SUBCLOUD_APPLY_TYPE_PARALLEL
)
self.assertEqual(strategy_dict["type"], consts.SW_UPDATE_TYPE_UPGRADE)
self.assertEqual(strategy_dict["type"], consts.SW_UPDATE_TYPE_SOFTWARE)
subcloud_ids = [1, 2]
strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
@ -1451,7 +1455,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)
@ -1466,7 +1470,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud2.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)
@ -1481,12 +1485,12 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud3.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_IN_SYNC,
)
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["type"] = consts.SW_UPDATE_TYPE_SOFTWARE
data["force"] = "false"
data["subcloud_group"] = str(self.fake_group3.id)
@ -1497,7 +1501,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.assertEqual(
strategy_dict["subcloud-apply-type"], consts.SUBCLOUD_APPLY_TYPE_PARALLEL
)
self.assertEqual(strategy_dict["type"], consts.SW_UPDATE_TYPE_UPGRADE)
self.assertEqual(strategy_dict["type"], consts.SW_UPDATE_TYPE_SOFTWARE)
subcloud_ids = [2]
strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
@ -1505,43 +1509,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
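From this point on, every strategy test seeds sync status under the software endpoint rather than the retired load endpoint, since that is the status the orchestrator now filters subclouds on. The seeding idiom, for reference (fake_subcloud stands in for any managed, online subcloud fixture):
self.update_subcloud_status(
    self.ctxt,
    fake_subcloud.id,
    dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
    dccommon_consts.SYNC_STATUS_OUT_OF_SYNC,
)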
@mock.patch.object(sw_update_manager, "PatchOrchThread")
def test_create_sw_update_strategy_not_insync_offline_sc_with_force_upgrade(
self, mock_patch_orch_thread
):
# This test verifies the offline subcloud is added to the strategy
# because the force option is specified in the upgrade request.
fake_subcloud1 = self.create_subcloud(
self.ctxt, "subcloud1", 1, is_managed=True, is_online=False
)
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.SYNC_STATUS_UNKNOWN,
)
um = sw_update_manager.SwUpdateManager()
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["force"] = "true"
data["cloud_name"] = "subcloud1"
strategy_dict = um.create_sw_update_strategy(self.ctxt, payload=data)
# Assert that values passed through CLI are used instead of group values
self.assertEqual(
strategy_dict["subcloud-apply-type"], consts.SUBCLOUD_APPLY_TYPE_PARALLEL
)
self.assertEqual(strategy_dict["type"], consts.SW_UPDATE_TYPE_UPGRADE)
# Verify the strategy step list
subcloud_ids = [1]
strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
for index, strategy_step in enumerate(strategy_step_list):
self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
@mock.patch.object(sw_update_manager, "PatchOrchThread")
def test_create_sw_update_strategy_in_sync_offline_subcloud_with_force_upgrade(
def test_create_sw_update_strategy_in_sync_offline_subcloud_with_force_deploy(
self, mock_patch_orch_thread
):
# This test verifies that a bad request exception is raised even
@ -1553,13 +1521,13 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_IN_SYNC,
)
um = sw_update_manager.SwUpdateManager()
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["type"] = consts.SW_UPDATE_TYPE_SOFTWARE
data["force"] = True
data["cloud_name"] = "subcloud1"
@ -1571,7 +1539,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
)
@mock.patch.object(sw_update_manager, "PatchOrchThread")
def test_create_sw_update_strategy_online_subcloud_with_force_upgrade(
def test_create_sw_update_strategy_online_subcloud_with_force_deploy(
self, mock_patch_orch_thread
):
# This test verifies that the force option has no effect in
@ -1584,13 +1552,13 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.update_subcloud_status(
self.ctxt,
fake_subcloud1.id,
dccommon_consts.ENDPOINT_TYPE_LOAD,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE,
dccommon_consts.SYNC_STATUS_UNKNOWN,
)
um = sw_update_manager.SwUpdateManager()
data = copy.copy(FAKE_SW_UPDATE_DATA)
data["type"] = consts.SW_UPDATE_TYPE_UPGRADE
data["type"] = consts.SW_UPDATE_TYPE_SOFTWARE
data["force"] = True
data["cloud_name"] = "subcloud1"
@ -1657,7 +1625,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
exceptions.NotFound,
um.delete_sw_update_strategy,
self.ctx,
update_type=consts.SW_UPDATE_TYPE_UPGRADE,
update_type=consts.SW_UPDATE_TYPE_SOFTWARE,
)
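With the upgrade type removed, a companion check along these lines would pin down that strategy creation rejects the retired literal outright; exceptions.BadRequest is an assumption about what the manager raises for an unknown strategy type:
@mock.patch.object(sw_update_manager, "PatchOrchThread")
def test_create_sw_update_strategy_rejects_upgrade_type(
    self, mock_patch_orch_thread
):
    um = sw_update_manager.SwUpdateManager()
    data = copy.copy(FAKE_SW_UPDATE_DATA)
    data["type"] = "upgrade"  # the removed strategy type, as a raw literal
    self.assertRaises(
        exceptions.BadRequest,
        um.create_sw_update_strategy,
        self.ctxt,
        payload=data,
    )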
@mock.patch.object(sw_update_manager, "PatchOrchThread")