Upgrade Orchestration for Standard subclouds

This commit enhances upgrade orchestration to support Standard
subclouds.

Strategy Steps:
- Run pre-checks to verify the subcloud is ready for an upgrade
  and to recover from previous failures
- Install the release N+1 license in the subcloud
- Download/import the release N+1 load into the subcloud
- Update and apply release N+1 patches to the release N+1 load
  in the subcloud
- Finish the patch strategy
- Start the duplex upgrade
- Lock controller-1
- Upgrade and re-install controller-1
- Unlock controller-1
- Swact to controller-1
- Create the VIM upgrade strategy
- Apply the VIM upgrade strategy
- Swact to controller-0
- Activate the upgrade
- Complete the upgrade
- Delete the release N load

Change-Id: I6fb60912ec1e90baa593cf98e502f4c91f599f42
Story: 2008055
Task: 41105
Signed-off-by: Jessica Castelino <jessica.castelino@windriver.com>
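
The steps listed above map onto the new STRATEGY_STATE_* constants and state classes added by this commit. The snippet below is an illustrative sketch only, not part of the change itself: it lists the nominal happy-path sequence for a Standard (duplex) subcloud. In the real code each state class chooses the next state dynamically, and the pre-check can resume the machine partway through this sequence.

# Illustrative happy-path sequence for a Standard (duplex) subcloud,
# expressed with the constants added to dcmanager.common.consts.
from dcmanager.common import consts

DUPLEX_UPGRADE_HAPPY_PATH = [
    consts.STRATEGY_STATE_PRE_CHECK,
    consts.STRATEGY_STATE_INSTALLING_LICENSE,
    consts.STRATEGY_STATE_IMPORTING_LOAD,
    consts.STRATEGY_STATE_UPDATING_PATCHES,
    consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY,
    consts.STRATEGY_STATE_STARTING_UPGRADE,
    consts.STRATEGY_STATE_LOCKING_CONTROLLER_1,
    consts.STRATEGY_STATE_UPGRADING_DUPLEX,
    consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1,
    consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1,
    consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY,
    consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY,
    consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0,
    consts.STRATEGY_STATE_ACTIVATING_UPGRADE,
    consts.STRATEGY_STATE_COMPLETING_UPGRADE,
    consts.STRATEGY_STATE_DELETING_LOAD,
]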

View File

@@ -86,7 +86,7 @@ class OpenStackDriver(object):
self.get_cached_keystone_client(region_name)
if self.keystone_client is None:
LOG.info("get new keystone client for subcloud %s", region_name)
LOG.debug("get new keystone client for subcloud %s", region_name)
try:
self.keystone_client = KeystoneClient(region_name, auth_url)
OpenStackDriver.update_region_clients(region_name,
@@ -214,11 +214,11 @@ class OpenStackDriver(object):
STALE_TOKEN_DURATION_MAX,
STALE_TOKEN_DURATION_STEP)
if timeutils.is_soon(expiry_time, duration):
LOG.info("The cached keystone token for subcloud %s "
"will expire soon %s" %
(region_name,
OpenStackDriver._identity_tokens[region_name]
['expires_at']))
LOG.debug("The cached keystone token for subcloud %s "
"will expire soon %s" %
(region_name,
OpenStackDriver._identity_tokens[region_name]
['expires_at']))
# Reset the cached dictionary
OpenStackDriver.os_clients_dict[region_name] = \
collections.defaultdict(dict)

View File

@@ -132,6 +132,14 @@ class SysinvClient(base.DriverBase):
action_value = 'unlock'
return self._do_host_action(host_id, action_value)
def swact_host(self, host_id, force=False):
"""Perform host swact"""
if force:
action_value = 'force-swact'
else:
action_value = 'swact'
return self._do_host_action(host_id, action_value)
def configure_bmc_host(self,
host_id,
bm_username,
@@ -155,6 +163,10 @@ class SysinvClient(base.DriverBase):
]
return self.sysinv_client.ihost.update(host_id, patch)
def upgrade_host(self, host_id, force=False):
"""Invoke the API for 'system host-upgrade'"""
return self.sysinv_client.ihost.upgrade(host_id, force)
def power_on_host(self, host_id):
"""Power on a host"""
action_value = 'power-on'
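
The two new SysinvClient helpers above (swact_host and upgrade_host) are what the duplex states drive for controller-1. Below is a minimal sketch of that flow, not code from this commit: it assumes the existing get_host/lock_host/unlock_host helpers and the OpenStackDriver keystone session used elsewhere in this change, and it omits the polling and wait loops the real orchestration states perform between each call.

from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient

def upgrade_controller_1(region_name):
    # A cached keystone client is reused when still valid.
    os_client = OpenStackDriver(region_name=region_name, region_clients=None)
    sysinv = SysinvClient(region_name, os_client.keystone_client.session)

    controller_1 = sysinv.get_host("controller-1")
    controller_0 = sysinv.get_host("controller-0")
    sysinv.lock_host(controller_1.id)     # then wait for locked-disabled-online
    sysinv.upgrade_host(controller_1.id)  # new helper: 'system host-upgrade'
    sysinv.unlock_host(controller_1.id)   # then wait for unlocked-enabled-available
    # New helper: swact away from controller-0 so controller-1 becomes active.
    sysinv.swact_host(controller_0.id)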

View File

@@ -51,6 +51,14 @@ ADMIN_UNLOCKED = 'unlocked'
OPERATIONAL_ENABLED = 'enabled'
OPERATIONAL_DISABLED = 'disabled'
# Availability status for hosts
AVAILABILITY_AVAILABLE = 'available'
AVAILABILITY_DEGRADED = 'degraded'
# Personality of hosts
PERSONALITY_CONTROLLER_ACTIVE = 'Controller-Active'
PERSONALITY_CONTROLLER_STANDBY = 'Controller-Standby'
# Subcloud sync status
SYNC_STATUS_UNKNOWN = "unknown"
SYNC_STATUS_IN_SYNC = "in-sync"
@@ -112,13 +120,22 @@ STRATEGY_STATE_FAILED = "failed"
STRATEGY_STATE_PRE_CHECK = "pre check"
STRATEGY_STATE_INSTALLING_LICENSE = "installing license"
STRATEGY_STATE_IMPORTING_LOAD = "importing load"
STRATEGY_STATE_UPDATING_PATCHES = "updating patches"
STRATEGY_STATE_FINISHING_PATCH_STRATEGY = "finishing patch strategy"
STRATEGY_STATE_STARTING_UPGRADE = "starting upgrade"
STRATEGY_STATE_LOCKING_CONTROLLER = "locking controller"
STRATEGY_STATE_LOCKING_CONTROLLER_0 = "locking controller-0"
STRATEGY_STATE_LOCKING_CONTROLLER_1 = "locking controller-1"
STRATEGY_STATE_UPGRADING_SIMPLEX = "upgrading simplex"
STRATEGY_STATE_UPGRADING_DUPLEX = "upgrading duplex"
STRATEGY_STATE_MIGRATING_DATA = "migrating data"
STRATEGY_STATE_UNLOCKING_CONTROLLER = "unlocking controller"
STRATEGY_STATE_UNLOCKING_CONTROLLER_0 = "unlocking controller-0"
STRATEGY_STATE_UNLOCKING_CONTROLLER_1 = "unlocking controller-1"
STRATEGY_STATE_SWACTING_TO_CONTROLLER_0 = "swacting to controller-0"
STRATEGY_STATE_SWACTING_TO_CONTROLLER_1 = "swacting to controller-1"
STRATEGY_STATE_ACTIVATING_UPGRADE = "activating upgrade"
STRATEGY_STATE_COMPLETING_UPGRADE = "completing upgrade"
STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY = "creating VIM upgrade strategy"
STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY = "applying VIM upgrade strategy"
STRATEGY_STATE_DELETING_LOAD = "deleting load"
# Firmware update orchestration states
@@ -145,6 +162,15 @@ DEPLOY_STATE_DATA_MIGRATION_FAILED = 'data-migration-failed'
DEPLOY_STATE_MIGRATED = 'migrated'
DEPLOY_STATE_DONE = 'complete'
# Upgrades States
UPGRADE_STATE_DATA_MIGRATION_COMPLETE = 'data-migration-complete'
UPGRADE_STATE_DATA_MIGRATION_FAILED = 'data-migration-failed'
UPGRADE_STATE_UPGRADING_CONTROLLERS = 'upgrading-controllers'
UPGRADE_STATE_UPGRADING_HOSTS = 'upgrading-hosts'
UPGRADE_STATE_ACTIVATION_FAILED = 'activation-failed'
UPGRADE_STATE_ACTIVATION_COMPLETE = 'activation-complete'
# Alarm aggregation
ALARMS_DISABLED = "disabled"
ALARM_OK_STATUS = "OK"

View File

@@ -0,0 +1,162 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dccommon.drivers.openstack import vim
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
# Applying the vim update strategy may result in a loss of communication
# where API calls fail. The max time in this phase is 30 minutes
# (30 queries with 1 minute sleep)
DEFAULT_MAX_FAILED_QUERIES = 30
# Max time: 60 minutes = 60 queries x 60 seconds
# This is the max time for the state to change completion progress percent
DEFAULT_MAX_WAIT_ATTEMPTS = 60
# each loop while waiting for the apply will sleep for 60 seconds
WAIT_INTERVAL = 60
class ApplyingVIMStrategyState(BaseState):
"""State for applying the VIM strategy."""
def __init__(self, next_state, region_name, strategy_name):
super(ApplyingVIMStrategyState, self).__init__(
next_state=next_state, region_name=region_name)
self.strategy_name = strategy_name
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.wait_attempts = DEFAULT_MAX_WAIT_ATTEMPTS
self.wait_interval = WAIT_INTERVAL
def perform_state_action(self, strategy_step):
"""Apply a VIM strategy using VIM REST API
This code derives from patch orchestration: do_apply_subcloud_strategy
Any client (vim, sysinv, etc..) should be re-queried whenever used
to ensure the keystone token is up to date.
Any exceptions raised by this method set the strategy to FAILED
Returns the next state for the state machine if successful.
"""
region = self.get_region_name(strategy_step)
# query the vim strategy.
# Do not raise the default exception if there is no strategy
# because the default exception is unclear: ie: "Get strategy failed"
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=self.strategy_name,
raise_error_if_missing=False)
# We have a VIM strategy, but need to check if it is ready to apply
if subcloud_strategy.state == vim.STATE_READY_TO_APPLY:
# An exception here will fail this state
subcloud_strategy = self.get_vim_client(region).apply_strategy(
strategy_name=self.strategy_name)
if subcloud_strategy.state == vim.STATE_APPLYING:
self.info_log(strategy_step, "VIM Strategy apply in progress")
elif subcloud_strategy.state == vim.STATE_APPLIED:
# Success.
self.info_log(strategy_step,
"VIM strategy has been applied")
elif subcloud_strategy.state in [vim.STATE_APPLY_FAILED,
vim.STATE_APPLY_TIMEOUT]:
# Explicit known failure states
raise Exception("VIM strategy apply failed. %s. %s"
% (subcloud_strategy.state,
subcloud_strategy.apply_phase.reason))
else:
# Other states are bad
raise Exception("VIM strategy apply failed. "
"Unexpected State: %s."
% subcloud_strategy.state)
# Wait for the new strategy, or a pre-existing one, to finish applying.
# Loop until the strategy applies. Repeatedly query the API
# This can take a long time.
# Waits for up to 60 minutes for the current phase or completion
# percentage to change before giving up.
wait_count = 0
get_fail_count = 0
last_details = ""
while True:
# todo(abailey): combine the sleep and stop check into one method
# which would allow the longer 60 second sleep to be broken into
# multiple smaller sleep calls
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
# break out of the loop if the max number of attempts is reached
wait_count += 1
if wait_count >= self.wait_attempts:
raise Exception("Timeout applying VIM strategy.")
# every loop we wait, even the first one
time.sleep(self.wait_interval)
# get the strategy
try:
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=self.strategy_name,
raise_error_if_missing=False)
get_fail_count = 0
except Exception:
# When applying the strategy to a subcloud, the VIM can
# be unreachable for a significant period of time when
# there is a controller swact, or in the case of AIO-SX,
# when the controller reboots.
get_fail_count += 1
if get_fail_count >= self.max_failed_queries:
# We have waited too long.
raise Exception("Timeout during recovery of apply "
"VIM strategy.")
self.debug_log(strategy_step,
"Unable to get VIM strategy - "
"attempt %d" % get_fail_count)
continue
# The loop gets here if the API is able to respond
# Check if the strategy no longer exists. This should not happen.
if subcloud_strategy is None:
raise Exception("VIM strategy not found.")
elif subcloud_strategy.state == vim.STATE_APPLYING:
# Still applying. Update details if it has changed
new_details = ("%s phase is %s%% complete" % (
subcloud_strategy.current_phase,
subcloud_strategy.current_phase_completion_percentage))
if new_details != last_details:
# Progress is being made.
# Reset the counter and log the progress
last_details = new_details
wait_count = 0
self.info_log(strategy_step, new_details)
db_api.strategy_step_update(self.context,
strategy_step.subcloud_id,
details=new_details)
elif subcloud_strategy.state == vim.STATE_APPLIED:
# Success.
self.info_log(strategy_step,
"VIM strategy has been applied")
break
elif subcloud_strategy.state in [vim.STATE_APPLY_FAILED,
vim.STATE_APPLY_TIMEOUT]:
# Explicit known failure states
raise Exception("VIM strategy apply failed. %s. %s"
% (subcloud_strategy.state,
subcloud_strategy.apply_phase.reason))
else:
# Other states are bad
raise Exception("VIM strategy apply failed. "
"Unexpected State: %s."
% subcloud_strategy.state)
# end of loop
# Success, state machine can proceed to the next state
return self.next_state

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -9,6 +9,7 @@ import six
from oslo_log import log as logging
from dccommon.drivers.openstack.barbican import BarbicanClient
from dccommon.drivers.openstack.patching_v1 import PatchingClient
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.drivers.openstack.vim import VimClient
@@ -100,6 +101,10 @@ class BaseState(object):
return SysinvClient(region_name, keystone_client.session)
def get_patching_client(self, region_name=consts.DEFAULT_REGION_NAME):
keystone_client = self.get_keystone_client(region_name)
return PatchingClient(region_name, keystone_client.session)
@property
def local_sysinv(self):
return self.get_sysinv_client(consts.DEFAULT_REGION_NAME)
@@ -116,15 +121,11 @@ class BaseState(object):
return BarbicanClient(region_name, keystone_client.session)
@staticmethod
def get_vim_client(region_name):
def get_vim_client(self, region_name):
"""construct a vim client for a region."""
# If keystone client fails to initialze, raise an exception
# a cached keystone client is used if valid
os_client = OpenStackDriver(region_name=region_name,
region_clients=None)
keystone_client = self.get_keystone_client(region_name)
return VimClient(region_name,
os_client.keystone_client.session)
keystone_client.session)
@abc.abstractmethod
def perform_state_action(self, strategy_step):

View File

@@ -0,0 +1,134 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dccommon.drivers.openstack import vim
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils as dcmanager_utils
from dcmanager.orchestrator.states.base import BaseState
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
class CreatingVIMStrategyState(BaseState):
"""State for creating the VIM strategy."""
def __init__(self, next_state, region_name, strategy_name):
super(CreatingVIMStrategyState, self).__init__(
next_state=next_state, region_name=region_name)
self.strategy_name = strategy_name
# max time to wait for the strategy to be built (in seconds)
# is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
def _create_vim_strategy(self, strategy_step, region):
self.info_log(strategy_step, "Creating VIM strategy")
# Get the update options
opts_dict = dcmanager_utils.get_sw_update_opts(
self.context,
for_sw_update=True,
subcloud_id=strategy_step.subcloud_id)
# Call the API to build the VIM strategy
subcloud_strategy = self.get_vim_client(region).create_strategy(
self.strategy_name,
opts_dict['storage-apply-type'],
opts_dict['worker-apply-type'],
opts_dict['max-parallel-workers'],
opts_dict['default-instance-action'],
opts_dict['alarm-restriction-type'])
# a successful API call to create MUST set the state to 'building'
if subcloud_strategy.state != vim.STATE_BUILDING:
raise Exception("Unexpected VIM strategy build state: %s"
% subcloud_strategy.state)
return subcloud_strategy
def perform_state_action(self, strategy_step):
"""Create a VIM strategy using VIM REST API
Any client (vim, sysinv, etc..) should be re-queried whenever used
to ensure the keystone token is up to date.
Any exceptions raised by this method set the strategy to FAILED
Returns the next state for the state machine if successful.
"""
region = self.get_region_name(strategy_step)
# Get the existing VIM strategy, which may be None
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=self.strategy_name,
raise_error_if_missing=False)
if subcloud_strategy is None:
subcloud_strategy = self._create_vim_strategy(strategy_step,
region)
else:
self.info_log(strategy_step,
"VIM strategy exists with state: %s"
% subcloud_strategy.state)
# if a strategy exists in any type of failed state or aborted
# state it should be deleted.
# applied state should also be deleted from previous success runs.
if subcloud_strategy.state in [vim.STATE_BUILDING,
vim.STATE_APPLYING,
vim.STATE_ABORTING]:
# Can't delete a strategy in these states
message = ("Failed to create a VIM strategy for %s. "
"There already is an existing strategy in %s state"
% (region, subcloud_strategy.state))
self.warn_log(strategy_step, message)
raise Exception(message)
# if strategy exists in any other type of state, delete and create
self.info_log(strategy_step,
"Deleting existing VIM strategy")
self.get_vim_client(region).delete_strategy(
strategy_name=self.strategy_name)
# re-create it
subcloud_strategy = self._create_vim_strategy(strategy_step,
region)
# A strategy already exists, or is being built
# Loop until the strategy is done building. Repeatedly query the API.
counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
if counter >= self.max_queries:
raise Exception("Timeout building vim strategy. state: %s"
% subcloud_strategy.state)
counter += 1
time.sleep(self.sleep_duration)
# query the vim strategy to see if it is in the new state
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=self.strategy_name,
raise_error_if_missing=True)
if subcloud_strategy.state == vim.STATE_READY_TO_APPLY:
self.info_log(strategy_step, "VIM strategy has been built")
break
elif subcloud_strategy.state == vim.STATE_BUILDING:
# This is the expected state while creating the strategy
pass
elif subcloud_strategy.state == vim.STATE_BUILD_FAILED:
raise Exception("VIM strategy build failed: %s. %s."
% (subcloud_strategy.state,
subcloud_strategy.build_phase.reason))
elif subcloud_strategy.state == vim.STATE_BUILD_TIMEOUT:
raise Exception("VIM strategy build timed out: %s."
% subcloud_strategy.state)
else:
raise Exception("VIM strategy unexpected build state: %s"
% subcloud_strategy.state)
# Success, state machine can proceed to the next state
return self.next_state

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -17,9 +17,9 @@ DEFAULT_SLEEP_DURATION = 10
class LockHostState(BaseState):
"""Orchestration state for locking a host"""
def __init__(self, region_name, hostname='controller-0'):
def __init__(self, next_state, region_name, hostname):
super(LockHostState, self).__init__(
next_state=consts.STRATEGY_STATE_UPGRADING_SIMPLEX, region_name=region_name)
next_state=next_state, region_name=region_name)
self.target_hostname = hostname
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION

View File

@@ -0,0 +1,98 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# When a swact occurs, services become unavailable and API calls may fail.
# The max time allowed here is 20 minutes (ie: 120 queries with 10 secs sleep)
DEFAULT_MAX_FAILED_QUERIES = 120
DEFAULT_FAILED_SLEEP = 10
# Max time: 10 minutes = 60 queries x 10 seconds
DEFAULT_MAX_QUERIES = 60
DEFAULT_SLEEP_DURATION = 10
class SwactHostState(BaseState):
"""Orchestration state for host swact"""
def __init__(self, next_state, region_name, active, standby):
super(SwactHostState, self).__init__(
next_state=next_state, region_name=region_name)
self.active = active
self.standby = standby
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def perform_state_action(self, strategy_step):
"""Swact host on the subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
region = self.get_region_name(strategy_step)
active_host = self.get_sysinv_client(region).get_host(self.active)
standby_host = self.get_sysinv_client(region).get_host(self.standby)
# if the desired active host is already the Active Controller, no need for action
if active_host.capabilities.get('Personality') == consts.PERSONALITY_CONTROLLER_ACTIVE:
msg = "Host: %s already the active controller." % (self.active)
self.info_log(strategy_step, msg)
return self.next_state
# Perform swact action
response = self.get_sysinv_client(region).swact_host(standby_host.id)
if response.task != 'Swacting':
raise Exception("Unable to swact to host %s" % self.active)
# Allow separate durations for failures and api retries
fail_counter = 0
api_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(region).get_host(self.active)
if host.capabilities.get('Personality') == consts.PERSONALITY_CONTROLLER_ACTIVE:
msg = "Host: %s is now the active controller." % (self.active)
self.info_log(strategy_step, msg)
break
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact
fail_counter += 1
if fail_counter >= self.max_failed_queries:
raise Exception("Timeout waiting for swact to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
api_counter += 1
if api_counter >= self.max_queries:
raise Exception("Timeout waiting for swact to complete. "
"Please check sysinv.log on the subcloud "
"for details.")
time.sleep(self.sleep_duration)
# If we are here, the loop broke out cleanly and the action succeeded
# When we return from this method without throwing an exception, the
# state machine can proceed to the next state
# Adding a 2 minute delay before moving to the next state
self.info_log(strategy_step, "Waiting for 2 minutes before proceeding")
time.sleep(120)
return self.next_state

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -26,15 +26,23 @@ DEFAULT_API_SLEEP = 60
class UnlockHostState(BaseState):
"""Orchestration state for unlocking a host."""
def __init__(self, region_name, hostname='controller-0'):
def __init__(self, next_state, region_name, hostname):
super(UnlockHostState, self).__init__(
next_state=consts.STRATEGY_STATE_ACTIVATING_UPGRADE, region_name=region_name)
next_state=next_state, region_name=region_name)
self.target_hostname = hostname
self.max_api_queries = DEFAULT_MAX_API_QUERIES
self.api_sleep_duration = DEFAULT_API_SLEEP
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def check_host_ready(self, host):
"""Returns True if host is unlocked, enabled and available."""
if (host.administrative == consts.ADMIN_UNLOCKED and
host.operational == consts.OPERATIONAL_ENABLED and
host.availability == consts.AVAILABILITY_AVAILABLE):
return True
def perform_state_action(self, strategy_step):
"""Unlocks a host on the subcloud
@@ -48,9 +56,11 @@ class UnlockHostState(BaseState):
host = sysinv_client.get_host(self.target_hostname)
# if the host is already in the desired state, no need for action
if host.administrative == consts.ADMIN_UNLOCKED:
msg = "Host: %s already: %s." % (self.target_hostname,
host.administrative)
if self.check_host_ready(host):
msg = "Host: %s is already: %s %s %s" % (self.target_hostname,
host.administrative,
host.operational,
host.availability)
self.info_log(strategy_step, msg)
return self.next_state
@@ -76,12 +86,12 @@ class UnlockHostState(BaseState):
# query the administrative state to see if it is the new state.
host = self.get_sysinv_client(
strategy_step.subcloud.name).get_host(self.target_hostname)
if (host.administrative == consts.ADMIN_UNLOCKED and
host.operational == consts.OPERATIONAL_ENABLED):
if self.check_host_ready(host):
# Success. Break out of the loop.
msg = "Host: %s is now: %s %s" % (self.target_hostname,
host.administrative,
host.operational)
msg = "Host: %s is now: %s %s %s" % (self.target_hostname,
host.administrative,
host.operational,
host.availability)
self.info_log(strategy_step, msg)
break
# no exception was raised so reset fail checks

View File

@@ -0,0 +1,19 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.applying_vim_strategy \
import ApplyingVIMStrategyState
class ApplyingVIMUpgradeStrategyState(ApplyingVIMStrategyState):
"""State for applying the VIM upgrade strategy."""
def __init__(self, region_name):
super(ApplyingVIMUpgradeStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0,
region_name=region_name,
strategy_name=vim.STRATEGY_NAME_SW_UPGRADE)

View File

@@ -0,0 +1,19 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.creating_vim_strategy \
import CreatingVIMStrategyState
class CreatingVIMUpgradeStrategyState(CreatingVIMStrategyState):
"""State for creating the VIM upgrade strategy."""
def __init__(self, region_name):
super(CreatingVIMUpgradeStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY,
region_name=region_name,
strategy_name=vim.STRATEGY_NAME_SW_UPGRADE)

View File

@@ -0,0 +1,93 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
# todo(jcasteli): Refactor instead of duplicating code from patch_orch_thread.py
class FinishingPatchStrategyState(BaseState):
"""Upgrade state for finishing patch strategy"""
def __init__(self, region_name):
super(FinishingPatchStrategyState, self).__init__(
next_state=consts.STRATEGY_STATE_STARTING_UPGRADE, region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
def perform_state_action(self, strategy_step):
"""Clean up patches in this subcloud (commit, delete).
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
self.info_log(strategy_step, "Finishing patch strategy")
if strategy_step.subcloud_id is None:
# This is the SystemController. No cleanup is required.
self.info_log(strategy_step, "Skipping finish for SystemController")
return self.next_state
regionone_committed_patches = self.get_patching_client(
consts.DEFAULT_REGION_NAME).query(
state=patching_v1.PATCH_STATE_COMMITTED)
self.debug_log(strategy_step,
"regionone_committed_patches: %s" % regionone_committed_patches)
committed_patch_ids = list()
for patch_id in regionone_committed_patches.keys():
committed_patch_ids.append(patch_id)
self.debug_log(strategy_step,
"RegionOne committed_patch_ids: %s" % committed_patch_ids)
subcloud_patches = self.get_patching_client(
strategy_step.subcloud.name).query()
self.debug_log(strategy_step,
"Patches for subcloud: %s" % subcloud_patches)
patches_to_commit = list()
patches_to_delete = list()
# For this subcloud, determine which patches should be committed and
# which should be deleted. We check the patchstate here because
# patches cannot be deleted or committed if they are in a partial
# state (e.g. Partial-Apply or Partial-Remove).
subcloud_patch_ids = list(subcloud_patches.keys())
for patch_id in subcloud_patch_ids:
if subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_AVAILABLE:
self.info_log(strategy_step,
"Patch %s will be deleted from subcloud" % patch_id)
patches_to_delete.append(patch_id)
elif subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_APPLIED:
if patch_id in committed_patch_ids:
self.info_log(strategy_step,
"Patch %s will be committed in subcloud" % patch_id)
patches_to_commit.append(patch_id)
if patches_to_delete:
self.info_log(strategy_step, "Deleting patches %s" % patches_to_delete)
self.get_patching_client(
strategy_step.subcloud.name).delete(patches_to_delete)
if self.stopped():
raise StrategyStoppedException()
if patches_to_commit:
self.info_log(strategy_step,
"Committing patches %s in subcloud" % patches_to_commit)
self.get_patching_client(
strategy_step.subcloud.name).commit(patches_to_commit)
return self.next_state

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -22,7 +22,7 @@ class ImportingLoadState(BaseState):
def __init__(self, region_name):
super(ImportingLoadState, self).__init__(
next_state=consts.STRATEGY_STATE_STARTING_UPGRADE, region_name=region_name)
next_state=consts.STRATEGY_STATE_UPDATING_PATCHES, region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.lock_host import LockHostState
class LockDuplexState(LockHostState):
"""Orchestration state for locking controller-1 host"""
def __init__(self, region_name):
super(LockDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UPGRADING_DUPLEX,
region_name=region_name,
hostname="controller-1")

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.lock_host import LockHostState
class LockSimplexState(LockHostState):
"""Orchestration state for locking controller-0 host"""
def __init__(self, region_name):
super(LockSimplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UPGRADING_SIMPLEX,
region_name=region_name,
hostname="controller-0",)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -49,7 +49,7 @@ class MigratingDataState(BaseState):
def __init__(self, region_name):
super(MigratingDataState, self).__init__(
next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER, region_name=region_name)
next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0, region_name=region_name)
self.ansible_sleep = DEFAULT_ANSIBLE_SLEEP
self.max_api_queries = DEFAULT_MAX_API_QUERIES

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -35,59 +35,69 @@ class PreCheckState(BaseState):
next_state=consts.STRATEGY_STATE_INSTALLING_LICENSE, region_name=region_name)
def _perform_subcloud_online_checks(self, strategy_step, subcloud_sysinv_client):
# check system health
#
# Sample output #1
# ================
# Some non-management affecting alarms, all other checks passed
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [OK]
# No alarms: [Fail]
# [1] alarms found, [0] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
#
# Sample output #2
# ================
# Multiple failed checks, management affecting alarms
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [Fail]
# No alarms: [Fail]
# [7] alarms found, [2] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
subcloud_type = self.get_sysinv_client(
strategy_step.subcloud.name).get_system().system_mode
upgrades = self.get_sysinv_client(strategy_step.subcloud.name).get_upgrades()
system_health = subcloud_sysinv_client.get_system_health()
fails = re.findall("\[Fail\]", system_health)
failed_alarm_check = re.findall("No alarms: \[Fail\]", system_health)
no_mgmt_alarms = re.findall("\[0\] of which are management affecting",
system_health)
# For duplex upgrade, we skip health checks if an upgrade is in progress.
# The only 2 health conditions acceptable for upgrade are:
# a) subcloud is completely healthy (i.e. no failed checks)
# b) subcloud only fails alarm check and it only has non-management
# affecting alarm(s)
if ((len(fails) == 0) or
(len(fails) == 1 and failed_alarm_check and no_mgmt_alarms)):
self.info_log(strategy_step, "health check passed.")
if (len(upgrades) != 0 and subcloud_type == consts.SYSTEM_MODE_DUPLEX):
self.info_log(strategy_step, "Health check skipped for non-simplex subclouds.")
else:
details = "System health check failed. Please run 'system health-query' " \
"command on the subcloud for more details."
self.error_log(strategy_step, "\n" + system_health)
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
# check system health
#
# Sample output #1
# ================
# Some non-management affecting alarms, all other checks passed
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [OK]
# No alarms: [Fail]
# [1] alarms found, [0] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
#
# Sample output #2
# ================
# Multiple failed checks, management affecting alarms
#
# System Health:
# All hosts are provisioned: [OK]
# All hosts are unlocked/enabled: [OK]
# All hosts have current configurations: [OK]
# All hosts are patch current: [OK]
# Ceph Storage Healthy: [Fail]
# No alarms: [Fail]
# [7] alarms found, [2] of which are management affecting
# All kubernetes nodes are ready: [OK]
# All kubernetes control plane pods are ready: [OK]
system_health = subcloud_sysinv_client.get_system_health()
fails = re.findall("\[Fail\]", system_health)
failed_alarm_check = re.findall("No alarms: \[Fail\]", system_health)
no_mgmt_alarms = re.findall("\[0\] of which are management affecting",
system_health)
# The only 2 health conditions acceptable for simplex upgrade are:
# a) subcloud is completely healthy (i.e. no failed checks)
# b) subcloud only fails alarm check and it only has non-management
# affecting alarm(s)
if ((len(fails) == 0) or
(len(fails) == 1 and failed_alarm_check and no_mgmt_alarms)):
self.info_log(strategy_step, "Health check passed.")
else:
details = "System health check failed. " \
"Please run 'system health-query' " \
"command on the subcloud for more details."
self.error_log(strategy_step, "\n" + system_health)
raise PreCheckFailedException(
subcloud=strategy_step.subcloud.name,
details=details,
)
# check scratch
host = subcloud_sysinv_client.get_host("controller-0")
@@ -108,10 +118,13 @@ class PreCheckState(BaseState):
Check the deploy_status and transfer to the correct state.
if an unsupported deploy_status is encountered, fail the upgrade
"""
subcloud_type = self.get_sysinv_client(
strategy_step.subcloud.name).get_system().system_mode
subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id)
# check presence of data_install values. These are managed
# semantically on subcloud add or update
if not subcloud.data_install:
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX and not subcloud.data_install:
details = ("Data install values are missing and must be updated "
"via dcmanager subcloud update")
raise PreCheckFailedException(
@@ -152,12 +165,71 @@ class PreCheckState(BaseState):
if subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATED:
self.override_next_state(consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
if (subcloud_type == consts.SYSTEM_MODE_DUPLEX and
subcloud.deploy_status == consts.DEPLOY_STATE_DONE):
upgrades = self.get_sysinv_client(strategy_step.subcloud.name).get_upgrades()
if len(upgrades) != 0:
upgrade_state = upgrades[0].state
if(upgrade_state == consts.UPGRADE_STATE_UPGRADING_CONTROLLERS):
# At this point the subcloud is duplex, deploy state is complete
# and "system upgrade-show" on the subcloud indicates that the
# upgrade state is "upgrading-controllers".
# If controller-0 is the active controller, we need to swact
# else we can proceed to create the VIM strategy.
if host.capabilities.get('Personality') == consts.PERSONALITY_CONTROLLER_ACTIVE:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1)
else:
self.override_next_state(
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
elif(upgrade_state == consts.UPGRADE_STATE_UPGRADING_HOSTS):
# At this point the subcloud is duplex, deploy state is complete
# and "system upgrade-show" on the subcloud indicates that the
# upgrade state is "upgrading-hosts".
# If both subcloud hosts are upgraded to the newer load,
# we resume the state machine from activate upgrade state.
# Otherwise, we resume from create the VIM strategy state.
# determine the version of the system controller in region one
target_version = self.get_sysinv_client(consts.DEFAULT_REGION_NAME).\
get_system().software_version
all_hosts_upgraded = True
subcloud_hosts = self.get_sysinv_client(
strategy_step.subcloud.name).get_hosts()
for subcloud_host in subcloud_hosts:
if(subcloud_host.software_load != target_version):
all_hosts_upgraded = False
self.override_next_state(
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
if all_hosts_upgraded:
if host.capabilities.get('Personality') == consts.PERSONALITY_CONTROLLER_ACTIVE:
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
else:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
elif(upgrade_state == consts.UPGRADE_STATE_ACTIVATION_FAILED):
if(host.capabilities.get('Personality') == consts.PERSONALITY_CONTROLLER_ACTIVE):
self.override_next_state(
consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
else:
self.override_next_state(
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
elif(upgrade_state == consts.UPGRADE_STATE_ACTIVATION_COMPLETE):
self.override_next_state(consts.STRATEGY_STATE_COMPLETING_UPGRADE)
return self.next_state
# it is offline.
if subcloud.deploy_status in VALID_UPGRADE_STATES:
self.override_next_state(consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
return self.next_state
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
self.override_next_state(consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
return self.next_state
else:
self.override_next_state(consts.STRATEGY_STATE_UPGRADING_DUPLEX)
return self.next_state
elif subcloud.deploy_status in VALID_MIGRATE_DATA_STATES:
self.override_next_state(consts.STRATEGY_STATE_MIGRATING_DATA)
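
The pre-check resume logic above is comment-heavy, so the following condensed restatement (a sketch, not the commit's code) shows how the next state is chosen for an online duplex subcloud whose deploy status is complete and which already has an upgrade in progress. The controller0_is_active and all_hosts_upgraded flags are illustrative stand-ins for the Personality == Controller-Active check on controller-0 and the per-host software_load comparison against the RegionOne software version.

from dcmanager.common import consts

def duplex_resume_state(upgrade_state, all_hosts_upgraded, controller0_is_active):
    # Condensed restatement of the duplex resume decisions in pre-check.
    if upgrade_state == consts.UPGRADE_STATE_UPGRADING_CONTROLLERS:
        # controller-1 is already upgraded; swact before building the VIM strategy
        return (consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1
                if controller0_is_active
                else consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
    if upgrade_state == consts.UPGRADE_STATE_UPGRADING_HOSTS:
        if not all_hosts_upgraded:
            return consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY
        return (consts.STRATEGY_STATE_ACTIVATING_UPGRADE
                if controller0_is_active
                else consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
    if upgrade_state == consts.UPGRADE_STATE_ACTIVATION_FAILED:
        return (consts.STRATEGY_STATE_ACTIVATING_UPGRADE
                if controller0_is_active
                else consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)
    if upgrade_state == consts.UPGRADE_STATE_ACTIVATION_COMPLETE:
        return consts.STRATEGY_STATE_COMPLETING_UPGRADE
    # No resume point matched: keep the default first state (installing license).
    return consts.STRATEGY_STATE_INSTALLING_LICENSE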

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -22,8 +22,16 @@ class StartingUpgradeState(BaseState):
"""Upgrade state for starting an upgrade on a subcloud"""
def __init__(self, region_name):
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER, region_name=region_name)
subcloud_type = self.get_sysinv_client(
region_name).get_system().system_mode
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_0,
region_name=region_name)
else:
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_1,
region_name=region_name)
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.swact_host import SwactHostState
class SwactToController0State(SwactHostState):
"""Orchestration state for swacting to controller-0 host"""
def __init__(self, region_name):
super(SwactToController0State, self).__init__(
next_state=consts.STRATEGY_STATE_ACTIVATING_UPGRADE,
region_name=region_name,
active="controller-0",
standby="controller-1",)

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.swact_host import SwactHostState
class SwactToController1State(SwactHostState):
"""Orchestration state for swacting to controller-1 host"""
def __init__(self, region_name):
super(SwactToController1State, self).__init__(
next_state=consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY,
region_name=region_name,
active="controller-1",
standby="controller-0")

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.unlock_host import UnlockHostState
class UnlockDuplexState(UnlockHostState):
"""Orchestration state for unlocking controller-1 host"""
def __init__(self, region_name):
super(UnlockDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1,
region_name=region_name,
hostname="controller-1")

View File

@@ -0,0 +1,18 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.orchestrator.states.unlock_host import UnlockHostState
class UnlockSimplexState(UnlockHostState):
"""Orchestration state for unlocking controller-0 host"""
def __init__(self, region_name):
super(UnlockSimplexState, self).__init__(
next_state=consts.STRATEGY_STATE_ACTIVATING_UPGRADE,
region_name=region_name,
hostname="controller-0")

View File

@@ -0,0 +1,205 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import time
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils
from dcmanager.orchestrator.states.base import BaseState
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
LOAD_IMPORT_REQUEST_TYPE = 'import'
LOAD_DELETE_REQUEST_TYPE = 'delete'
class UpdatingPatchesState(BaseState):
"""Upgrade state for updating patches"""
def __init__(self, region_name):
super(UpdatingPatchesState, self).__init__(
next_state=consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY,
region_name=region_name)
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
# todo(jcasteli): Refactor instead of duplicating code from patch_orch_thread.py
def perform_state_action(self, strategy_step):
"""Update patches in this subcloud that need to be applied and
removed to match the applied patches in RegionOne
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
This code is based on patch orchestration.
"""
self.info_log(strategy_step, "Updating patches")
if strategy_step.subcloud_id is None:
# This is the SystemController. It is the master so no update
# is necessary.
self.info_log(strategy_step,
"Skipping update patches for SystemController")
return self.next_state
# First query RegionOne to determine what patches should be applied.
regionone_patches = self.get_patching_client(consts.DEFAULT_REGION_NAME).query()
self.debug_log(strategy_step, "regionone_patches: %s" % regionone_patches)
# Build lists of patches that should be applied in this subcloud,
# based on their state in RegionOne. Check repostate (not patchstate)
# as we only care if the patch has been applied to the repo (not
# whether it is installed on the hosts). If we were to check the
# patchstate, we could end up removing patches from this subcloud
# just because a single host in RegionOne reported that it was not
# patch current.
applied_patch_ids = list()
for patch_id in regionone_patches.keys():
if regionone_patches[patch_id]['repostate'] in [
patching_v1.PATCH_STATE_APPLIED,
patching_v1.PATCH_STATE_COMMITTED]:
applied_patch_ids.append(patch_id)
self.debug_log(strategy_step, "RegionOne applied_patch_ids: %s" % applied_patch_ids)
region = self.get_region_name(strategy_step)
# Retrieve all the patches that are present in this subcloud.
subcloud_patches = self.get_patching_client(region).query()
self.debug_log(strategy_step, "Patches for subcloud: %s" %
(subcloud_patches))
# Determine which loads are present in this subcloud. During an
# upgrade, there will be more than one load installed.
loads = self.get_sysinv_client(region).get_loads()
installed_loads = utils.get_loads_for_patching(loads)
patches_to_upload = list()
patches_to_apply = list()
patches_to_remove = list()
# Figure out which patches in this subcloud need to be applied and
# removed to match the applied patches in RegionOne. Check the
# repostate, which indicates whether it is applied or removed in
# the repo.
subcloud_patch_ids = list(subcloud_patches.keys())
for patch_id in subcloud_patch_ids:
if subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_APPLIED:
if patch_id not in applied_patch_ids:
self.info_log(strategy_step,
"Patch %s will be removed from subcloud" %
(patch_id))
patches_to_remove.append(patch_id)
elif subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_COMMITTED:
if patch_id not in applied_patch_ids:
message = ("Patch %s is committed in subcloud but "
"not applied in SystemController" %
patch_id)
self.warn_log(strategy_step, message)
raise Exception(message)
elif subcloud_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_AVAILABLE:
if patch_id in applied_patch_ids:
self.info_log(strategy_step,
"Patch %s will be applied to subcloud" %
(patch_id))
patches_to_apply.append(patch_id)
else:
# This patch is in an invalid state
message = ('Patch %s in subcloud in unexpected state %s' %
(patch_id,
subcloud_patches[patch_id]['repostate']))
self.warn_log(strategy_step, message)
raise Exception(message)
# Check that all applied patches in RegionOne are present in the
# subcloud.
for patch_id in applied_patch_ids:
if regionone_patches[patch_id]['sw_version'] in \
installed_loads and patch_id not in subcloud_patch_ids:
self.info_log(strategy_step,
"Patch %s missing from subcloud" %
(patch_id))
patches_to_upload.append(patch_id)
patches_to_apply.append(patch_id)
if patches_to_remove:
self.info_log(strategy_step,
"Removing patches %s from subcloud" %
(patches_to_remove))
self.get_patching_client(region).remove(patches_to_remove)
if patches_to_upload:
self.info_log(strategy_step,
"Uploading patches %s to subcloud" %
(patches_to_upload))
for patch in patches_to_upload:
patch_sw_version = regionone_patches[patch]['sw_version']
patch_file = "%s/%s/%s.patch" % (consts.PATCH_VAULT_DIR,
patch_sw_version,
patch)
if not os.path.isfile(patch_file):
message = ('Patch file %s is missing' % patch_file)
self.error_log(strategy_step, message)
raise Exception(message)
self.get_patching_client(region).upload([patch_file])
if self.stopped():
self.info_log(strategy_step,
"Exiting because task is stopped")
raise StrategyStoppedException()
if patches_to_apply:
self.info_log(strategy_step,
"Applying patches %s to subcloud" %
(patches_to_apply))
self.get_patching_client(region).apply(patches_to_apply)
# Now that we have applied/removed/uploaded patches, we need to give
# the patch controller on this subcloud time to determine whether
# each host on that subcloud is patch current.
wait_count = 0
while True:
subcloud_hosts = self.get_patching_client(
region).query_hosts()
self.debug_log(strategy_step,
"query_hosts for subcloud: %s" % subcloud_hosts)
for host in subcloud_hosts:
if host['interim_state']:
# This host is not yet ready.
self.debug_log(strategy_step,
"Host %s in subcloud in interim state" %
(host["hostname"]))
break
else:
# All hosts in the subcloud are updated
break
wait_count += 1
if wait_count >= 6:
# We have waited at least 60 seconds. This is too long. We
# will just log it and move on without failing the step.
message = ("Too much time expired after applying patches to "
"subcloud - continuing.")
self.warn_log(strategy_step, message)
break
if self.stopped():
self.info_log(strategy_step, "Exiting because task is stopped")
raise StrategyStoppedException()
# Wait 10 seconds before doing another query.
time.sleep(10)
return self.next_state

View File

@@ -0,0 +1,102 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.orchestrator.states.base import BaseState
# When an unlock occurs, a reboot is triggered. During reboot, API calls fail.
# The max time allowed here is 30 minutes (ie: 180 queries with 10 secs sleep)
DEFAULT_MAX_FAILED_QUERIES = 180
DEFAULT_FAILED_SLEEP = 10
# Max time: 30 minutes = 180 queries x 10 seconds
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
class UpgradingDuplexState(BaseState):
"""Update state for upgrading a non-simplex subcloud host"""
def __init__(self, region_name):
super(UpgradingDuplexState, self).__init__(
next_state=consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1, region_name=region_name)
self.target_hostname = "controller-1"
# max time to wait (in seconds) is: sleep_duration * max_queries
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
self.failed_sleep_duration = DEFAULT_FAILED_SLEEP
def perform_state_action(self, strategy_step):
"""Upgrade a duplex host on a subcloud
Returns the next state in the state machine on success.
Any exceptions raised by this method set the strategy to FAILED.
"""
self.info_log(strategy_step, "Performing duplex upgrade for subcloud")
region = self.get_region_name(strategy_step)
host = self.get_sysinv_client(
region).get_host(self.target_hostname)
self.get_sysinv_client(region).upgrade_host(host.id)
# Wait for controller-1 to reinstall with the release N+1 load
# and reach the locked-disabled-online state.
# this action is asynchronous, query until it completes or times out
# Allow separate durations for failures (ie: reboot) and api retries
fail_counter = 0
api_counter = 0
while True:
# If event handler stop has been triggered, fail the state
if self.stopped():
raise StrategyStoppedException()
try:
upgrades = self.get_sysinv_client(region).get_upgrades()
if len(upgrades) != 0:
if (upgrades[0].state == consts.UPGRADE_STATE_DATA_MIGRATION_FAILED or
upgrades[0].state == consts.UPGRADE_STATE_DATA_MIGRATION_COMPLETE):
msg = "Upgrade state is %s now" % (upgrades[0].state)
self.info_log(strategy_step, msg)
break
fail_counter = 0
except Exception:
# Handle other exceptions due to being unreachable
# for a significant period of time when there is a
# controller swact
fail_counter += 1
if fail_counter >= self.max_failed_queries:
raise Exception("Timeout waiting for reboot to complete")
time.sleep(self.failed_sleep_duration)
# skip the api_counter
continue
api_counter += 1
if api_counter >= self.max_queries:
raise Exception("Timeout waiting for update state to be updated to "
"updated to 'data-migration-failed' or 'data-migration-complete'."
"Please check sysinv.log on the subcloud "
"for details.")
time.sleep(self.sleep_duration)
# If the upgrade state is 'data-migration-complete' we move to the
# next state, else if it is 'data-migration-failed' we go to the failed
# state.
upgrades = self.get_sysinv_client(region).get_upgrades()
if len(upgrades) == 0:
raise Exception("No upgrades were found")
# The list of upgrades will never contain more than one entry.
if upgrades[0].state == 'data-migration-failed':
raise Exception("Data migration failed on host %s" % self.target_hostname)
# If we reach this point, the upgrade state is 'data-migration-complete'
# and we can move to the next state.
return self.next_state

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

View File

@@ -23,26 +23,44 @@ import datetime
import threading
import time
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.lock_host import LockHostState
from dcmanager.orchestrator.states.unlock_host import UnlockHostState
from dcmanager.orchestrator.states.upgrade.activating import ActivatingUpgradeState
from dcmanager.orchestrator.states.upgrade.applying_vim_upgrade_strategy \
import ApplyingVIMUpgradeStrategyState
from dcmanager.orchestrator.states.upgrade.completing import CompletingUpgradeState
from dcmanager.orchestrator.states.upgrade.creating_vim_upgrade_strategy \
import CreatingVIMUpgradeStrategyState
from dcmanager.orchestrator.states.upgrade.deleting_load import DeletingLoadState
from dcmanager.orchestrator.states.upgrade.finishing_patch_strategy \
import FinishingPatchStrategyState
from dcmanager.orchestrator.states.upgrade.importing_load import ImportingLoadState
from dcmanager.orchestrator.states.upgrade.installing_license \
import InstallingLicenseState
from dcmanager.orchestrator.states.upgrade.lock_duplex import LockDuplexState
from dcmanager.orchestrator.states.upgrade.lock_simplex import LockSimplexState
from dcmanager.orchestrator.states.upgrade.migrating_data \
import MigratingDataState
from dcmanager.orchestrator.states.upgrade.pre_check import PreCheckState
from dcmanager.orchestrator.states.upgrade.starting_upgrade \
import StartingUpgradeState
from dcmanager.orchestrator.states.upgrade.swact_to_controller_0 \
import SwactToController0State
from dcmanager.orchestrator.states.upgrade.swact_to_controller_1 \
import SwactToController1State
from dcmanager.orchestrator.states.upgrade.unlock_duplex import UnlockDuplexState
from dcmanager.orchestrator.states.upgrade.unlock_simplex import UnlockSimplexState
from dcmanager.orchestrator.states.upgrade.updating_patches import UpdatingPatchesState
from dcmanager.orchestrator.states.upgrade.upgrading_duplex \
import UpgradingDuplexState
from dcmanager.orchestrator.states.upgrade.upgrading_simplex \
import UpgradingSimplexState
@@ -53,13 +71,24 @@ STATE_OPERATORS = {
consts.STRATEGY_STATE_PRE_CHECK: PreCheckState,
consts.STRATEGY_STATE_INSTALLING_LICENSE: InstallingLicenseState,
consts.STRATEGY_STATE_IMPORTING_LOAD: ImportingLoadState,
consts.STRATEGY_STATE_UPDATING_PATCHES: UpdatingPatchesState,
consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY: FinishingPatchStrategyState,
consts.STRATEGY_STATE_STARTING_UPGRADE: StartingUpgradeState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER: LockHostState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_0: LockSimplexState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_1: LockDuplexState,
consts.STRATEGY_STATE_UPGRADING_SIMPLEX: UpgradingSimplexState,
consts.STRATEGY_STATE_UPGRADING_DUPLEX: UpgradingDuplexState,
consts.STRATEGY_STATE_MIGRATING_DATA: MigratingDataState,
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER: UnlockHostState,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0: SwactToController0State,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1: SwactToController1State,
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0: UnlockSimplexState,
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1: UnlockDuplexState,
consts.STRATEGY_STATE_ACTIVATING_UPGRADE: ActivatingUpgradeState,
consts.STRATEGY_STATE_COMPLETING_UPGRADE: CompletingUpgradeState,
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY:
CreatingVIMUpgradeStrategyState,
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY:
ApplyingVIMUpgradeStrategyState,
consts.STRATEGY_STATE_DELETING_LOAD: DeletingLoadState,
}
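The STATE_OPERATORS table above registers one class per strategy state, and every registered class takes only a region name. Below is a hedged sketch of how the orchestration thread can dispatch on it; run_one_step and its arguments are illustrative names, and only STATE_OPERATORS and perform_state_action come from this change.

def run_one_step(strategy_step, region_name):
    # Look up the class registered for the step's current state and run it.
    operator_class = STATE_OPERATORS[strategy_step.state]
    state_operator = operator_class(region_name=region_name)
    # perform_state_action raises on failure and returns the next state.
    return state_operator.perform_state_action(strategy_step)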
@@ -119,6 +148,22 @@ class SwUpgradeOrchThread(threading.Thread):
return consts.DEFAULT_REGION_NAME
return strategy_step.subcloud.name
@staticmethod
def get_ks_client(region_name=consts.DEFAULT_REGION_NAME):
"""This will get a cached keystone client (and token)"""
try:
os_client = OpenStackDriver(
region_name=region_name,
region_clients=None)
return os_client.keystone_client
except Exception:
LOG.warn('Failure initializing KeystoneClient')
raise
def get_vim_client(self, region_name=consts.DEFAULT_REGION_NAME):
ks_client = self.get_ks_client(region_name)
return vim.VimClient(region_name, ks_client.session)
@staticmethod
def format_update_details(last_state, info):
# include the last state, since the current state is likely 'failed'
@@ -362,8 +407,14 @@ class SwUpgradeOrchThread(threading.Thread):
LOG.info("Deleting upgrade strategy")
# todo(abailey): determine if we should validate the strategy_steps
# before allowing the delete
strategy_steps = db_api.strategy_step_get_all(self.context)
for strategy_step in strategy_steps:
self.delete_subcloud_strategy(strategy_step)
if self.stopped():
LOG.info("Exiting because task is stopped")
return
# Remove the strategy from the database
try:
@@ -373,6 +424,50 @@ class SwUpgradeOrchThread(threading.Thread):
LOG.exception(e)
raise e
# todo(abailey): refactor delete to reuse patch orch code
def delete_subcloud_strategy(self, strategy_step):
"""Delete the vim strategy in this subcloud"""
strategy_name = vim.STRATEGY_NAME_SW_UPGRADE
region = self.get_region_name(strategy_step)
LOG.info("Deleting vim strategy %s for %s" % (strategy_name, region))
# First check if the strategy has been created.
try:
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=strategy_name)
except (keystone_exceptions.EndpointNotFound, IndexError):
message = ("Endpoint for subcloud: %s not found." %
region)
LOG.error(message)
self.strategy_step_update(
strategy_step.subcloud_id,
state=consts.STRATEGY_STATE_FAILED,
details=message)
return
except Exception:
# Strategy doesn't exist so there is nothing to do
return
if subcloud_strategy.state in [vim.STATE_BUILDING,
vim.STATE_APPLYING,
vim.STATE_ABORTING]:
# Can't delete a strategy in these states
message = ("Strategy for %s in wrong state (%s)for delete" %
(region, subcloud_strategy.state))
LOG.warn(message)
raise Exception(message)
# If we are here, we need to delete the strategy
try:
self.get_vim_client(region).delete_strategy(
strategy_name=strategy_name)
except Exception:
message = "Strategy delete failed for %s" % region
LOG.warn(message)
raise
def process_upgrade_step(self, region, strategy_step, log_error=False):
"""manage the green thread for calling perform_state_action"""
if region in self.subcloud_workers:

View File

@@ -29,7 +29,8 @@ class FakeController(object):
availability=consts.AVAILABILITY_ONLINE,
ihost_action=None,
target_load=UPGRADED_VERSION,
task=None):
task=None,
capabilities={"Personality": "Controller-Active"}):
self.uuid = str(uuid.uuid4())
self.id = host_id
self.hostname = hostname
@@ -39,6 +40,7 @@ class FakeController(object):
self.ihost_action = ihost_action
self.target_load = target_load
self.task = task
self.capabilities = capabilities
class FakeDevice(object):
@@ -126,6 +128,7 @@ class FakeSubcloud(object):
def __init__(self,
subcloud_id=1,
name='subcloud1',
group_id=1,
description='subcloud',
location='A location',
software_version=PREVIOUS_VERSION,
@@ -135,6 +138,7 @@ class FakeSubcloud(object):
data_install=FAKE_SUBCLOUD_INSTALL_VALUES):
self.id = subcloud_id
self.name = name
self.group_id = group_id
self.description = description
self.location = location
self.software_version = software_version
@@ -160,6 +164,11 @@ class FakeSysinvClient(object):
pass
class FakePatchingClient(object):
def __init__(self):
pass
class FakeSystem(object):
def __init__(self,
obj_id=1,

View File

@@ -0,0 +1,182 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states import applying_vim_strategy
from dcmanager.tests.unit.fakes import FakeVimStrategy
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
STRATEGY_READY_TO_APPLY = FakeVimStrategy(state=vim.STATE_READY_TO_APPLY)
STRATEGY_APPLYING = FakeVimStrategy(state=vim.STATE_APPLYING)
STRATEGY_APPLIED = FakeVimStrategy(state=vim.STATE_APPLIED)
STRATEGY_APPLY_FAILED = FakeVimStrategy(state=vim.STATE_APPLY_FAILED)
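# Patch the polling constants down so the wait loops in this state finish
# quickly under test.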
@mock.patch("dcmanager.orchestrator.states.applying_vim_strategy."
"DEFAULT_MAX_FAILED_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.applying_vim_strategy."
"DEFAULT_MAX_WAIT_ATTEMPTS", 3)
@mock.patch("dcmanager.orchestrator.states.applying_vim_strategy."
"WAIT_INTERVAL", 1)
class TestSwUpgradeApplyingVIMStrategyStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeApplyingVIMStrategyStage, self).setUp()
# set the next state in the chain (when this state is successful)
self.on_success_state = \
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY)
# Add mock API endpoints for vim client calls invoked by this state
self.vim_client.get_strategy = mock.MagicMock()
self.vim_client.apply_strategy = mock.MagicMock()
def test_applying_vim_strategy_success(self):
"""Test applying a VIM strategy that succeeds"""
# first api query is before the apply
# remaining api query results are after the apply is invoked
self.vim_client.get_strategy.side_effect = [
STRATEGY_READY_TO_APPLY,
STRATEGY_APPLYING,
STRATEGY_APPLIED,
]
# API call acts as expected
self.vim_client.apply_strategy.return_value = STRATEGY_APPLYING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Successful promotion to next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_applying_vim_strategy_raises_exception(self):
"""Test applying a VIM strategy that raises an exception"""
# first api query is before the apply
self.vim_client.get_strategy.return_value = STRATEGY_READY_TO_APPLY
# raise an exception during apply_strategy
self.vim_client.apply_strategy.side_effect =\
Exception("HTTPBadRequest: this is a fake exception")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_applying_vim_strategy_fails_apply_immediately(self):
"""Test applying a VIM strategy that returns a failed result"""
# first api query is before the apply
self.vim_client.get_strategy.return_value = STRATEGY_READY_TO_APPLY
# return a failed strategy
self.vim_client.apply_strategy.return_value = STRATEGY_APPLY_FAILED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_applying_vim_strategy_fails_apply_later(self):
"""Test applying a VIM strategy that starts to apply but then fails"""
# first api query is before the apply
self.vim_client.get_strategy.side_effect = [
STRATEGY_READY_TO_APPLY,
STRATEGY_APPLYING,
STRATEGY_APPLY_FAILED,
]
# API call acts as expected
self.vim_client.apply_strategy.return_value = STRATEGY_APPLYING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_applying_vim_strategy_timeout(self):
"""Test applying a VIM strategy that times out"""
# first api query is before the apply
# test where it never progresses past 'applying'
self.vim_client.get_strategy.side_effect = itertools.chain(
[STRATEGY_READY_TO_APPLY, ], itertools.repeat(STRATEGY_APPLYING))
# API call acts as expected
self.vim_client.apply_strategy.return_value = STRATEGY_APPLYING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the max number of queries was attempted (plus 1 before loop)
self.assertEqual(applying_vim_strategy.DEFAULT_MAX_WAIT_ATTEMPTS + 1,
self.vim_client.get_strategy.call_count)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_applying_vim_strategy_already_applying_and_completes(self):
"""Test applying a VIM strategy while one already is applying"""
# first api query is what already exists in applying state
# remainder are during the loop
self.vim_client.get_strategy.side_effect = [
STRATEGY_APPLYING,
STRATEGY_APPLIED,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# apply_strategy API call should never be invoked
self.vim_client.apply_strategy.assert_not_called()
# SUCCESS case
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_applying_vim_strategy_already_exists_and_is_broken(self):
"""Test applying a VIM strategy while a broken strategy exists"""
# first api query is what already exists
# remainder are during the loop
self.vim_client.get_strategy.side_effect = [
STRATEGY_APPLY_FAILED,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# apply API call should never be invoked
self.vim_client.apply_strategy.assert_not_called()
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@@ -0,0 +1,187 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states import creating_vim_strategy
from dcmanager.tests.unit.fakes import FakeVimStrategy
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
STRATEGY_BUILDING = FakeVimStrategy(state=vim.STATE_BUILDING)
STRATEGY_DONE_BUILDING = FakeVimStrategy(state=vim.STATE_READY_TO_APPLY)
STRATEGY_FAILED_BUILDING = FakeVimStrategy(state=vim.STATE_BUILD_FAILED)
@mock.patch("dcmanager.orchestrator.states.creating_vim_strategy."
"DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.creating_vim_strategy."
"DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeCreatingVIMStrategyStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeCreatingVIMStrategyStage, self).setUp()
# set the next state in the chain (when this state is successful)
self.on_success_state =\
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
# Add mock API endpoints for vim client calls invoked by this state
self.vim_client.create_strategy = mock.MagicMock()
self.vim_client.delete_strategy = mock.MagicMock()
self.vim_client.get_strategy = mock.MagicMock()
def test_creating_vim_strategy_success(self):
"""Test creating a VIM strategy"""
# first api query is before the create
# remaining api query results are waiting for the strategy to build
self.vim_client.get_strategy.side_effect = [
None,
STRATEGY_BUILDING,
STRATEGY_DONE_BUILDING,
]
# API call acts as expected
self.vim_client.create_strategy.return_value = STRATEGY_BUILDING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Successful promotion to next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_creating_vim_strategy_raises_exception(self):
"""Test creating a VIM strategy that raises an exception"""
# first api query is before the create
self.vim_client.get_strategy.return_value = None
# raise an exception during create_strategy
self.vim_client.create_strategy.side_effect =\
Exception("HTTPBadRequest: this is a fake exception")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_creating_vim_strategy_fails_create_immediately(self):
"""Test creating a VIM strategy that returns a failed create"""
# first api query is before the create
self.vim_client.get_strategy.return_value = None
# return a failed strategy
self.vim_client.create_strategy.return_value = STRATEGY_FAILED_BUILDING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_creating_vim_strategy_fails_create_later(self):
"""Test creating a VIM strategy that starts to build but then fails"""
# first api query is before the create
self.vim_client.get_strategy.side_effect = [
None,
STRATEGY_BUILDING,
STRATEGY_FAILED_BUILDING,
]
# API call acts as expected
self.vim_client.create_strategy.return_value = STRATEGY_BUILDING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_creating_vim_strategy_timeout(self):
"""Test creating a VIM strategy that times out"""
# first api query is before the create
self.vim_client.get_strategy.side_effect = itertools.chain(
[None, ], itertools.repeat(STRATEGY_BUILDING))
# API call acts as expected
self.vim_client.create_strategy.return_value = STRATEGY_BUILDING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the max number of queries was attempted (plus 1)
self.assertEqual(creating_vim_strategy.DEFAULT_MAX_QUERIES + 1,
self.vim_client.get_strategy.call_count)
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_creating_vim_strategy_already_exists_and_completes(self):
"""Test creating a VIM strategy while one already exists"""
# first api query is what already exists
# If it is not building, aborting or applying it should be deleted
# remainder are during the loop
self.vim_client.get_strategy.side_effect = [
STRATEGY_FAILED_BUILDING, # old strategy that gets deleted
STRATEGY_BUILDING, # new strategy gets built
STRATEGY_DONE_BUILDING, # new strategy succeeds during while loop
]
# The strategy should be deleted and then created
self.vim_client.create_strategy.return_value = STRATEGY_BUILDING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# delete API should have been invoked
self.assertEqual(1, self.vim_client.delete_strategy.call_count)
# create API call should be invoked
self.assertEqual(1, self.vim_client.create_strategy.call_count)
# SUCCESS case
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_creating_vim_strategy_already_exists_and_is_broken(self):
"""Test creating a VIM strategy while a broken strategy exists"""
# first api query is what already exists
# remainder are during the loop
self.vim_client.get_strategy.side_effect = [
STRATEGY_BUILDING,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# create API call should never be invoked
self.vim_client.create_strategy.assert_not_called()
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@@ -0,0 +1,95 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
REGION_ONE_PATCHES = {'DC.2': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.3': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.4': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
}
SUBCLOUD_PATCHES = {'DC.1': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.3': {'sw_version': '17.07',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.4': {'sw_version': '17.07',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.5': {'sw_version': '17.07',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.6': {'sw_version': '17.07',
'repostate': 'Available',
'patchstate': 'Available'},
}
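# With these fixtures the state is expected to delete the patches that are
# only 'Available' on the subcloud (DC.5, DC.6) and commit the patches that
# are 'Committed' in RegionOne but still 'Applied' on the subcloud (DC.2, DC.3).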
def compare_call_with_unsorted_list(call, unsorted_list):
# Compare the call's first positional argument to the expected list, ignoring
# order; sorted() is used because list.sort() is in-place and returns None.
call_args, _ = call
return sorted(call_args[0]) == sorted(unsorted_list)
@mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeFinishingPatchStrategyStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeFinishingPatchStrategyStage, self).setUp()
# next state after 'finishing patch strategy' is 'starting upgrade'
self.on_success_state = consts.STRATEGY_STATE_STARTING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY)
# Add mock API endpoints for patching client calls invoked by this state
self.patching_client.query = mock.MagicMock()
self.patching_client.delete = mock.MagicMock()
self.patching_client.commit = mock.MagicMock()
def test_finishing_patch_strategy_success(self):
"""Test finishing_patch_strategy where the API call succeeds."""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES,
]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
assert(compare_call_with_unsorted_list(
self.patching_client.delete.call_args_list[0],
['DC.5', 'DC.6']
))
assert(compare_call_with_unsorted_list(
self.patching_client.commit.call_args_list[0],
['DC.2', 'DC.3']
))
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)

View File

@@ -99,8 +99,8 @@ class TestSwUpgradeImportingLoadStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeImportingLoadStage, self).setUp()
# next state after 'importing load' is 'starting upgrade'
self.on_success_state = consts.STRATEGY_STATE_STARTING_UPGRADE
# next state after 'importing load' is 'updating patches'
self.on_success_state = consts.STRATEGY_STATE_UPDATING_PATCHES
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()

View File

@@ -13,23 +13,15 @@ from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
CONTROLLER_0_UNLOCKED = FakeController(administrative=consts.ADMIN_UNLOCKED)
CONTROLLER_0_LOCKED = FakeController(administrative=consts.ADMIN_LOCKED)
CONTROLLER_0_LOCKING = FakeController(administrative=consts.ADMIN_UNLOCKED,
ihost_action='lock',
task='Locking')
CONTROLLER_0_LOCKING_FAILED = \
FakeController(administrative=consts.ADMIN_UNLOCKED,
ihost_action='force-swact',
task='Swacting')
@mock.patch("dcmanager.orchestrator.states.lock_host.DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.lock_host.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
class TestSwUpgradeLockSimplexStage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeLockControllerStage, self).setUp()
super(TestSwUpgradeLockSimplexStage, self).setUp()
# next state after a successful lock is upgrading simplex
self.on_success_state = consts.STRATEGY_STATE_UPGRADING_SIMPLEX
@@ -38,12 +30,27 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(consts.STRATEGY_STATE_LOCKING_CONTROLLER)
self.strategy_step = self.setup_strategy_step(self.state)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.lock_host = mock.MagicMock()
self.setup_fake_controllers('controller-0')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_UNLOCKED = FakeController(hostname=host_name,
administrative=consts.ADMIN_UNLOCKED)
self.CONTROLLER_LOCKED = FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED)
self.CONTROLLER_LOCKING = FakeController(hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
ihost_action='lock',
task='Locking')
self.CONTROLLER_LOCKING_FAILED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
ihost_action='force-swact',
task='Swacting')
def test_lock_success(self):
"""Test the lock command returns a success"""
@@ -52,13 +59,13 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
# first query is the starting state
# query 2,3 are are during the lock phase
# query 4 : the host is now locked
self.sysinv_client.get_host.side_effect = [CONTROLLER_0_UNLOCKED,
CONTROLLER_0_LOCKING,
CONTROLLER_0_LOCKING,
CONTROLLER_0_LOCKED]
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_UNLOCKED,
self.CONTROLLER_LOCKING,
self.CONTROLLER_LOCKING,
self.CONTROLLER_LOCKED]
# mock the API call as failed on the subcloud
self.sysinv_client.lock_host.return_value = CONTROLLER_0_LOCKING
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -74,7 +81,7 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
"""Test the lock command skips if host is already locked"""
# mock the controller host query as being already locked
self.sysinv_client.get_host.return_value = CONTROLLER_0_LOCKED
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -93,11 +100,11 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
# first query is the starting state
# all remaining queries, the host returns 'locking'
self.sysinv_client.get_host.side_effect = itertools.chain(
[CONTROLLER_0_UNLOCKED, ],
itertools.repeat(CONTROLLER_0_LOCKING))
[self.CONTROLLER_UNLOCKED, ],
itertools.repeat(self.CONTROLLER_LOCKING))
# mock the API call as successful on the subcloud
self.sysinv_client.lock_host.return_value = CONTROLLER_0_LOCKING
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -117,10 +124,10 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
"""Test the lock command returns a failure"""
# mock the controller get_host query
self.sysinv_client.get_host.return_value = CONTROLLER_0_UNLOCKED
self.sysinv_client.get_host.return_value = self.CONTROLLER_UNLOCKED
# mock the API call as failed on the subcloud
self.sysinv_client.lock_host.return_value = CONTROLLER_0_LOCKING_FAILED
self.sysinv_client.lock_host.return_value = self.CONTROLLER_LOCKING_FAILED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -148,3 +155,16 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeLockDuplexStage(TestSwUpgradeLockSimplexStage):
def setUp(self):
self.state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1
super(TestSwUpgradeLockDuplexStage, self).setUp()
# next state after a successful lock is upgrading duplex
self.on_success_state = consts.STRATEGY_STATE_UPGRADING_DUPLEX
# Override the fake controllers to use controller-1
self.setup_fake_controllers('controller-1')

View File

@@ -38,7 +38,7 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
super(TestSwUpgradeMigratingDataStage, self).setUp()
# next state after 'migrating data' is 'unlocking controller'
self.on_success_state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER
self.on_success_state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()

View File

@@ -10,6 +10,7 @@ from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.fakes import FakeHostFilesystem
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSubcloud
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
@@ -86,6 +87,11 @@ class TestSwUpgradePreCheckStage(TestSwUpgradeState):
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.get_host_filesystem = mock.MagicMock()
self.sysinv_client.get_system_health = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
self.sysinv_client.get_upgrades = mock.MagicMock()
def test_upgrade_pre_check_subcloud_online_fresh(self):
"""Test pre check step where the subcloud is online and running N load

View File

@@ -9,6 +9,7 @@ import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import starting_upgrade
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
@@ -22,13 +23,13 @@ UPGRADE_STARTED = FakeUpgrade(state='started')
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeStartingUpgradeStage(TestSwUpgradeState):
class TestSwUpgradeSimplexStartingUpgradeStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeStartingUpgradeStage, self).setUp()
super(TestSwUpgradeSimplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'locking controller-0'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
@@ -40,6 +41,10 @@ class TestSwUpgradeStartingUpgradeStage(TestSwUpgradeState):
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.upgrade_start = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
self.sysinv_client.get_system = mock.MagicMock()
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_SIMPLEX
self.sysinv_client.get_system.return_value = system_values
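# system_mode determines which lock state follows 'starting upgrade':
# simplex subclouds lock controller-0, duplex subclouds lock controller-1.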
def test_upgrade_subcloud_upgrade_start_failure(self):
"""Test the upgrade_start where the API call fails.
@@ -169,3 +174,21 @@ class TestSwUpgradeStartingUpgradeStage(TestSwUpgradeState):
# Verify the timeout leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.starting_upgrade"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeDuplexStartingUpgradeStage(TestSwUpgradeSimplexStartingUpgradeStage):
def setUp(self):
super(TestSwUpgradeDuplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'locking controller-1'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1
# Override the mocked get_system to report a duplex system
system_values = FakeSystem()
system_values.system_mode = consts.SYSTEM_MODE_DUPLEX
self.sysinv_client.get_system.return_value = system_values

View File

@@ -0,0 +1,144 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states import swact_host
from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
@mock.patch("dcmanager.orchestrator.states.swact_host.DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.swact_host.DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeSwactToController0Stage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeSwactToController0Stage, self).setUp()
# next state after a successful swact is activating upgrade
self.on_success_state = consts.STRATEGY_STATE_ACTIVATING_UPGRADE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(self.state)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.swact_host = mock.MagicMock()
# In order to swact to controller-0, we run "system host-swact controller-1"
self.setup_fake_controllers('controller-1')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_ACTIVE = FakeController(hostname=host_name)
self.CONTROLLER_STANDBY = FakeController(hostname=host_name,
capabilities={"Personality": "Controller-Standby"})
self.CONTROLLER_SWACTING = FakeController(hostname=host_name,
task='Swacting')
def test_swact_success(self):
"""Test the swact command returns a success"""
# mock the controller host queries
# first query is the starting state
# query 2 is during the ongoing swact phase
# query 3 is after successful host swact
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_STANDBY,
self.CONTROLLER_STANDBY,
self.CONTROLLER_ACTIVE]
# mock the API call as successful on the subcloud
self.sysinv_client.swact_host.return_value = self.CONTROLLER_SWACTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was actually attempted
self.sysinv_client.swact_host.assert_called()
# verify that the API moved to the next state on success
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_swact_skipped_when_already_active(self):
"""Test the swact command skips if host is already active controller"""
# mock the controller host query as being already Controller-Active
self.sysinv_client.get_host.return_value = self.CONTROLLER_ACTIVE
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was never attempted
self.sysinv_client.swact_host.assert_not_called()
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_swact_attempt_timeout(self):
"""Test swact invoked and fails if timeout before host becomes active controller"""
# mock the get_host queries
# all remaining queries, the host returns 'Controller-Standby'
self.sysinv_client.get_host.side_effect = itertools.chain(
itertools.repeat(self.CONTROLLER_STANDBY))
# mock the API call as successful on the subcloud
self.sysinv_client.swact_host.return_value = self.CONTROLLER_SWACTING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was actually attempted
self.sysinv_client.swact_host.assert_called()
# verify the query was invoked: 2 + max_attempts times
self.assertEqual(swact_host.DEFAULT_MAX_QUERIES + 2,
self.sysinv_client.get_host.call_count)
# verify that state failed due to subcloud never finishing the swact
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_swact_fails_when_host_query_fails(self):
"""Test the swact command fails when it cannot get the controllers"""
# mock the get_host query is empty and raises an exception
self.sysinv_client.get_host.side_effect = \
Exception("Unable to find host controller-0")
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the swact command was never attempted
self.sysinv_client.swact_host.assert_not_called()
# verify that the state moves to the failed state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeSwactToController1Stage(TestSwUpgradeSwactToController0Stage):
def setUp(self):
self.state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1
super(TestSwUpgradeSwactToController1Stage, self).setUp()
# next state after a successful swact to controller-1 is creating VIM
# upgrade strategy
self.on_success_state = consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY
# In order to swact to controller-1, we run "system host-swact controller-0"
self.setup_fake_controllers('controller-0')

View File

@@ -13,28 +13,22 @@ from dcmanager.tests.unit.orchestrator.states.fakes import FakeController
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
CONTROLLER_0_UNLOCKED = \
FakeController(administrative=consts.ADMIN_UNLOCKED,
operational=consts.OPERATIONAL_ENABLED)
CONTROLLER_0_LOCKED = FakeController(administrative=consts.ADMIN_LOCKED)
CONTROLLER_0_UNLOCKING = FakeController(administrative=consts.ADMIN_LOCKED,
ihost_action='unlock',
task='Unlocking')
CONTROLLER_0_UNLOCKING_FAILED = \
FakeController(administrative=consts.ADMIN_LOCKED,
ihost_action='force-swact',
task='Swacting')
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_MAX_API_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_MAX_FAILED_QUERIES",
3)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_API_SLEEP", 1)
@mock.patch("dcmanager.orchestrator.states.unlock_host.DEFAULT_FAILED_SLEEP", 1)
class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
class TestSwUpgradeUnlockSimplexStage(TestSwUpgradeState):
state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_0
def setUp(self):
super(TestSwUpgradeUnlockControllerStage, self).setUp()
super(TestSwUpgradeUnlockSimplexStage, self).setUp()
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.unlock_host = mock.MagicMock()
# next state after a successful unlock is 'activating'
self.on_success_state = consts.STRATEGY_STATE_ACTIVATING_UPGRADE
@@ -43,12 +37,29 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_UNLOCKING_CONTROLLER)
self.strategy_step = self.setup_strategy_step(self.state)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.unlock_host = mock.MagicMock()
self.setup_fake_controllers('controller-0')
def setup_fake_controllers(self, host_name):
self.CONTROLLER_UNLOCKED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_UNLOCKED,
operational=consts.OPERATIONAL_ENABLED,
availability=consts.AVAILABILITY_AVAILABLE)
self.CONTROLLER_LOCKED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED)
self.CONTROLLER_UNLOCKING = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED,
ihost_action='unlock',
task='Unlocking')
self.CONTROLLER_UNLOCKING_FAILED = \
FakeController(hostname=host_name,
administrative=consts.ADMIN_LOCKED,
ihost_action='force-swact',
task='Swacting')
def test_unlock_success(self):
"""Test the unlock command returns a success"""
@@ -57,13 +68,13 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
# first query is the starting state
# query 2,3 are are during the unlock phase
# query 4 : the host is now unlocked
self.sysinv_client.get_host.side_effect = [CONTROLLER_0_LOCKED,
CONTROLLER_0_UNLOCKING,
CONTROLLER_0_UNLOCKING,
CONTROLLER_0_UNLOCKED, ]
self.sysinv_client.get_host.side_effect = [self.CONTROLLER_LOCKED,
self.CONTROLLER_UNLOCKING,
self.CONTROLLER_UNLOCKING,
self.CONTROLLER_UNLOCKED, ]
# mock the API call as failed on the subcloud
self.sysinv_client.unlock_host.return_value = CONTROLLER_0_UNLOCKING
self.sysinv_client.unlock_host.return_value = self.CONTROLLER_UNLOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -79,7 +90,7 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
"""Test the unlock command skips if host is already unlocked"""
# mock the controller host query as being already unlocked
self.sysinv_client.get_host.return_value = CONTROLLER_0_UNLOCKED
self.sysinv_client.get_host.return_value = self.CONTROLLER_UNLOCKED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -98,11 +109,11 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
# first query is the starting state
# all remaining queries, the host returns 'unlocking'
self.sysinv_client.get_host.side_effect = itertools.chain(
[CONTROLLER_0_LOCKED, ],
itertools.repeat(CONTROLLER_0_UNLOCKING))
[self.CONTROLLER_LOCKED, ],
itertools.repeat(self.CONTROLLER_UNLOCKING))
# mock the API call as successful on the subcloud
self.sysinv_client.unlock_host.return_value = CONTROLLER_0_UNLOCKING
self.sysinv_client.unlock_host.return_value = self.CONTROLLER_UNLOCKING
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -122,11 +133,11 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
"""Test the unlock command returns a failure"""
# mock the get_host query
self.sysinv_client.get_host.return_value = CONTROLLER_0_LOCKED
self.sysinv_client.get_host.return_value = self.CONTROLLER_LOCKED
# mock the API call as failed on the subcloud
self.sysinv_client.unlock_host.return_value = \
CONTROLLER_0_UNLOCKING_FAILED
self.CONTROLLER_UNLOCKING_FAILED
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
@@ -154,3 +165,17 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
# verify that the state moves to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeUnlockDuplexStage(TestSwUpgradeUnlockSimplexStage):
"""This subclasses Controller 0 Unlock, and overides some setup values"""
def setUp(self):
self.state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1
super(TestSwUpgradeUnlockDuplexStage, self).setUp()
# override some of the fields that were setup in the super class
# next state after a successful unlock is 'swacting to controller-1'
self.on_success_state = consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_1
self.setup_fake_controllers('controller-1')

View File

@@ -0,0 +1,206 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from os import path as os_path
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
REGION_ONE_PATCHES = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.4': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.8': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
}
SUBCLOUD_PATCHES_SUCCESS = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
SUBCLOUD_PATCHES_BAD_COMMIT = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Committed',
'patchstate': 'Committed'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
SUBCLOUD_PATCHES_BAD_STATE = {'DC.1': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Applied'},
'DC.2': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Available'},
'DC.3': {'sw_version': '20.12',
'repostate': 'Available',
'patchstate': 'Partial-Remove'},
'DC.5': {'sw_version': '20.12',
'repostate': 'Unknown',
'patchstate': 'Unknown'},
'DC.6': {'sw_version': '20.12',
'repostate': 'Applied',
'patchstate': 'Partial-Apply'},
}
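# In the success fixture, DC.8 exists only in RegionOne and is uploaded from
# the patch vault, DC.5 and DC.6 are extra on the subcloud and get removed,
# and DC.2, DC.3 and DC.8 are then applied.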
def compare_call_with_unsorted_list(call, unsorted_list):
call_args, _ = call
# use sorted() for the comparison; list.sort() is in-place and returns None
return sorted(call_args[0]) == sorted(unsorted_list)
@mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeUpdatingPatchesStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeUpdatingPatchesStage, self).setUp()
# next state after 'updating patches' is 'finishing patch strategy'
self.on_success_state = consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(consts.STRATEGY_STATE_UPDATING_PATCHES)
# Add mock API endpoints for patching and sysinv client calls invoked by this state
self.patching_client.query = mock.MagicMock()
self.sysinv_client.get_loads = mock.MagicMock()
self.patching_client.remove = mock.MagicMock()
self.patching_client.upload = mock.MagicMock()
self.patching_client.apply = mock.MagicMock()
self.patching_client.query_hosts = mock.MagicMock()
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_success(self, mock_os_path_isfile):
"""Test update_patches where the API call succeeds."""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_SUCCESS,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
self.patching_client.upload.assert_called_with(
[consts.PATCH_VAULT_DIR + '/20.12/DC.8.patch'])
assert(compare_call_with_unsorted_list(
self.patching_client.remove.call_args_list[0],
['DC.5', 'DC.6']
))
assert(compare_call_with_unsorted_list(
self.patching_client.apply.call_args_list[0],
['DC.2', 'DC.3', 'DC.8']
))
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_bad_committed(self, mock_os_path_isfile):
"""Test update_patches where the API call fails.
The update_patches call fails because the patch is 'committed' in
the subcloud but not 'applied' in the System Controller.
"""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_BAD_COMMIT,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify it failed and moves to the next step
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
@mock.patch.object(os_path, 'isfile')
def test_update_subcloud_patches_bad_state(self, mock_os_path_isfile):
"""Test update_patches where the API call succeeds.
The update_patches call fails because the patch is 'unknown' in
the subcloud which is not a valid state.
"""
self.patching_client.query.side_effect = [
REGION_ONE_PATCHES,
SUBCLOUD_PATCHES_BAD_STATE,
]
self.sysinv_client.get_loads.side_effect = [
[FakeLoad(1,
software_version='20.12',
state=consts.ACTIVE_LOAD_STATE)]
]
mock_os_path_isfile.return_value = True
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# Verify it failed and moves to the next step
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@@ -0,0 +1,96 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import itertools
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import upgrading_duplex
from dcmanager.tests.unit.orchestrator.states.fakes import FakeUpgrade
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
UPGRADE_ABORTING = FakeUpgrade(state='aborting')
UPGRADE_STARTED = FakeUpgrade(state='started')
UPGRADE_COMPLETE = FakeUpgrade(state='data-migration-complete')
UPGRADE_FAILED = FakeUpgrade(state='data-migration-failed')
@mock.patch("dcmanager.orchestrator.states.upgrade.upgrading_duplex"
".DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.upgrade.upgrading_duplex"
".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeUpgradingDuplexStage(TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeUpgradingDuplexStage, self).setUp()
# next state after 'upgrading duplex' is 'unlocking controller 1'
self.on_success_state = consts.STRATEGY_STATE_UNLOCKING_CONTROLLER_1
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = \
self.setup_strategy_step(consts.STRATEGY_STATE_UPGRADING_DUPLEX)
# Add mock API endpoints for sysinv client calls invoked by this state
self.sysinv_client.get_host = mock.MagicMock()
self.sysinv_client.upgrade_host = mock.MagicMock()
self.sysinv_client.get_upgrades = mock.MagicMock()
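# The upgrading duplex state invokes upgrade_host and then polls get_upgrades
# until the data migration reports complete or failed.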
def test_subcloud_duplex_upgrade_success(self):
"""Test upgrading_duplex where the API call succeeds."""
# A successfully completed upgrade exists in the DB
self.sysinv_client.get_upgrades.return_value = [UPGRADE_COMPLETE, ]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the get_upgrades query was invoked twice
self.assertEqual(self.sysinv_client.get_upgrades.call_count, 2)
# On success, the state should transition to the next state
self.assert_step_updated(self.strategy_step.subcloud_id,
self.on_success_state)
def test_subcloud_duplex_upgrade_fails(self):
"""Test the upgrading_duplex fails as data migration fails."""
self.sysinv_client.get_upgrades.return_value = [UPGRADE_FAILED, ]
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the get_upgrades query was invoked twice
self.assertEqual(self.sysinv_client.get_upgrades.call_count, 2)
# Verify it failed and moves to the next step
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
def test_subcloud_duplex_upgrade_timeout(self):
"""Test upgrading_duplex where the API call succeeds but times out."""
# Upgrades state is stuck at 'started' state which eventually
# leads to the timeout
self.sysinv_client.get_upgrades.side_effect = itertools.chain(
itertools.repeat([UPGRADE_STARTED, ]))
# invoke the strategy state operation on the orch thread
self.worker.perform_state_action(self.strategy_step)
# verify the API call that succeeded was actually invoked
self.sysinv_client.upgrade_host.assert_called()
# verify the get_upgrades query was invoked: max_attempts times
self.assertEqual(upgrading_duplex.DEFAULT_MAX_QUERIES,
self.sysinv_client.get_upgrades.call_count)
# Verify the timeout leads to a state failure
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)

View File

@@ -31,6 +31,7 @@ from dcmanager.tests.unit.common import fake_strategy
from dcmanager.tests.unit.common import fake_subcloud
from dcmanager.tests.unit.fakes import FakeVimClient
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKeystoneClient
from dcmanager.tests.unit.orchestrator.states.fakes import FakePatchingClient
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSysinvClient
from dcmanager.tests.unit.orchestrator.test_sw_update_manager import FakeOrchThread
from dcmanager.tests import utils
@@ -69,6 +70,13 @@ class TestSwUpdate(base.DCManagerTestCase):
self.mock_sysinv_client.return_value = self.sysinv_client
self.addCleanup(p.stop)
# Mock the patching client defined in the base state class
self.patching_client = FakePatchingClient()
p = mock.patch.object(BaseState, 'get_patching_client')
self.mock_patching_client = p.start()
self.mock_patching_client.return_value = self.patching_client
self.addCleanup(p.stop)
# Mock the vim client defined in the base state class
self.vim_client = FakeVimClient()
p = mock.patch.object(BaseState, 'get_vim_client')