Add recovery code for failures during each upgrade state
In this commit:
- The pre-check for the online case is updated: if the subcloud
deploy status is 'migrated', advance directly to the activating
upgrade step; otherwise, start from the first step
(installing license). See the sketch after this list.
- The subcloud deploy status is set to 'migrated' at the end
of migrating data step.
- The subcloud deploy status is set to 'complete' at the end
of completing upgrade step.
- Skip data migration if deploy status is either 'migrated'
or 'complete'.
- Reduce the number of retries for failed activation from 45
to 10. The overall wait time is still 45 minutes.
- Improve the error messages reported to the user.
- Add a log entry to clarify how long the subcloud takes to
respond to a load import request.
- Various small fixes.
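For reference, the recovery dispatch described above can be condensed as
the following sketch. This is an illustration only, not the dcmanager code
itself; the helper name is hypothetical, and the state/status strings are
the constants that appear in the diff below.

    # Hypothetical condensation of the pre-check dispatch (illustration only).
    def choose_next_step(subcloud):
        if subcloud.availability_status == 'online':
            # the online checks (license, scratch size, etc.) run first
            if subcloud.deploy_status == 'migrated':
                return 'activating upgrade'   # data migration already done
            return 'installing license'       # first step of a normal upgrade
        # offline: recover based on the last known deploy status
        if subcloud.deploy_status in ('pre-install-failed', 'install-failed',
                                      'data-migration-failed'):
            return 'upgrading simplex'
        if subcloud.deploy_status == 'installed':
            return 'migrating data'
        if subcloud.deploy_status == 'migrated':
            return 'activating upgrade'
        raise Exception("manual recovery required from deploy state: %s"
                        % subcloud.deploy_status)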
Tests:
- Load import failure - not enough disk space
- Load import failure - bad load file
- Upgrade start failure - sysadmin directory too large
- Controller lock failure - processing failure
- Upgrade simplex failure - incorrect BMC password
- Upgrade simplex failure - network glitch
- Data migration failure - network glitch
- Upgrade activation failure - processing failure
- Upgrade completing failure - processing failure
Story: 2007403
Task: 40351
Change-Id: Idf0bf8009122018e4597a7f1dede58658d0a064b
Signed-off-by: Tee Ngo <tee.ngo@windriver.com>
@@ -182,7 +182,8 @@ class SubcloudAuditManager(manager.Manager):
                         consts.DEPLOY_STATE_DEPLOY_FAILED,
                         consts.DEPLOY_STATE_INSTALL_FAILED,
                         consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
-                        consts.DEPLOY_STATE_DATA_MIGRATION_FAILED]):
+                        consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
+                        consts.DEPLOY_STATE_MIGRATED]):
                 LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
                           (subcloud.name, subcloud.deploy_status))
                 continue
@@ -131,6 +131,7 @@ DEPLOY_STATE_DEPLOYING = 'deploying'
 DEPLOY_STATE_DEPLOY_FAILED = 'deploy-failed'
 DEPLOY_STATE_MIGRATING_DATA = 'migrating-data'
 DEPLOY_STATE_DATA_MIGRATION_FAILED = 'data-migration-failed'
+DEPLOY_STATE_MIGRATED = 'migrated'
 DEPLOY_STATE_DONE = 'complete'

 # Alarm aggregation
@@ -180,7 +180,7 @@ class LicenseMissingError(DCManagerException):

 class ManualRecoveryRequiredException(DCManagerException):
     message = _("Offline Subcloud: %(subcloud)s needs manual recovery from "
-                "deploy state:%(deploy_state)s")
+                "deploy state: %(deploy_status)s")


 class PreCheckFailedException(DCManagerException):
@@ -67,7 +67,9 @@ class LockHostState(BaseState):
                 break
             counter += 1
             if counter >= self.max_queries:
-                raise Exception("Timeout waiting for lock to complete")
+                raise Exception("Timeout waiting for lock to complete. "
+                                "Please check sysinv.log on the subcloud "
+                                "for details.")
             time.sleep(self.sleep_duration)

         # If we are here, the loop broke out cleanly and the action succeeded
@@ -14,9 +14,12 @@ ACTIVATING_COMPLETED_STATES = ['activation-complete',

 ACTIVATING_RETRY_STATES = ['activation-failed', ]

 ACTIVATING_IN_PROGRESS_STATES = ['activating', ]

 # Max time: 45 minutes = 45 queries x 60 seconds sleep between queries
 DEFAULT_MAX_QUERIES = 45
 DEFAULT_SLEEP_DURATION = 60
+MAX_FAILED_RETRIES = 10


 class ActivatingUpgradeState(BaseState):
@@ -28,6 +31,7 @@ class ActivatingUpgradeState(BaseState):
         # max time to wait (in seconds) is: sleep_duration * max_queries
         self.sleep_duration = DEFAULT_SLEEP_DURATION
         self.max_queries = DEFAULT_MAX_QUERIES
+        self.max_failed_retries = MAX_FAILED_RETRIES

     def get_upgrade_state(self, sysinv_client):
         upgrades = sysinv_client.get_upgrades()
@@ -67,15 +71,29 @@ class ActivatingUpgradeState(BaseState):
                 raise StrategyStoppedException()
             upgrade_state = self.get_upgrade_state(sysinv_client)
             if upgrade_state in ACTIVATING_RETRY_STATES:
+                if counter >= self.max_failed_retries:
+                    raise Exception("Failed to activate upgrade. Please "
+                                    "check sysinv.log on the subcloud for "
+                                    "details.")
                 # We failed. Better try again
                 self.info_log(strategy_step,
                               "Activation failed, retrying... State=%s"
                               % upgrade_state)
                 sysinv_client.upgrade_activate()
             elif upgrade_state in ACTIVATING_IN_PROGRESS_STATES:
                 self.info_log(strategy_step,
                               "Activation in progress, waiting... State=%s"
                               % upgrade_state)
             elif upgrade_state in ACTIVATING_COMPLETED_STATES:
                 self.info_log(strategy_step,
-                              "Activation completed. State=%s" % upgrade_state)
+                              "Activation completed. State=%s"
+                              % upgrade_state)
                 break
             counter += 1
             if counter >= self.max_queries:
-                raise Exception("Timeout waiting for activation to complete")
+                raise Exception("Timeout waiting for activation to complete. "
+                                "Please check sysinv.log on the subcloud for "
+                                "details.")
             time.sleep(self.sleep_duration)

         # When we return from this method without throwing an exception, the
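A worked note on the activation timing above, using only the constants from
this hunk: the overall wait budget is unchanged while re-activation attempts
are now capped separately.

    # Worked example of the activation wait budget (constants from the diff above).
    DEFAULT_SLEEP_DURATION = 60   # seconds between polls of the upgrade state
    DEFAULT_MAX_QUERIES = 45      # polls before the timeout exception is raised
    MAX_FAILED_RETRIES = 10       # upgrade_activate() retries before giving up

    assert DEFAULT_SLEEP_DURATION * DEFAULT_MAX_QUERIES == 45 * 60  # 45 minutes
    # Previously a subcloud stuck in 'activation-failed' could be retried on every
    # one of the 45 polls; it is now retried at most 10 times within the same
    # 45-minute window.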
@@ -7,8 +7,10 @@ import time

+from dcmanager.common import consts
 from dcmanager.common.exceptions import StrategyStoppedException
+from dcmanager.db import api as db_api
 from dcmanager.manager.states.base import BaseState


 # Max time: 10 minutes = 60 queries x 10 seconds between each query
 DEFAULT_MAX_QUERIES = 60
 DEFAULT_SLEEP_DURATION = 10
@@ -24,6 +26,19 @@ class CompletingUpgradeState(BaseState):
         self.sleep_duration = DEFAULT_SLEEP_DURATION
         self.max_queries = DEFAULT_MAX_QUERIES

+    def finalize_upgrade(self, strategy_step):
+        ks_client = self.get_keystone_client(strategy_step.subcloud.name)
+        sysinv_client = self.get_sysinv_client(strategy_step.subcloud.name,
+                                               ks_client.session)
+
+        software_version = sysinv_client.get_system().software_version
+
+        db_api.subcloud_update(
+            self.context, strategy_step.subcloud_id,
+            deploy_status=consts.DEPLOY_STATE_DONE,
+            software_version=software_version)
+        return self.next_state
+
     def perform_state_action(self, strategy_step):
         """Complete an upgrade on a subcloud
@@ -42,7 +57,7 @@ class CompletingUpgradeState(BaseState):
         if len(upgrades) == 0:
             self.info_log(strategy_step,
                           "No upgrades exist. Nothing needs completing")
-            return self.next_state
+            return self.finalize_upgrade(strategy_step)

         # invoke the API 'upgrade-complete'
         # This is a partially blocking call that raises exception on failure.
@@ -66,4 +81,4 @@ class CompletingUpgradeState(BaseState):

         # When we return from this method without throwing an exception, the
         # state machine can proceed to the next state
-        return self.next_state
+        return self.finalize_upgrade(strategy_step)
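Design note on the completing step above: both exit paths now go through
finalize_upgrade(), so the subcloud record is updated even when the subcloud
reports that no upgrade exists (for example, on a retry after a prior
completion). The update written at that point looks roughly like the sketch
below; the call mirrors the diff above, and the context/ID variables are
placeholders.

    # Sketch of the record written by finalize_upgrade() (placeholder variables).
    db_api.subcloud_update(
        context, subcloud_id,
        deploy_status=consts.DEPLOY_STATE_DONE,   # i.e. 'complete'
        software_version=software_version)        # version reported by sysinv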
@@ -56,12 +56,26 @@ class ImportingLoadState(BaseState):
         iso_path, sig_path = utils.get_vault_load_files(target_version)

         # Call the API. import_load blocks until the load state is 'importing'
-        imported_load = sysinv_client.import_load(iso_path, sig_path)
+        try:
+            imported_load = sysinv_client.import_load(iso_path, sig_path)
+        except Exception as e:
+            self.error_log(strategy_step, str(e))
+            raise Exception("Failed to import load. Please check sysinv.log on "
+                            "the subcloud for details.")

         new_load = imported_load.get('new_load', {})
-        if new_load.get('software_version') != target_version:
-            raise Exception("The imported load was not the expected version")
+        if new_load:
+            if new_load.get('software_version') != target_version:
+                raise Exception("The imported load was not the expected version.")
+        else:
+            self.error_log(strategy_step, imported_load.get('error'))
+            raise Exception("Failed to import load. Please check sysinv.log on "
+                            "the subcloud for details.")

         new_load_id = new_load.get('id')
+        self.info_log(strategy_step,
+                      "Load import request accepted, load software version = %s"
+                      % new_load.get('software_version'))
         # repeatedly query until load state changes to 'imported' or we timeout
         counter = 0
         while True:
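The error handling added above distinguishes two shapes of the import_load
response. The payloads below are illustrative guesses based only on the fields
the code reads ('new_load', 'software_version', 'id', 'error'); the concrete
values and the error wording are made up.

    # Hypothetical import_load responses, for illustration only.
    accepted = {'new_load': {'id': 2,
                             'software_version': '20.06',
                             'state': 'importing'}}
    rejected = {'new_load': {},
                'error': 'not enough free space to import the load'}

    # On the accepted shape the state logs the new software version and starts
    # polling for the 'imported' state; on the rejected shape it logs the
    # 'error' text and raises.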
@@ -62,30 +62,6 @@ class MigratingDataState(BaseState):
         self.max_failed_queries = DEFAULT_MAX_FAILED_QUERIES
         self.failed_sleep_duration = DEFAULT_FAILED_SLEEP

-    def is_subcloud_data_migration_required(self, strategy_step):
-        local_ks_client = self.get_keystone_client()
-        local_sysinv_client = \
-            self.get_sysinv_client(consts.DEFAULT_REGION_NAME,
-                                   local_ks_client.session)
-        sc_version = local_sysinv_client.get_system().software_version
-
-        try:
-            ks_client = self.get_keystone_client(strategy_step.subcloud.name)
-            sysinv_client = self.get_sysinv_client(strategy_step.subcloud.name,
-                                                   ks_client.session)
-            subcloud_version = sysinv_client.get_system().software_version
-            if subcloud_version == sc_version:
-                self.debug_log(strategy_step, "Subcloud upgrade is already done.")
-            else:
-                # Subcloud data migration is complete but not yet activated
-                self.info_log(strategy_step, "Data migration is already done.")
-
-            return False
-        except Exception as e:
-            # After a fresh install, subcloud keystone is not yet accessible
-            self.info_log(strategy_step, str(e))
-            return True
-
     def wait_for_unlock(self, strategy_step):
         """This method returns successfully when the unlock completes.
@@ -169,10 +145,23 @@ class MigratingDataState(BaseState):
         Any exceptions raised by this method set the strategy to FAILED.
         """

-        if not self.is_subcloud_data_migration_required(strategy_step):
-            self.info_log(strategy_step, "Data migration is already done.")
+        # To account for abrupt termination of dcmanager, check the last known
+        # subcloud deploy status. If it is migrated/complete, advance to the next
+        # stage. If it is 'migrating', fail the strategy. The user will need to
+        # delete the existing strategy, create a new one and apply. Pre-check will
+        # set the appropriate next step for this subcloud.
+        subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id)
+        if (subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATED or
+                subcloud.deploy_status == consts.DEPLOY_STATE_DONE):
             return self.next_state
+        elif subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATING_DATA:
+            db_api.subcloud_update(
+                self.context, strategy_step.subcloud_id,
+                deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED)
+            raise Exception("Previous data migration was abruptly terminated. "
+                            "Please try again with a new upgrade strategy.")

+        # If it gets here, the subcloud deploy status must be 'installed'.
         self.info_log(strategy_step, "Start migrating data...")
         db_api.subcloud_update(
             self.context, strategy_step.subcloud_id,
@@ -198,10 +187,6 @@ class MigratingDataState(BaseState):
             self.error_log(strategy_step, str(e))
             raise

-        db_api.subcloud_update(
-            self.context, strategy_step.subcloud_id,
-            deploy_status=consts.DEPLOY_STATE_DONE)
-
         # Ansible invokes an unlock. Need to wait for the unlock to complete.
         # Wait for 3 minutes for mtc/scripts to shut down services
         # todo(abailey): split this into smaller sleeps to allow stopping early
@@ -209,5 +194,9 @@ class MigratingDataState(BaseState):
         # wait up to 60 minutes for reboot to complete
         self.wait_for_unlock(strategy_step)

+        db_api.subcloud_update(
+            self.context, strategy_step.subcloud_id,
+            deploy_status=consts.DEPLOY_STATE_MIGRATED)
+
         self.info_log(strategy_step, "Data migration completed.")
         return self.next_state
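The recovery decision added at the top of the migrating-data step can be
summarized as follows. This is a condensed illustration, not the
MigratingDataState code itself; the status strings are the deploy-status
constants from the diff.

    # Condensed illustration of the migrating-data recovery decision.
    def migrating_data_entry(deploy_status):
        if deploy_status in ('migrated', 'complete'):
            return 'skip'       # work already done; advance to the next state
        if deploy_status == 'migrating-data':
            # a previous run was abruptly terminated: mark the subcloud
            # 'data-migration-failed' and require a new upgrade strategy
            return 'fail'
        # deploy status is 'installed': run the playbook, wait for the unlock,
        # then record 'migrated'
        return 'migrate'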
@@ -13,12 +13,16 @@ from dcmanager.db import api as db_api
 from dcmanager.manager.states.base import BaseState

 # These deploy states should transition to the 'upgrading' state
-VALID_UPGRADE_STATES = [consts.DEPLOY_STATE_INSTALL_FAILED,
+VALID_UPGRADE_STATES = [consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
+                        consts.DEPLOY_STATE_INSTALL_FAILED,
                         consts.DEPLOY_STATE_DATA_MIGRATION_FAILED, ]

 # These deploy states should transition to the 'migrating_data' state
 VALID_MIGRATE_DATA_STATES = [consts.DEPLOY_STATE_INSTALLED, ]

+# These deploy states should transition to the 'activating_upgrade' state
+VALID_ACTIVATION_STATES = [consts.DEPLOY_STATE_MIGRATED, ]
+
 MIN_SCRATCH_SIZE_REQUIRED_GB = 16

@@ -76,13 +80,18 @@ class PreCheckState(BaseState):
     def perform_state_action(self, strategy_step):
         """This state will check if the subcloud is offline:

-        if online, proceed to INSTALLING_LICENSE state
-        if offline, check the deploy_status and transfer to the correct state.
+        Check the deploy_status and transfer to the correct state.
         if an unsupported deploy_status is encountered, fail the upgrade
         """
         subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id)
         if subcloud.availability_status == consts.AVAILABILITY_ONLINE:
             self._perform_subcloud_online_checks(strategy_step, subcloud)
+            # If the subcloud has completed data migration and is online,
+            # advance directly to activating upgrade step. Otherwise, start
+            # from installing license step.
+            if subcloud.deploy_status == consts.DEPLOY_STATE_MIGRATED:
+                self.override_next_state(consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
+
             return self.next_state

         # it is offline.
@@ -90,10 +99,14 @@ class PreCheckState(BaseState):
             self.override_next_state(consts.STRATEGY_STATE_UPGRADING_SIMPLEX)
             return self.next_state

-        if subcloud.deploy_status in VALID_MIGRATE_DATA_STATES:
+        elif subcloud.deploy_status in VALID_MIGRATE_DATA_STATES:
             self.override_next_state(consts.STRATEGY_STATE_MIGRATING_DATA)
             return self.next_state

+        elif subcloud.deploy_status in VALID_ACTIVATION_STATES:
+            self.override_next_state(consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
+            return self.next_state
+
         # FAIL: We are offline and encountered an un-recoverable deploy status
         self.info_log(strategy_step,
                       "Un-handled deploy_status: %s" % subcloud.deploy_status)
@@ -30,7 +30,8 @@ class StartingUpgradeState(BaseState):
     def get_upgrade_state(self, sysinv_client):
         upgrades = sysinv_client.get_upgrades()
         if len(upgrades) == 0:
-            raise Exception("No upgrades were found")
+            raise Exception("Failed to generate upgrade data. Please "
+                            "check sysinv.log on the subcloud for details.")
         # The list of upgrades will never contain more than one entry.
         return upgrades[0].state
@@ -35,7 +35,7 @@ class TestSwUpgradeActivatingStage(TestSwUpgradeState):
         self.strategy_step = \
             self.setup_strategy_step(consts.STRATEGY_STATE_ACTIVATING_UPGRADE)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.upgrade_activate = mock.MagicMock()
         self.sysinv_client.get_upgrades = mock.MagicMock()
@@ -8,6 +8,7 @@ import mock
 from dcmanager.common import consts
 from dcmanager.manager.states.upgrade import completing

+from dcmanager.tests.unit.manager.states.upgrade.test_base import FakeSystem
 from dcmanager.tests.unit.manager.states.upgrade.test_base import FakeUpgrade
 from dcmanager.tests.unit.manager.states.upgrade.test_base \
     import TestSwUpgradeState
@@ -33,9 +34,12 @@ class TestSwUpgradeCompletingStage(TestSwUpgradeState):
         self.strategy_step = \
             self.setup_strategy_step(consts.STRATEGY_STATE_COMPLETING_UPGRADE)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.upgrade_complete = mock.MagicMock()
         self.sysinv_client.get_upgrades = mock.MagicMock()
+        self.sysinv_client.get_system = mock.MagicMock()
+        self.sysinv_client.get_system.return_value = FakeSystem()

     def test_upgrade_subcloud_completing_upgrade_failure(self):
         """Test the completing upgrade API call fails."""
@@ -70,12 +74,20 @@ class TestSwUpgradeCompletingStage(TestSwUpgradeState):
         # API call will not raise an exception. It will delete the upgrade
         self.sysinv_client.upgrade_complete.return_value = UPGRADE_COMPLETING

+        # Mock the db API call
+        p = mock.patch('dcmanager.db.api.subcloud_update')
+        self.mock_db_update = p.start()
+        self.addCleanup(p.stop)
+
         # invoke the strategy state operation on the orch thread
         self.worker.perform_state_action(self.strategy_step)

         # verify the API call was invoked
         self.sysinv_client.upgrade_complete.assert_called()

+        # verify the DB update was invoked
+        self.mock_db_update.assert_called()
+
         # On success, the state should be updated to the next state
         self.assert_step_updated(self.strategy_step.subcloud_id,
                                  self.on_success_state)
@@ -89,12 +101,20 @@ class TestSwUpgradeCompletingStage(TestSwUpgradeState):

         # API call will not be invoked, so no need to mock it

+        # Mock the db API call
+        p = mock.patch('dcmanager.db.api.subcloud_update')
+        self.mock_db_update = p.start()
+        self.addCleanup(p.stop)
+
         # invoke the strategy state operation on the orch thread
         self.worker.perform_state_action(self.strategy_step)

         # upgrade is already in one of the completing states so skip completing
         self.sysinv_client.upgrade_complete.assert_not_called()

+        # verify the DB update was invoked
+        self.mock_db_update.assert_called()
+
         # On success, the state is set to the next state
         self.assert_step_updated(self.strategy_step.subcloud_id,
                                  self.on_success_state)
@@ -79,7 +79,7 @@ class TestSwUpgradeImportingLoadStage(TestSwUpgradeState):
         self.mock_vault_files.return_value = (FAKE_ISO, FAKE_SIG)
         self.addCleanup(p.stop)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.get_system = mock.MagicMock()
         self.sysinv_client.get_system.return_value = FakeSystem()
         self.sysinv_client.get_loads = mock.MagicMock()
@@ -38,7 +38,7 @@ class TestSwUpgradeInstallingLicenseStage(TestSwUpgradeState):
         self.strategy_step = \
             self.setup_strategy_step(consts.STRATEGY_STATE_INSTALLING_LICENSE)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.get_license = mock.MagicMock()
         self.sysinv_client.install_license = mock.MagicMock()
@@ -39,7 +39,7 @@ class TestSwUpgradeLockControllerStage(TestSwUpgradeState):
         self.strategy_step = \
             self.setup_strategy_step(consts.STRATEGY_STATE_LOCKING_CONTROLLER)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.get_host = mock.MagicMock()
         self.sysinv_client.lock_host = mock.MagicMock()
@@ -9,7 +9,7 @@ from dcmanager.common import consts
 from dcmanager.manager.states.upgrade import migrating_data
 from dcmanager.tests.unit.manager.states.upgrade.test_base \
     import FakeController
-from dcmanager.tests.unit.manager.states.upgrade.test_base import FakeSystem
+from dcmanager.tests.unit.manager.states.upgrade.test_base import FakeSubcloud
 from dcmanager.tests.unit.manager.states.upgrade.test_base \
     import TestSwUpgradeState
@@ -45,18 +45,12 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
             self.setup_strategy_step(consts.STRATEGY_STATE_MIGRATING_DATA)

         # Add mock API endpoints for sysinv client calls invoked by this state
-        self.sysinv_client.get_system = mock.MagicMock()
-        self.sysinv_client.get_system.return_value = FakeSystem()
         self.sysinv_client.get_host = mock.MagicMock()

     @mock.patch.object(migrating_data, 'db_api')
     def test_upgrade_subcloud_migrating_data_failure(self, mock_db_api):
         """Test migrating data step where the subprocess call fails."""

-        # Simulate data migration has not started yet
-        self.sysinv_client.get_system.side_effect = \
-            [FakeSystem(), Exception("Fresh install!")]
-
         # Simulate a failed subprocess call to the platform upgrade playbook
         # on the subcloud.
         p = mock.patch(
@@ -76,10 +70,6 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
     def test_upgrade_subcloud_migrating_data_success(self, mock_db_api):
         """Test migrating data step where the subprocess call passes."""

-        # Simulate data migration has not started yet
-        self.sysinv_client.get_system.side_effect = \
-            [FakeSystem(), Exception("Fresh install!")]
-
         # Simulate a successful subprocess call to the platform upgrade playbook
         # on the subcloud.
         p = mock.patch(
@@ -103,11 +93,17 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
         self.assert_step_updated(self.strategy_step.subcloud_id,
                                  self.on_success_state)

-    def test_upgrade_subcloud_migrating_data_skip(self):
-        """Test the migrating data step skipped"""
+    def test_upgrade_subcloud_migrating_data_skip_migration_done(self):
+        """Test the migrating data step skipped (migration completed)"""

-        # get_system is mocked to return the same fake system for both
-        # system controller and subclould.
+        # Mock the db API call
+        p = mock.patch('dcmanager.db.api.subcloud_get')
+        self.mock_db_query = p.start()
+        self.addCleanup(p.stop)
+
+        # online subcloud running N load
+        self.mock_db_query.return_value = FakeSubcloud(
+            deploy_status=consts.DEPLOY_STATE_MIGRATED)

         # Invoke the strategy state operation on the orch thread
         self.worker.perform_state_action(self.strategy_step)
@@ -116,6 +112,51 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
         self.assert_step_updated(self.strategy_step.subcloud_id,
                                  self.on_success_state)

+    def test_upgrade_subcloud_migrating_data_skip_deployment_done(self):
+        """Test the migrating data step skipped (deployment completed)"""
+
+        # Mock the db API call
+        p = mock.patch('dcmanager.db.api.subcloud_get')
+        self.mock_db_query = p.start()
+        self.addCleanup(p.stop)
+
+        # online subcloud running N load
+        self.mock_db_query.return_value = FakeSubcloud(
+            deploy_status=consts.DEPLOY_STATE_DONE)
+
+        # Invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        # On success, should have moved to the next state
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 self.on_success_state)
+
+    def test_upgrade_subcloud_migrating_data_interrupted_migration(self):
+        """Test the migrating data step skipped"""
+
+        # Mock the db API calls
+        p1 = mock.patch('dcmanager.db.api.subcloud_get')
+        self.mock_db_query = p1.start()
+        self.addCleanup(p1.stop)
+
+        p2 = mock.patch('dcmanager.db.api.subcloud_update')
+        self.mock_db_update = p2.start()
+        self.addCleanup(p2.stop)
+
+        # online subcloud running N load
+        self.mock_db_query.return_value = FakeSubcloud(
+            deploy_status=consts.DEPLOY_STATE_MIGRATING_DATA)
+
+        # Invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        # verify the DB update was invoked
+        self.mock_db_update.assert_called()
+
+        # Cannot resume the migration, the state goes to failed
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 consts.STRATEGY_STATE_FAILED)
+
     @mock.patch.object(migrating_data, 'db_api')
     def test_upgrade_subcloud_migrating_data_reboot_timeout(self, mock_db_api):
         """Test migrating data step times out during reboot
@@ -123,10 +164,6 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
        The subprocess call passes however the reboot times out.
        """

-        # Simulate data migration has not started yet
-        self.sysinv_client.get_system.side_effect = \
-            [FakeSystem(), Exception("Fresh install!")]
-
        # Simulate a successful subprocess call to the platform upgrade playbook
        # on the subcloud.
        p = mock.patch(
@@ -156,10 +193,6 @@ class TestSwUpgradeMigratingDataStage(TestSwUpgradeState):
        The subprocess call passes however the unlock enable times out.
        """

-        # Simulate data migration has not started yet
-        self.sysinv_client.get_system.side_effect = \
-            [FakeSystem(), Exception("Fresh install!")]
-
        # Simulate a successful subprocess call to the platform upgrade playbook
        # on the subcloud.
        p = mock.patch(
@@ -34,16 +34,17 @@ class TestSwUpgradePreCheckStage(TestSwUpgradeState):
         self.sysinv_client.get_host = mock.MagicMock()
         self.sysinv_client.get_host_filesystem = mock.MagicMock()

-    def test_upgrade_pre_check_subcloud_online(self):
-        """Test pre check step where the subcloud is online.
+    def test_upgrade_pre_check_subcloud_online_fresh(self):
+        """Test pre check step where the subcloud is online and running N load

         The pre-check should transition in this scenario to the first state
         of a normal upgrade orchestation which is 'installing license'.
         """

-        # subcloud is online
-        self.mock_db_query.return_value = \
-            FakeSubcloud(availability_status=consts.AVAILABILITY_ONLINE)
+        # online subcloud running N load
+        self.mock_db_query.return_value = FakeSubcloud(
+            availability_status=consts.AVAILABILITY_ONLINE,
+            deploy_status=consts.DEPLOY_STATE_DONE)

         self.sysinv_client.get_host_filesystem.side_effect = \
             [CONTROLLER_0_HOST_FS_SCRATCH_MIN_SIZED]
@@ -58,6 +59,30 @@ class TestSwUpgradePreCheckStage(TestSwUpgradeState):
         self.assert_step_updated(self.strategy_step.subcloud_id,
                                  consts.STRATEGY_STATE_INSTALLING_LICENSE)

+    def test_upgrade_pre_check_subcloud_online_migrated(self):
+        """Test pre check step where the subcloud is online and running N+1 load
+
+        The pre-check in this scenario should advance directly to 'activating upgrade'.
+        """
+
+        # online subcloud running N+1 load
+        self.mock_db_query.return_value = FakeSubcloud(
+            availability_status=consts.AVAILABILITY_ONLINE,
+            deploy_status=consts.DEPLOY_STATE_MIGRATED)
+
+        self.sysinv_client.get_host_filesystem.side_effect = \
+            [CONTROLLER_0_HOST_FS_SCRATCH_MIN_SIZED]
+
+        # invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        # verify the DB query was invoked
+        self.mock_db_query.assert_called()
+
+        # Verify the expected next state happened (activating upgrade)
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 consts.STRATEGY_STATE_ACTIVATING_UPGRADE)
+
     def test_upgrade_pre_check_subcloud_online_scratch_undersized(self):
         """Test pre check step where the subcloud is online undersized scratch
@@ -34,7 +34,7 @@ class TestSwUpgradeStartingUpgradeStage(TestSwUpgradeState):
         self.strategy_step = \
             self.setup_strategy_step(consts.STRATEGY_STATE_STARTING_UPGRADE)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.upgrade_start = mock.MagicMock()
         self.sysinv_client.get_upgrades = mock.MagicMock()
@@ -44,7 +44,7 @@ class TestSwUpgradeUnlockControllerStage(TestSwUpgradeState):
         self.strategy_step = self.setup_strategy_step(
             consts.STRATEGY_STATE_UNLOCKING_CONTROLLER)

-        # Add mock API endpoints for sysinv client calls invcked by this state
+        # Add mock API endpoints for sysinv client calls invoked by this state
         self.sysinv_client.get_host = mock.MagicMock()
         self.sysinv_client.unlock_host = mock.MagicMock()