From bd7d2a69221e69da2403bf6c46c6ef393f0cd34b Mon Sep 17 00:00:00 2001
From: Gustavo Herzmann
Date: Tue, 14 Feb 2023 14:36:41 -0300
Subject: [PATCH] Refactor distributed cloud patch orchestration

Makes PatchOrchThread a subclass of OrchThread, following the same
design used in the other orchestration types. It also removes the
strategy step that orchestrates the system controller patching, as it
should be done separately, before initializing the patch orchestration
to patch the subclouds.

The alarm checks that were executed inside the 'updating' state are now
part of a new 'pre-check' state that runs at the beginning of the
orchestration process.

It also refactors the unit tests to be in line with the unit tests of
the other orchestrator types by using the TestSwUpdate base class.

Test Plan:
1. PASS - Verify successful patch orchestration by applying and
   removing an NRR patch;
2. PASS - Verify successful patch orchestration by applying and
   removing an RR patch;
3. PASS - Induce a management affecting alarm in a subcloud and verify
   that orchestration fails for that subcloud;
4. PASS - Induce a 900.001 alarm by partially applying a patch in a
   subcloud beforehand and verify that orchestration completes
   successfully for that subcloud;
5. PASS - Create a DC orch patch strategy, then manually patch a
   subcloud using the sw-manager patch-strategy command and then apply
   the DC orch patch strategy, verifying that the state machine skips
   from the 'creating VIM patch strategy' state directly to the
   'finishing patch strategy' state;
6. PASS - Verify that no strategy step is created for the system
   controller;
7. PASS - Execute another orchestration type (e.g. prestage) and verify
   that it still works as expected.

Story: 2010584
Task: 47371

Co-Authored-By: Al Bailey
Signed-off-by: Gustavo Herzmann
Change-Id: I2c37bd59696e6f9e4fd706f3b3c97f8f9e4499b0
---
 distributedcloud/dcmanager/common/consts.py | 14 +-
 .../dcmanager/orchestrator/orch_thread.py | 43 +-
 .../orchestrator/patch_orch_thread.py | 1346 +----------------
 .../dcmanager/orchestrator/states/base.py | 14 +-
 .../orchestrator/states/patch/__init__.py | 0
 .../patch/applying_vim_patch_strategy.py | 20 +
 .../patch/creating_vim_patch_strategy.py | 45 +
 .../states/patch/finishing_patch_strategy.py | 71 +
 .../orchestrator/states/patch/job_data.py | 43 +
 .../orchestrator/states/patch/pre_check.py | 51 +
 .../states/patch/updating_patches.py | 159 ++
 .../orchestrator/sw_update_manager.py | 15 +-
 .../orchestrator/states/patch/__init__.py | 0
 .../patch/test_applying_vim_patch_strategy.py | 18 +
 .../orchestrator/states/patch/test_base.py | 14 +
 .../patch/test_creating_vim_patch_strategy.py | 59 +
 .../patch/test_finishing_patch_strategy.py | 116 ++
 .../states/patch/test_pre_check.py | 134 ++
 .../states/patch/test_updating_patches.py | 228 +++
 .../upgrade/test_finishing_patch_strategy.py | 20 +-
 .../states/upgrade/test_updating_patches.py | 20 +-
 .../tests/unit/orchestrator/test_base.py | 10 +-
 .../orchestrator/test_sw_update_manager.py | 936 +-----------
 23 files changed, 1119 insertions(+), 2257 deletions(-)
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/__init__.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/creating_vim_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/finishing_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/job_data.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/pre_check.py
 create mode 100644 distributedcloud/dcmanager/orchestrator/states/patch/updating_patches.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/__init__.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_applying_vim_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_base.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_creating_vim_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_finishing_patch_strategy.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_pre_check.py
 create mode 100644 distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_updating_patches.py

diff --git a/distributedcloud/dcmanager/common/consts.py b/distributedcloud/dcmanager/common/consts.py
index b4fac362d..6a69c1a92 100644
--- a/distributedcloud/dcmanager/common/consts.py
+++ b/distributedcloud/dcmanager/common/consts.py
@@ -1,5 +1,5 @@
 # Copyright (c) 2016 Ericsson AB.
-# Copyright (c) 2017-2022 Wind River Systems, Inc.
+# Copyright (c) 2017-2023 Wind River Systems, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
 # a copy of the License at
@@ -92,16 +92,18 @@ DEFAULT_SUBCLOUD_GROUP_DESCRIPTION = 'Default Subcloud Group'
 DEFAULT_SUBCLOUD_GROUP_UPDATE_APPLY_TYPE = SUBCLOUD_APPLY_TYPE_PARALLEL
 DEFAULT_SUBCLOUD_GROUP_MAX_PARALLEL_SUBCLOUDS = 2
-# Strategy step states
+# Common strategy step states
 STRATEGY_STATE_INITIAL = "initial"
-STRATEGY_STATE_UPDATING_PATCHES = "updating patches"
-STRATEGY_STATE_CREATING_STRATEGY = "creating strategy"
-STRATEGY_STATE_APPLYING_STRATEGY = "applying strategy"
-STRATEGY_STATE_FINISHING = "finishing"
 STRATEGY_STATE_COMPLETE = "complete"
 STRATEGY_STATE_ABORTED = "aborted"
 STRATEGY_STATE_FAILED = "failed"
+# Patch orchestration states
+STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY = "creating VIM patch strategy"
+STRATEGY_STATE_DELETING_VIM_PATCH_STRATEGY = "deleting VIM patch strategy"
+STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY = "applying VIM patch strategy"
+
+# Upgrade orchestration states
 STRATEGY_STATE_PRE_CHECK = "pre check"
 STRATEGY_STATE_INSTALLING_LICENSE = "installing license"
 STRATEGY_STATE_IMPORTING_LOAD = "importing load"
diff --git a/distributedcloud/dcmanager/orchestrator/orch_thread.py b/distributedcloud/dcmanager/orchestrator/orch_thread.py
index 50a4e34dc..63185e404 100644
--- a/distributedcloud/dcmanager/orchestrator/orch_thread.py
+++ b/distributedcloud/dcmanager/orchestrator/orch_thread.py
@@ -1,5 +1,5 @@
 # Copyright 2017 Ericsson AB.
-# Copyright (c) 2017-2022 Wind River Systems, Inc.
+# Copyright (c) 2017-2023 Wind River Systems, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
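
The hunk below gives the OrchThread base class a pair of one-shot lifecycle hooks: _pre_apply_setup() runs the subclass's pre_apply_setup() exactly once before a strategy starts to apply, and _post_delete_teardown() runs post_delete_teardown() once after the strategy is deleted. A minimal sketch of the guard pattern, with a hypothetical class name (the real code is in the hunk that follows):

import threading


class OrchThreadSketch(threading.Thread):
    """Illustrates the one-shot setup/teardown guard added below."""

    def __init__(self):
        super(OrchThreadSketch, self).__init__()
        self._setup = False  # has the per-strategy setup already run?

    def _pre_apply_setup(self):
        # Called on every apply iteration; the subclass hook fires once
        if not self._setup:
            self._setup = True
            self.pre_apply_setup()

    def pre_apply_setup(self):
        # Subclasses (e.g. PatchOrchThread) override this to build
        # shared job data once per strategy
        pass

    def _post_delete_teardown(self):
        # Mirror of the setup guard: fires once after a strategy delete
        if self._setup:
            self._setup = False
            self.post_delete_teardown()

    def post_delete_teardown(self):
        pass
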
@@ -23,7 +23,9 @@ from keystoneauth1 import exceptions as keystone_exceptions from oslo_log import log as logging from dccommon import consts as dccommon_consts +from dccommon.drivers.openstack.patching_v1 import PatchingClient from dccommon.drivers.openstack.sdk_platform import OpenStackDriver +from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.drivers.openstack import vim from dcmanager.common import consts from dcmanager.common import context @@ -78,6 +80,8 @@ class OrchThread(threading.Thread): thread_pool_size=500) # Track worker created for each subcloud. self.subcloud_workers = dict() + # Track if the strategy setup function was executed + self._setup = False @abc.abstractmethod def trigger_audit(self): @@ -85,6 +89,28 @@ class OrchThread(threading.Thread): LOG.warn("(%s) OrchThread subclass must override trigger_audit" % self.update_type) + def _pre_apply_setup(self): + """Setup performed once before a strategy starts to apply""" + if not self._setup: + LOG.info("(%s) OrchThread Pre-Apply Setup" % self.update_type) + self._setup = True + self.pre_apply_setup() + + def pre_apply_setup(self): + """Subclass can override this method""" + pass + + def _post_delete_teardown(self): + """Cleanup code executed once after deleting a strategy""" + if self._setup: + LOG.info("(%s) OrchThread Post-Delete Teardown" % self.update_type) + self._setup = False + self.post_delete_teardown() + + def post_delete_teardown(self): + """Subclass can override this method""" + pass + def stopped(self): return self._stop.isSet() @@ -114,6 +140,19 @@ class OrchThread(threading.Thread): ks_client = OrchThread.get_ks_client(region_name) return vim.VimClient(region_name, ks_client.session) + @staticmethod + def get_sysinv_client(region_name=dccommon_consts.DEFAULT_REGION_NAME): + ks_client = OrchThread.get_ks_client(region_name) + endpoint = ks_client.endpoint_cache.get_endpoint('sysinv') + return SysinvClient(region_name, + ks_client.session, + endpoint=endpoint) + + @staticmethod + def get_patching_client(region_name=dccommon_consts.DEFAULT_REGION_NAME): + ks_client = OrchThread.get_ks_client(region_name) + return PatchingClient(region_name, ks_client.session) + @staticmethod def get_region_name(strategy_step): """Get the region name for a strategy step""" @@ -184,6 +223,7 @@ class OrchThread(threading.Thread): if sw_update_strategy.state in [ consts.SW_UPDATE_STATE_APPLYING, consts.SW_UPDATE_STATE_ABORTING]: + self._pre_apply_setup() self.apply(sw_update_strategy) elif sw_update_strategy.state == \ consts.SW_UPDATE_STATE_ABORT_REQUESTED: @@ -191,6 +231,7 @@ class OrchThread(threading.Thread): elif sw_update_strategy.state == \ consts.SW_UPDATE_STATE_DELETING: self.delete(sw_update_strategy) + self._post_delete_teardown() except exceptions.NotFound: # Nothing to do if a strategy doesn't exist diff --git a/distributedcloud/dcmanager/orchestrator/patch_orch_thread.py b/distributedcloud/dcmanager/orchestrator/patch_orch_thread.py index f37a4a701..3c48f16bb 100644 --- a/distributedcloud/dcmanager/orchestrator/patch_orch_thread.py +++ b/distributedcloud/dcmanager/orchestrator/patch_orch_thread.py @@ -1,5 +1,5 @@ # Copyright 2017 Ericsson AB. -# Copyright (c) 2017-2022 Wind River Systems, Inc. +# Copyright (c) 2017-2023 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
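
The rewrite below replaces PatchOrchThread's hand-rolled worker loop with the per-state operator classes used by the other orchestrators. Roughly, the base class looks up the operator registered in STATE_OPERATORS for a step's current state, hands it the shared RegionOne job data, and advances the step to whatever state the operator returns. A paraphrase of that dispatch as a hypothetical helper, not the actual OrchThread code:

def run_step(orch_thread, strategy_step):
    # Map the step's state name (e.g. "pre check") to its operator class
    operator_cls = orch_thread.STATE_OPERATORS[strategy_step.state]
    state = operator_cls(region_name=strategy_step.subcloud.name)
    # PatchOrchThread.determine_state_operator shares the cached
    # RegionOne patch data with each operator via set_job_data()
    state.set_job_data(orch_thread.job_data)
    # Each operator performs its action and returns the next state,
    # e.g. "updating patches" -> "creating VIM patch strategy"
    return state.perform_state_action(strategy_step)
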
@@ -14,1305 +14,63 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import datetime -import os -import re -import threading -import time -from keystoneauth1 import exceptions as keystone_exceptions -from oslo_log import log as logging - -from dccommon import consts as dccommon_consts -from dccommon.drivers.openstack.fm import FmClient -from dccommon.drivers.openstack import patching_v1 -from dccommon.drivers.openstack.patching_v1 import PatchingClient -from dccommon.drivers.openstack.sdk_platform import OpenStackDriver -from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.drivers.openstack import vim - from dcmanager.common import consts -from dcmanager.common import context -from dcmanager.common import exceptions -from dcmanager.common import scheduler -from dcmanager.common import utils -from dcmanager.db import api as db_api +from dcmanager.orchestrator.orch_thread import OrchThread +from dcmanager.orchestrator.states.patch.applying_vim_patch_strategy import \ + ApplyingVIMPatchStrategyState +from dcmanager.orchestrator.states.patch.creating_vim_patch_strategy import \ + CreatingVIMPatchStrategyState +from dcmanager.orchestrator.states.patch.finishing_patch_strategy import \ + FinishingPatchStrategyState +from dcmanager.orchestrator.states.patch.job_data import PatchJobData +from dcmanager.orchestrator.states.patch.pre_check import PreCheckState +from dcmanager.orchestrator.states.patch.updating_patches import \ + UpdatingPatchesState +from oslo_log import log as logging LOG = logging.getLogger(__name__) -IGNORE_ALARMS = ['900.001', ] # Patch in progress - -class PatchOrchThread(threading.Thread): - """Patch Orchestration Thread - - This thread is responsible for executing the patch orchestration strategy. - Here is how it works: - - The user creates a patch strategy from the CLI (or REST API). - - This ends up being handled by the SwUpdateManager class (above), which - runs under the main dcmanager thread. The strategy is created and stored - in the database. - - The user then applies the strategy from the CLI (or REST API). The - SwUpdateManager code updates the state of the strategy in the database. - - The PatchOrchThread wakes up periodically and checks the database for - a strategy that is in an active state (applying, aborting, etc...). If - so, it executes the strategy, updating the strategy and steps in the - database as it goes, with state and progress information. - """ +class PatchOrchThread(OrchThread): + STATE_OPERATORS = { + consts.STRATEGY_STATE_PRE_CHECK: + PreCheckState, + consts.STRATEGY_STATE_UPDATING_PATCHES: + UpdatingPatchesState, + consts.STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY: + CreatingVIMPatchStrategyState, + consts.STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY: + ApplyingVIMPatchStrategyState, + consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY: + FinishingPatchStrategyState, + } def __init__(self, strategy_lock, audit_rpc_client): - super(PatchOrchThread, self).__init__() - self.context = context.get_admin_context() - self._stop = threading.Event() - # Used to protect strategy when an atomic read/update is required. - self.strategy_lock = strategy_lock - # Used to notify dcmanager-audit to trigger a patch audit - self.audit_rpc_client = audit_rpc_client - # Keeps track of greenthreads we create to do work. - self.thread_group_manager = scheduler.ThreadGroupManager( - thread_pool_size=500) - # Track worker created for each subcloud. 
- self.subcloud_workers = dict() - # Used to store RegionOne patches. - self.regionone_patches = dict() - # Used to store the list of patch ids that should be applied, based on - # their state in the central region. - self.regionone_applied_patch_ids = list() - # Used to store the list patch ids are committed in the central region. - self.regionone_committed_patch_ids = list() - - def stopped(self): - return self._stop.isSet() - - def stop(self): - LOG.info("PatchOrchThread Stopping") - self._stop.set() - - def run(self): - LOG.info("PatchOrchThread Starting") - # Build region one patches cache whenever the service is reloaded - self.get_region_one_patches() - self.patch_orch() - # Stop any greenthreads that are still running - self.thread_group_manager.stop() - LOG.info("PatchOrchThread Stopped") - - @staticmethod - def get_ks_client(region_name=dccommon_consts.DEFAULT_REGION_NAME): - """This will get a cached keystone client (and token)""" - try: - os_client = OpenStackDriver( - region_name=region_name, - region_clients=None) - return os_client.keystone_client - except Exception: - LOG.warn('Failure initializing KeystoneClient %s' % region_name) - raise - - def get_sysinv_client(self, region_name=dccommon_consts.DEFAULT_REGION_NAME): - ks_client = self.get_ks_client(region_name) - return SysinvClient(region_name, ks_client.session, - endpoint=ks_client.endpoint_cache.get_endpoint('sysinv')) - - def get_patching_client(self, region_name=dccommon_consts.DEFAULT_REGION_NAME): - ks_client = self.get_ks_client(region_name) - return PatchingClient(region_name, ks_client.session, - endpoint=ks_client.endpoint_cache.get_endpoint('patching')) - - def get_vim_client(self, region_name=dccommon_consts.DEFAULT_REGION_NAME): - ks_client = self.get_ks_client(region_name) - return vim.VimClient(region_name, ks_client.session, - endpoint=ks_client.endpoint_cache.get_endpoint('vim')) - - # TODO(yuxing) need to remove this function after the ctgs client accept - # alarm_ignore_list. - def get_fm_client(self, region_name): - ks_client = self.get_ks_client(region_name) - return FmClient(region_name, ks_client.session) - - @staticmethod - def get_region_name(strategy_step): - """Get the region name for a strategy step""" - if strategy_step.subcloud_id is None: - # This is the SystemController. - return dccommon_consts.DEFAULT_REGION_NAME - else: - return strategy_step.subcloud.name - - def strategy_step_update(self, subcloud_id, state=None, details=None): - """Update the strategy step in the DB - - Sets the start and finished timestamp if necessary, based on state. - """ - started_at = None - finished_at = None - if state in [consts.STRATEGY_STATE_UPDATING_PATCHES]: - started_at = datetime.datetime.now() - elif state in [consts.STRATEGY_STATE_COMPLETE, - consts.STRATEGY_STATE_ABORTED, - consts.STRATEGY_STATE_FAILED]: - finished_at = datetime.datetime.now() - db_api.strategy_step_update( - self.context, - subcloud_id, - state=state, - details=details, - started_at=started_at, - finished_at=finished_at) - - def get_region_one_patches(self): - """Query the RegionOne to determine what patches should be applied/committed.""" - - self.regionone_patches = \ - self.get_patching_client(dccommon_consts.DEFAULT_REGION_NAME).query() - LOG.debug("regionone_patches: %s" % self.regionone_patches) - - # Build lists of patches that should be applied in this subcloud, - # based on their state in RegionOne. 
Check repostate (not patchstate) - # as we only care if the patch has been applied to the repo (not - # whether it is installed on the hosts). If we were to check the - # patchstate, we could end up removing patches from this subcloud - # just because a single host in RegionOne reported that it was not - # patch current. - self.regionone_applied_patch_ids = [ - patch_id for patch_id in self.regionone_patches.keys() - if self.regionone_patches[patch_id]['repostate'] in [ - patching_v1.PATCH_STATE_APPLIED, - patching_v1.PATCH_STATE_COMMITTED]] - - # Then query RegionOne to determine what patches should be committed. - regionone_committed_patches = self.get_patching_client( - dccommon_consts.DEFAULT_REGION_NAME).query( - state=patching_v1.PATCH_STATE_COMMITTED) - LOG.debug("regionone_committed_patches: %s" % - regionone_committed_patches) - - self.regionone_committed_patch_ids = [ - patch_id for patch_id in regionone_committed_patches.keys()] - - def patch_orch(self): - while not self.stopped(): - try: - LOG.debug('Running patch orchestration') - - sw_update_strategy = db_api.sw_update_strategy_get( - self.context, - update_type=consts.SW_UPDATE_TYPE_PATCH) - - if sw_update_strategy.type == consts.SW_UPDATE_TYPE_PATCH: - if sw_update_strategy.state in [ - consts.SW_UPDATE_STATE_APPLYING, - consts.SW_UPDATE_STATE_ABORTING]: - self.apply(sw_update_strategy) - elif sw_update_strategy.state == \ - consts.SW_UPDATE_STATE_ABORT_REQUESTED: - self.abort(sw_update_strategy) - elif sw_update_strategy.state == \ - consts.SW_UPDATE_STATE_DELETING: - self.delete(sw_update_strategy) - - except exceptions.NotFound: - # Nothing to do if a strategy doesn't exist - pass - - except Exception as e: - # We catch all exceptions to avoid terminating the thread. - LOG.exception(e) - - # Wake up every 10 seconds to see if there is work to do. - time.sleep(10) - - LOG.info("PatchOrchThread ended main loop") - - def pre_check_management_affected_alarm(self, subcloud_name): - # The health conditions acceptable for subcloud patching are: - # a) subcloud is completely healthy (i.e. no failed checks) - # b) there is alarm but no management affected alarm - # c) subcloud fails alarm check and it only has non-management - # affecting alarm(s) - # d) subcloud fails alarm check but the alarms are in the - # IGNORE_ALARMS list - # TODO(yuxing) Update the cgtsclient and the sysinv client driver to - # accept alarm_ignore_list to avoid retrieving alarms from FM client. 
- system_health = self.get_sysinv_client(subcloud_name).get_system_health() - - failed_alarm_check = re.findall("No alarms: \[Fail\]", system_health) - no_mgmt_alarms = re.findall("\[0\] of which are management affecting", - system_health) - if not failed_alarm_check or no_mgmt_alarms: - return True - else: - alarms = self.get_fm_client(subcloud_name).get_alarms() - for alarm in alarms: - # This alarm cannot be ignored - - if (alarm.mgmt_affecting == "True") and ( - alarm.alarm_id not in IGNORE_ALARMS): - return False - # Either the non-management affecting alarms or the skippable alarm - # can be ignored, return true - return True - - def apply(self, sw_update_strategy): - """Apply a patch strategy""" - - LOG.info("Applying patch strategy") - strategy_steps = db_api.strategy_step_get_all(self.context) - - # Figure out which stage we are working on - current_stage = None - stop_after_stage = None - failure_detected = False - abort_detected = False - for strategy_step in strategy_steps: - if strategy_step.state == consts.STRATEGY_STATE_COMPLETE: - # This step is complete - continue - elif strategy_step.state == consts.STRATEGY_STATE_ABORTED: - # This step was aborted - abort_detected = True - continue - elif strategy_step.state == consts.STRATEGY_STATE_FAILED: - failure_detected = True - # This step has failed and needs no further action - if strategy_step.subcloud_id is None: - # Strategy on SystemController failed. We are done. - LOG.info("Stopping strategy due to failure while " - "patching SystemController") - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_FAILED) - # Trigger patch audit to update the sync status for - # each subcloud. - self.audit_rpc_client.trigger_patch_audit(self.context) - return - elif sw_update_strategy.stop_on_failure: - # We have been told to stop on failures - stop_after_stage = strategy_step.stage - current_stage = strategy_step.stage - break - continue - # We have found the first step that isn't complete or failed. - # This is the stage we are working on now. - current_stage = strategy_step.stage - break - else: - # The strategy application is complete - if failure_detected: - LOG.info("Strategy application has failed.") - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_FAILED) - elif abort_detected: - LOG.info("Strategy application was aborted.") - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_ABORTED) - else: - LOG.info("Strategy application is complete.") - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_COMPLETE) - # Trigger patch audit to update the sync status for each subcloud. - self.audit_rpc_client.trigger_patch_audit(self.context) - self.subcloud_workers.clear() - LOG.info("subcloud_workers list is cleared, workers_num: %d" - % len(self.subcloud_workers)) - return - - if stop_after_stage is not None: - work_remaining = False - # We are going to stop after the steps in this stage have finished. 
- for strategy_step in strategy_steps: - if strategy_step.stage == stop_after_stage: - if strategy_step.state != consts.STRATEGY_STATE_COMPLETE \ - and strategy_step.state != \ - consts.STRATEGY_STATE_FAILED: - # There is more work to do in this stage - work_remaining = True - break - - if not work_remaining: - # We have completed the stage that failed - LOG.info("Stopping strategy due to failure in stage %d" % - stop_after_stage) - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_FAILED) - # Trigger patch audit to update the sync status for each - # subcloud. - self.audit_rpc_client.trigger_patch_audit(self.context) - return - - LOG.debug("Working on stage %d, num_workers: %d" - % (current_stage, len(self.subcloud_workers))) - for strategy_step in strategy_steps: - if strategy_step.stage == current_stage: - region = self.get_region_name(strategy_step) - - if strategy_step.state == \ - consts.STRATEGY_STATE_INITIAL: - - # Retrieve the list of patches from RegionOne once. This list - # will be referenced when the subcloud patch strategy is executed. - if strategy_step.subcloud_id is None: - self.get_region_one_patches() - - # Don't start patching this subcloud if it has been - # unmanaged by the user. If orchestration was already - # started, it will be allowed to complete. - if strategy_step.subcloud_id is not None and \ - strategy_step.subcloud.management_state == \ - dccommon_consts.MANAGEMENT_UNMANAGED: - message = ("Subcloud %s is unmanaged." % - strategy_step.subcloud.name) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - continue - - # We are just getting started, enter the first state - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_UPDATING_PATCHES) - - # Create a greenthread to do the update patches - self._create_worker_thread( - region, consts.STRATEGY_STATE_UPDATING_PATCHES, - strategy_step, self.update_subcloud_patches) - - elif strategy_step.state == \ - consts.STRATEGY_STATE_UPDATING_PATCHES: - if region not in self.subcloud_workers: - # The worker is missed, caused by host swact or service - # reload. - self._create_worker_thread( - region, strategy_step.state, strategy_step, - self.update_subcloud_patches) - else: - # The update is in progress - LOG.debug("Update patches is in progress for %s." - % region) - - elif strategy_step.state == \ - consts.STRATEGY_STATE_CREATING_STRATEGY: - if region not in self.subcloud_workers: - # The worker is missed, caused by host swact or service - # reload. - self._create_worker_thread( - region, consts.STRATEGY_STATE_CREATING_STRATEGY, - strategy_step, self.create_subcloud_strategy) - elif self.subcloud_workers[region][0] != \ - consts.STRATEGY_STATE_CREATING_STRATEGY: - self._create_worker_thread( - region, consts.STRATEGY_STATE_CREATING_STRATEGY, - strategy_step, self.create_subcloud_strategy) - else: - LOG.debug("Creating strategy is in progress for %s." - % region) - elif strategy_step.state == \ - consts.STRATEGY_STATE_APPLYING_STRATEGY: - if region not in self.subcloud_workers: - # The worker is missed, caused by host swact or service - # reload. 
- self._create_worker_thread( - region, consts.STRATEGY_STATE_APPLYING_STRATEGY, - strategy_step, self.apply_subcloud_strategy) - elif self.subcloud_workers[region][0] != \ - consts.STRATEGY_STATE_APPLYING_STRATEGY: - self._create_worker_thread( - region, consts.STRATEGY_STATE_APPLYING_STRATEGY, - strategy_step, self.apply_subcloud_strategy) - else: - LOG.debug("Applying strategy is in progress for %s." - % region) - elif strategy_step.state == \ - consts.STRATEGY_STATE_FINISHING: - if region not in self.subcloud_workers: - # The worker is missed, caused by host swact or service - # reload. - self._create_worker_thread( - region, consts.STRATEGY_STATE_FINISHING, - strategy_step, self.finish) - elif self.subcloud_workers[region][0] != \ - consts.STRATEGY_STATE_FINISHING: - self._create_worker_thread( - region, consts.STRATEGY_STATE_FINISHING, - strategy_step, self.finish) - else: - LOG.debug("Finishing strategy is in progress for %s." - % region) - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - def update_subcloud_patches(self, strategy_step): - """Upload/Apply/Remove patches in this subcloud - - Removes the worker reference after the operation is complete. - """ - - try: - self.do_update_subcloud_patches(strategy_step) - except Exception as e: - self._handle_unexpected_error(e, strategy_step) - - def do_update_subcloud_patches(self, strategy_step): - """Upload/Apply/Remove patches in this subcloud""" - - if strategy_step.subcloud_id is None: - # This is the SystemController. It is the master so no update - # is necessary. - LOG.info("Skipping update patches for SystemController") - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_CREATING_STRATEGY) - return - - # Don't start patching if the subcloud contains management - # affected alarm. Continue patching on the next subcloud - error_msg = None - try: - # If management affected alarm check failed - if not self.pre_check_management_affected_alarm( - strategy_step.subcloud.name): - error_msg = ("Subcloud %s contains one or more management " - "affecting alarm(s). It will not be patched. " - "Please resolve the alarm condition(s) and try again." - % strategy_step.subcloud.name) - LOG.warn(error_msg) - - except Exception: - # If the system health report was not obtained - error_msg = ("Failed to obtain health report for %s. " - "Please see logs for details." - % strategy_step.subcloud.name) - LOG.exception(error_msg) - if error_msg: - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=error_msg) - return - - LOG.info("Updating patches for subcloud %s" % - strategy_step.subcloud.name) - - # Retrieve all the patches that are present in this subcloud. - try: - subcloud_patches = self.get_patching_client( - strategy_step.subcloud.name).query() - LOG.debug("Patches for subcloud %s: %s" % - (strategy_step.subcloud.name, subcloud_patches)) - except Exception: - message = ('Cannot retrieve patches for subcloud: %s' % - strategy_step.subcloud.name) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # Determine which loads are present in this subcloud. During an - # upgrade, there will be more than one load installed. 
- try: - loads = self.get_sysinv_client( - strategy_step.subcloud.name).get_loads() - except Exception: - message = ('Cannot retrieve loads for subcloud: %s' % - strategy_step.subcloud.name) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - installed_loads = utils.get_loads_for_patching(loads) - - patches_to_upload = list() - patches_to_apply = list() - patches_to_remove = list() - - # Figure out which patches in this subcloud need to be applied and - # removed to match the applied patches in RegionOne. Check the - # repostate, which indicates whether it is applied or removed in - # the repo. - subcloud_patch_ids = list(subcloud_patches.keys()) - for patch_id in subcloud_patch_ids: - if subcloud_patches[patch_id]['repostate'] == \ - patching_v1.PATCH_STATE_APPLIED: - if patch_id not in self.regionone_applied_patch_ids: - LOG.info("Patch %s will be removed from subcloud %s" % - (patch_id, strategy_step.subcloud.name)) - patches_to_remove.append(patch_id) - elif subcloud_patches[patch_id]['repostate'] == \ - patching_v1.PATCH_STATE_COMMITTED: - if patch_id not in self.regionone_applied_patch_ids: - message = ("Patch %s is committed in subcloud %s but " - "not applied in SystemController" % - (patch_id, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - elif subcloud_patches[patch_id]['repostate'] == \ - patching_v1.PATCH_STATE_AVAILABLE: - if patch_id in self.regionone_applied_patch_ids: - LOG.info("Patch %s will be applied to subcloud %s" % - (patch_id, strategy_step.subcloud.name)) - patches_to_apply.append(patch_id) - else: - # This patch is in an invalid state - message = ('Patch %s in subcloud %s in unexpected state %s' % - (patch_id, strategy_step.subcloud.name, - subcloud_patches[patch_id]['repostate'])) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # Check that all applied patches in RegionOne are present in the - # subcloud. 
- for patch_id in self.regionone_applied_patch_ids: - if self.regionone_patches[patch_id]['sw_version'] in \ - installed_loads and patch_id not in subcloud_patch_ids: - LOG.info("Patch %s missing from %s" % - (patch_id, strategy_step.subcloud.name)) - patches_to_upload.append(patch_id) - patches_to_apply.append(patch_id) - - LOG.info("%s: patches_to_upload=%s, patches_to_remove=%s, " - "patches_to_apply=%s" % (strategy_step.subcloud.name, - patches_to_upload, - patches_to_remove, - patches_to_apply)) - - if patches_to_remove: - LOG.info("Removing patches %s from subcloud %s" % - (patches_to_remove, strategy_step.subcloud.name)) - try: - self.get_patching_client( - strategy_step.subcloud.name).remove(patches_to_remove) - except Exception: - message = ('Failed to remove patches %s from subcloud %s' % - (patches_to_remove, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if patches_to_upload: - LOG.info("Uploading patches %s to subcloud %s" % - (patches_to_upload, strategy_step.subcloud.name)) - for patch in patches_to_upload: - patch_sw_version = self.regionone_patches[patch]['sw_version'] - patch_file = "%s/%s/%s.patch" % (consts.PATCH_VAULT_DIR, - patch_sw_version, - patch) - if not os.path.isfile(patch_file): - message = ('Patch file %s is missing' % patch_file) - LOG.error(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - try: - self.get_patching_client( - strategy_step.subcloud.name).upload([patch_file]) - except Exception: - message = ('Failed to upload patch file %s to subcloud %s' - % (patch_file, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - if patches_to_apply: - LOG.info("Applying patches %s to subcloud %s" % - (patches_to_apply, strategy_step.subcloud.name)) - try: - self.get_patching_client( - strategy_step.subcloud.name).apply(patches_to_apply) - except Exception: - message = ("Failed to apply patches %s to subcloud %s" % - (patches_to_apply, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # Now that we have applied/removed/uploaded patches, we need to give - # the patch controller on this subcloud time to determine whether - # each host on that subcloud is patch current. - wait_count = 0 - while True: - try: - subcloud_hosts = self.get_patching_client( - strategy_step.subcloud.name).query_hosts() - except Exception: - message = ("Failed to query patch status of hosts on " - "subcloud %s" % strategy_step.subcloud.name) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - LOG.debug("query_hosts for subcloud %s: %s" % - (strategy_step.subcloud.name, subcloud_hosts)) - for host in subcloud_hosts: - if host['interim_state']: - # This host is not yet ready. - LOG.debug("Host %s in subcloud %s in interim state" % - (host["hostname"], strategy_step.subcloud.name)) - break - else: - # All hosts in the subcloud are updated - break - wait_count += 1 - if wait_count >= 6: - # We have waited at least 60 seconds. This is too long. 
We - # will just log it and move on without failing the step. - message = ("Too much time expired after applying patches to " - "subcloud %s - continuing." % - strategy_step.subcloud.name) - LOG.warn(message) - break - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - # Wait 10 seconds before doing another query. - time.sleep(10) - - LOG.info("Updating patches completed for %s " - % strategy_step.subcloud.name) - - # Move on to the next state - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_CREATING_STRATEGY) - - def create_subcloud_strategy(self, strategy_step): - """Create the patch strategy in this subcloud - - Removes the worker reference after the operation is complete. - """ - - try: - self.do_create_subcloud_strategy(strategy_step) - except Exception as e: - self._handle_unexpected_error(e, strategy_step) - - def do_create_subcloud_strategy(self, strategy_step): - """Create the patch strategy in this subcloud""" - - region = self.get_region_name(strategy_step) - - LOG.info("Creating patch strategy for %s" % region) - - # First check if the strategy has been created. - try: - subcloud_strategy = self.get_vim_client(region).get_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except (keystone_exceptions.EndpointNotFound, IndexError): - message = ("Endpoint for subcloud: %s not found." % - region) - LOG.error(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - except Exception: - # Strategy doesn't exist yet - subcloud_strategy = None - - if subcloud_strategy is not None: - # if a strategy exists, it should be deleted and a new one created - LOG.info("Patch VIM strategy for: %s already exists with state: %s" - % (region, subcloud_strategy.state)) - # A VIM strategy in building/applying/aborting can not be deleted. - # Set as FAILED if we encounter a strategy in one of those states. - if subcloud_strategy.state in [vim.STATE_BUILDING, - vim.STATE_APPLYING, - vim.STATE_ABORTING]: - # Can't delete a strategy in these states - message = ("Failed to create a VIM strategy for %s. " - "There already is an existing strategy in %s state" - % (region, subcloud_strategy.state)) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - else: - try: - self.get_vim_client(region).delete_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - # If we get here, the delete worked, so set it to None - subcloud_strategy = None - except Exception: - # we were unable to delete (and set to None) the strategy - message = ("Strategy delete for %s failed" % region) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if subcloud_strategy is None: - # Check whether any patch orchestration is actually required. We - # always create a step for the SystemController and it may have - # been done (e.g. in a previous attempt). Also, if we are just - # committing patches, patch orchestration is not required. 
- orch_required = False - try: - cloud_hosts = self.get_patching_client(region).query_hosts() - except Exception: - message = ("Failed to query patch status of hosts on %s" % - region) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - LOG.debug("query_hosts for %s: %s" % (region, cloud_hosts)) - for host in cloud_hosts: - if not host['patch_current']: - LOG.debug("Host %s in %s is not patch current" % - (host["hostname"], region)) - orch_required = True - break - - if not orch_required: - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FINISHING, - details="") - return - - # Retrieve sw update options. For the controller, the default - # options will be used, as subcloud_id will be None - - opts_dict = \ - utils.get_sw_update_opts(self.context, - for_sw_update=True, - subcloud_id=strategy_step.subcloud_id) - - # If we are here, we need to create the strategy - try: - subcloud_strategy = self.get_vim_client(region).create_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH, - storage_apply_type=opts_dict['storage-apply-type'], - worker_apply_type=opts_dict['worker-apply-type'], - max_parallel_worker_hosts=opts_dict[ - 'max-parallel-workers'], - default_instance_action=opts_dict[ - 'default-instance-action'], - alarm_restrictions=opts_dict['alarm-restriction-type']) - except Exception: - message = "Strategy creation failed for %s" % region - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if subcloud_strategy.state == vim.STATE_BUILDING: - LOG.debug("Strategy build in progress for %s" % region) - else: - message = ("Strategy build failed - unexpected strategy state " - "%s for %s" % - (subcloud_strategy.state, region)) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # Wait for the strategy to be built. 
- WAIT_INTERVAL = 10 - WAIT_LIMIT = 2 * 60 # 2 minutes - wait_count = 0 - while True: - try: - subcloud_strategy = self.get_vim_client(region).get_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except Exception: - message = ("Failed to get patch strategy for %s" % region) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if subcloud_strategy.state == vim.STATE_READY_TO_APPLY: - # Move on to the next state - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_APPLYING_STRATEGY) - LOG.info("Vim strategy created for %s, state updated to %s" - % (region, consts.STRATEGY_STATE_APPLYING_STRATEGY)) - return - elif subcloud_strategy.state == vim.STATE_BUILDING: - # Strategy is being built - LOG.info("Strategy build in progress for %s" % region) - elif subcloud_strategy.state in [vim.STATE_BUILD_FAILED, - vim.STATE_BUILD_TIMEOUT]: - # Build failed - message = "Strategy build failed for %s - %s" % \ - (region, subcloud_strategy.build_phase.reason) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - else: - # Other states are bad - message = "Strategy build failed for %s - unexpected " \ - "state %s" % (region, subcloud_strategy.state) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - wait_count += 1 - if wait_count >= (WAIT_LIMIT // WAIT_INTERVAL): - # We have waited too long. - message = ("Too much time expired after creating strategy for " - "%s." % region) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - # Wait before doing another query. - time.sleep(WAIT_INTERVAL) - - def apply_subcloud_strategy(self, strategy_step): - """Apply the patch strategy in this subcloud - - Removes the worker reference after the operation is complete. - """ - - try: - self.do_apply_subcloud_strategy(strategy_step) - except Exception as e: - self._handle_unexpected_error(e, strategy_step) - - def do_apply_subcloud_strategy(self, strategy_step): - """Apply the patch strategy in this subcloud""" - - region = self.get_region_name(strategy_step) - - LOG.info("Applying patch strategy for %s" % region) - - # First check if the strategy has been created. 
- try: - subcloud_strategy = self.get_vim_client(region).get_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except Exception: - # Strategy doesn't exist - message = "Strategy does not exist for %s" % region - LOG.warn(message) - raise - - if subcloud_strategy.state == vim.STATE_READY_TO_APPLY: - try: - subcloud_strategy = self.get_vim_client(region).apply_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except Exception: - message = "Strategy apply failed for %s" % region - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if subcloud_strategy.state == vim.STATE_APPLYING: - LOG.info("Strategy apply in progress for %s" % region) - else: - message = ("Strategy apply failed - unexpected strategy state " - "%s for %s" % - (subcloud_strategy.state, region)) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # Wait for the strategy to be applied. This could potentially take - # hours. We will wait up to 60 minutes for the current phase or - # completion percentage to change before we give up. - WAIT_INTERVAL = 60 - WAIT_LIMIT = 60 * 60 # 60 minutes - GET_FAIL_LIMIT = 30 * 60 # 30 minutes - wait_count = 0 - get_fail_count = 0 - last_details = "" - while True: - try: - subcloud_strategy = self.get_vim_client(region).get_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - get_fail_count = 0 - except Exception as e: - # When applying the strategy to a subcloud, the VIM can - # be unreachable for a significant period of time when - # there is a controller swact, or in the case of AIO-SX, - # when the controller reboots. - get_fail_count += 1 - wait_count += 1 - if get_fail_count >= (GET_FAIL_LIMIT // WAIT_INTERVAL): - # We have waited too long. - message = ("Failed to get patch strategy for %s" % - region) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - else: - LOG.info("Unable to get patch strategy for %s - " - "attempt %d - reason: %s" % - (region, get_fail_count, e)) - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - # Wait before doing another query. - time.sleep(WAIT_INTERVAL) - - if subcloud_strategy.state == vim.STATE_APPLIED: - # Move on to the next state - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FINISHING, - details="") - return - elif subcloud_strategy.state == vim.STATE_APPLYING: - # Still applying. Update the details for this step if they have - # changed. 
- new_details = ("%s phase is %s%% complete" % ( - subcloud_strategy.current_phase, - subcloud_strategy.current_phase_completion_percentage)) - if new_details != last_details: - # Progress is being made - wait_count = 0 - self.strategy_step_update( - strategy_step.subcloud_id, - details=new_details) - elif subcloud_strategy.state in [vim.STATE_APPLY_FAILED, - vim.STATE_APPLY_TIMEOUT]: - # Apply failed - message = "Strategy apply failed for %s - %s" % \ - (region, subcloud_strategy.apply_phase.reason) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - else: - # Other states are bad - message = "Strategy apply failed for %s - unexpected " \ - "state %s" % (region, subcloud_strategy.state) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - wait_count += 1 - if wait_count >= (WAIT_LIMIT // WAIT_INTERVAL): - # We have waited too long. - message = ("Too much time expired while applying strategy for " - "%s." % region) - LOG.warn(message) - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - # Wait before doing another query. - time.sleep(WAIT_INTERVAL) - - def delete_subcloud_strategy(self, strategy_step): - """Delete the patch strategy in this subcloud - - Removes the worker reference after the operation is complete. - """ - - try: - self.do_delete_subcloud_strategy(strategy_step) - except Exception as e: - LOG.exception(e) - finally: - # The worker is done. - region = self.get_region_name(strategy_step) - if region in self.subcloud_workers: - del self.subcloud_workers[region] - - def do_delete_subcloud_strategy(self, strategy_step): - """Delete the patch strategy in this subcloud""" - - region = self.get_region_name(strategy_step) - - LOG.info("Deleting patch strategy for %s" % region) - - # First check if the strategy has been created. - try: - subcloud_strategy = self.get_vim_client(region).get_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except (keystone_exceptions.EndpointNotFound, IndexError): - message = ("Endpoint for subcloud: %s not found." % - region) - LOG.warn(message) - return - except Exception: - # Strategy doesn't exist so there is nothing to do - return - - if subcloud_strategy.state in [vim.STATE_BUILDING, - vim.STATE_APPLYING, - vim.STATE_ABORTING]: - # Can't delete a strategy in these states - message = ("Strategy for %s in wrong state (%s)for delete" % - (region, subcloud_strategy.state)) - LOG.warn(message) - return - - # If we are here, we need to delete the strategy - try: - self.get_vim_client(region).delete_strategy( - strategy_name=vim.STRATEGY_NAME_SW_PATCH) - except Exception: - message = "Strategy delete failed for %s" % region - LOG.warn(message) - return - - def finish(self, strategy_step): - """Clean up patches in this subcloud (commit, delete) - - Removes the worker reference after the operation is complete. - """ - - try: - self.do_finish(strategy_step) - except Exception as e: - self._handle_unexpected_error(e, strategy_step) - - def do_finish(self, strategy_step): - """Clean up patches in this subcloud (commit, delete).""" - - if strategy_step.subcloud_id is None: - # This is the SystemController. No cleanup is required. 
- LOG.info("Skipping finish for SystemController") - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_COMPLETE) - return - - LOG.info("Finishing patch strategy for %s" % - strategy_step.subcloud.name) - - try: - subcloud_patches = self.get_patching_client( - strategy_step.subcloud.name).query() - LOG.debug("Patches for subcloud %s: %s" % - (strategy_step.subcloud.name, subcloud_patches)) - except Exception: - message = ('Cannot retrieve patches for subcloud: %s' % - strategy_step.subcloud.name) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - patches_to_commit = list() - patches_to_delete = list() - - # For this subcloud, determine which patches should be committed and - # which should be deleted. We check the patchstate here because - # patches cannot be deleted or committed if they are in a partial - # state (e.g. Partial-Apply or Partial-Remove). - subcloud_patch_ids = list(subcloud_patches.keys()) - for patch_id in subcloud_patch_ids: - if subcloud_patches[patch_id]['patchstate'] == \ - patching_v1.PATCH_STATE_AVAILABLE: - LOG.info("Patch %s will be deleted from subcloud %s" % - (patch_id, strategy_step.subcloud.name)) - patches_to_delete.append(patch_id) - elif subcloud_patches[patch_id]['patchstate'] == \ - patching_v1.PATCH_STATE_APPLIED: - if patch_id in self.regionone_committed_patch_ids: - LOG.info("Patch %s will be committed in subcloud %s" % - (patch_id, strategy_step.subcloud.name)) - patches_to_commit.append(patch_id) - - if patches_to_delete: - LOG.info("Deleting patches %s from subcloud %s" % - (patches_to_delete, strategy_step.subcloud.name)) - try: - self.get_patching_client( - strategy_step.subcloud.name).delete(patches_to_delete) - except Exception: - message = ('Failed to delete patches %s from subcloud %s' % - (patches_to_delete, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - if self.stopped(): - LOG.info("Exiting because task is stopped") - return - - if patches_to_commit: - LOG.info("Committing patches %s in subcloud %s" % - (patches_to_commit, strategy_step.subcloud.name)) - try: - self.get_patching_client( - strategy_step.subcloud.name).commit(patches_to_commit) - except Exception: - message = ('Failed to commit patches %s in subcloud %s' % - (patches_to_commit, strategy_step.subcloud.name)) - LOG.warn(message) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) - return - - # We are done. - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_COMPLETE) - - def abort(self, sw_update_strategy): - """Abort a patch strategy""" - - LOG.info("Aborting patch strategy") - - # Mark any steps that have not yet started as aborted, - # so we will not run them later. 
- strategy_steps = db_api.strategy_step_get_all(self.context) - - for strategy_step in strategy_steps: - if strategy_step.state == consts.STRATEGY_STATE_INITIAL: - LOG.info("Aborting step for subcloud %s" % - self.get_region_name(strategy_step)) - self.strategy_step_update( - strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_ABORTED, - details="") - - with self.strategy_lock: - db_api.sw_update_strategy_update( - self.context, state=consts.SW_UPDATE_STATE_ABORTING) - - def delete(self, sw_update_strategy): - """Delete a patch strategy""" - - LOG.info("Deleting patch strategy") - - strategy_steps = db_api.strategy_step_get_all(self.context) - - for strategy_step in strategy_steps: - region = self.get_region_name(strategy_step) - if region in self.subcloud_workers: - # A worker already exists. Let it finish whatever it - # was doing. - LOG.debug("Worker already exists for %s." % region) - else: - # Create a greenthread to delete the subcloud strategy - self.subcloud_workers[region] = \ - self.thread_group_manager.start( - self.delete_subcloud_strategy, - strategy_step) - - if self.stopped(): - LOG.info("Exiting because task is stopped") - self.subcloud_workers.clear() - return - - # Wait up to 180 seconds for the worker threads to complete - # their execution - counter = 0 - while len(self.subcloud_workers) > 0: - time.sleep(10) - counter = counter + 1 - if counter > 18: - break - - # Remove the strategy from the database if all workers - # have completed their execution - try: - db_api.strategy_step_destroy_all(self.context) - db_api.sw_update_strategy_destroy(self.context) - except Exception as e: - LOG.exception(e) - raise e - finally: - # Make sure the dictionary is reset for the next strategy apply - self.subcloud_workers.clear() - - def _create_worker_thread(self, region, state, strategy_step, state_op): - if region in self.subcloud_workers: - # Worker is not in the right state, delete it. - del self.subcloud_workers[region] - - self.subcloud_workers[region] = \ - (state, self.thread_group_manager.start(state_op, - strategy_step)) - LOG.info("Worker thread created for %s in %s." 
- % (region, strategy_step.state)) - - def _handle_unexpected_error(self, ex_obj, strategy_step): - LOG.exception(ex_obj) - message = "Unexpected error occurred while in %s" % strategy_step.state - if strategy_step.subcloud_id is not None: - self.strategy_step_update(strategy_step.subcloud_id, - state=consts.STRATEGY_STATE_FAILED, - details=message) + super(PatchOrchThread, self).__init__( + strategy_lock, + audit_rpc_client, + consts.SW_UPDATE_TYPE_PATCH, + vim.STRATEGY_NAME_SW_PATCH, + starting_state=consts.STRATEGY_STATE_PRE_CHECK) + + self.job_data = None + + def pre_apply_setup(self): + super(PatchOrchThread, self).pre_apply_setup() + self.job_data = PatchJobData() + + def post_delete_teardown(self): + super(PatchOrchThread, self).post_delete_teardown() + self.job_data = None + + def determine_state_operator(self, strategy_step): + state = super(PatchOrchThread, self).determine_state_operator( + strategy_step) + # Share job data with the next state operator + state.set_job_data(self.job_data) + return state + + def trigger_audit(self): + self.audit_rpc_client.trigger_patch_audit(self.context) diff --git a/distributedcloud/dcmanager/orchestrator/states/base.py b/distributedcloud/dcmanager/orchestrator/states/base.py index 2e3bbe76d..3786e80b7 100644 --- a/distributedcloud/dcmanager/orchestrator/states/base.py +++ b/distributedcloud/dcmanager/orchestrator/states/base.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2022 Wind River Systems, Inc. +# Copyright (c) 2020-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -31,10 +31,15 @@ class BaseState(object): self._stop = None self.region_name = region_name self._shared_caches = None + self._job_data = None def override_next_state(self, next_state): self.next_state = next_state + def set_job_data(self, job_data): + """Store an orch_thread job data object""" + self._job_data = job_data + def registerStopEvent(self, stop_event): """Store an orch_thread threading.Event to detect stop.""" self._stop = stop_event @@ -74,6 +79,13 @@ class BaseState(object): self.get_region_name(strategy_step), details)) + def exception_log(self, strategy_step, details): + LOG.exception("Stage: %s, State: %s, Subcloud: %s, Details: %s" + % (strategy_step.stage, + strategy_step.state, + self.get_region_name(strategy_step), + details)) + @staticmethod def get_region_name(strategy_step): """Get the region name for a strategy step""" diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/__init__.py b/distributedcloud/dcmanager/orchestrator/states/patch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py b/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py new file mode 100644 index 000000000..be7e31f53 --- /dev/null +++ b/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. 
diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py b/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py
new file mode 100644
index 000000000..be7e31f53
--- /dev/null
+++ b/distributedcloud/dcmanager/orchestrator/states/patch/applying_vim_patch_strategy.py
@@ -0,0 +1,20 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from dccommon.drivers.openstack import vim
+from dcmanager.common import consts
+from dcmanager.orchestrator.states.applying_vim_strategy import \
+    ApplyingVIMStrategyState
+
+
+class ApplyingVIMPatchStrategyState(ApplyingVIMStrategyState):
+    """State for applying a VIM patch strategy."""
+
+    def __init__(self, region_name):
+        super(ApplyingVIMPatchStrategyState, self).__init__(
+            next_state=consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY,
+            region_name=region_name,
+            strategy_name=vim.STRATEGY_NAME_SW_PATCH)
diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/creating_vim_patch_strategy.py b/distributedcloud/dcmanager/orchestrator/states/patch/creating_vim_patch_strategy.py
new file mode 100644
index 000000000..330c51d85
--- /dev/null
+++ b/distributedcloud/dcmanager/orchestrator/states/patch/creating_vim_patch_strategy.py
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from dccommon.drivers.openstack import vim
+from dcmanager.common import consts
+from dcmanager.orchestrator.states.creating_vim_strategy import \
+    CreatingVIMStrategyState
+
+
+# Max time: 2 minutes = 12 queries x 10 seconds between
+DEFAULT_MAX_QUERIES = 12
+DEFAULT_SLEEP_DURATION = 10
+
+
+class CreatingVIMPatchStrategyState(CreatingVIMStrategyState):
+    """State for creating a VIM patch strategy."""
+
+    def __init__(self, region_name):
+        super(CreatingVIMPatchStrategyState, self).__init__(
+            next_state=consts.STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY,
+            region_name=region_name,
+            strategy_name=vim.STRATEGY_NAME_SW_PATCH)
+
+        self.SKIP_REASON = "no software patches need to be applied"
+        self.SKIP_STATE = consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY
+
+        # Change CreatingVIMStrategyState default values
+        self.sleep_duration = DEFAULT_SLEEP_DURATION
+        self.max_queries = DEFAULT_MAX_QUERIES
+
+    def skip_check(self, strategy_step, subcloud_strategy):
+        """Check if the VIM strategy needs to be skipped"""
+
+        if (subcloud_strategy and
+                (subcloud_strategy.state == vim.STATE_BUILD_FAILED) and
+                (subcloud_strategy.build_phase.reason == self.SKIP_REASON)):
+            self.info_log(strategy_step, "Skip forward in state machine due to:"
+                                         " ({})".format(self.SKIP_REASON))
+            return self.SKIP_STATE
+
+        # If we get here, there is no reason to skip
+        return None
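Editor's note (illustrative only): the skip decision above keys off the exact reason string VIM reports when a build fails because nothing needs patching. A standalone sketch of the same control flow; the namedtuples and literal state strings below are stand-ins for the VIM strategy object, not part of this change:

from collections import namedtuple

# Stand-ins for the object returned by the VIM client's get_strategy()
BuildPhase = namedtuple("BuildPhase", "reason")
FakeStrategy = namedtuple("FakeStrategy", "state build_phase")

SKIP_REASON = "no software patches need to be applied"

def next_state(subcloud_strategy):
    # Mirrors skip_check(): a build failure whose reason says there is
    # nothing to patch means "jump ahead", not "fail the subcloud".
    if (subcloud_strategy and
            subcloud_strategy.state == "build-failed" and
            subcloud_strategy.build_phase.reason == SKIP_REASON):
        return "finishing patch strategy"
    return "applying VIM patch strategy"

# A patch-current subcloud skips straight past the apply state:
assert next_state(FakeStrategy("build-failed", BuildPhase(SKIP_REASON))) == \
    "finishing patch strategy"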
diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/finishing_patch_strategy.py b/distributedcloud/dcmanager/orchestrator/states/patch/finishing_patch_strategy.py
new file mode 100644
index 000000000..34866aa1a
--- /dev/null
+++ b/distributedcloud/dcmanager/orchestrator/states/patch/finishing_patch_strategy.py
@@ -0,0 +1,71 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from dccommon.drivers.openstack import patching_v1
+from dcmanager.common import consts
+from dcmanager.common.exceptions import StrategyStoppedException
+from dcmanager.orchestrator.states.base import BaseState
+
+
+class FinishingPatchStrategyState(BaseState):
+    """Patch orchestration state for cleaning up patches"""
+
+    def __init__(self, region_name):
+        super(FinishingPatchStrategyState, self).__init__(
+            next_state=consts.STRATEGY_STATE_COMPLETE,
+            region_name=region_name)
+        self.region_one_commited_patch_ids = None
+
+    def set_job_data(self, job_data):
+        """Store an orch_thread job data object"""
+        # This fails immediately if the job data is missing the
+        # expected attribute
+        self.region_one_commited_patch_ids = \
+            job_data.region_one_commited_patch_ids
+
+    def perform_state_action(self, strategy_step):
+        self.info_log(strategy_step, "Finishing subcloud patching")
+
+        subcloud_patches = self.get_patching_client(self.region_name).query()
+        self.debug_log(strategy_step, "Patches for subcloud: %s" %
+                       subcloud_patches)
+
+        # For this subcloud, determine which patches should be committed and
+        # which should be deleted. We check the patchstate here because
+        # patches cannot be deleted or committed if they are in a partial
+        # state (e.g. Partial-Apply or Partial-Remove).
+        patches_to_commit = []
+        patches_to_delete = []
+
+        for patch_id in subcloud_patches.keys():
+            patch_state = subcloud_patches[patch_id]["patchstate"]
+
+            if patch_state == patching_v1.PATCH_STATE_AVAILABLE:
+                self.info_log(strategy_step,
+                              "Patch %s will be deleted from subcloud" %
+                              patch_id)
+                patches_to_delete.append(patch_id)
+
+            elif (patch_state == patching_v1.PATCH_STATE_APPLIED
+                    and patch_id in self.region_one_commited_patch_ids):
+                self.info_log(strategy_step,
+                              "Patch %s will be committed in subcloud" %
+                              patch_id)
+                patches_to_commit.append(patch_id)
+
+        if patches_to_delete:
+            self.info_log(strategy_step, "Deleting patches %s from subcloud" %
+                          patches_to_delete)
+            self.get_patching_client(self.region_name).delete(patches_to_delete)
+
+        if self.stopped():
+            raise StrategyStoppedException()
+
+        if patches_to_commit:
+            self.info_log(strategy_step, "Committing patches %s in subcloud" %
+                          patches_to_commit)
+            self.get_patching_client(self.region_name).commit(patches_to_commit)
+
+        return self.next_state
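Editor's note (illustrative only): the delete/commit decision above is effectively a two-row table over patchstate. A compact, self-contained restatement; the classify_patches helper and its inputs are hypothetical, shaped like the patching client's query() payload:

# Sketch of FinishingPatchStrategyState's classification step.
def classify_patches(subcloud_patches, region_one_committed_ids):
    patches_to_delete, patches_to_commit = [], []
    for patch_id, data in subcloud_patches.items():
        if data["patchstate"] == "Available":
            # Unused on the subcloud: delete it
            patches_to_delete.append(patch_id)
        elif (data["patchstate"] == "Applied"
                and patch_id in region_one_committed_ids):
            # Mirror the RegionOne commit on the subcloud
            patches_to_commit.append(patch_id)
    return patches_to_delete, patches_to_commit


subcloud = {"DC.3": {"patchstate": "Applied"},
            "DC.5": {"patchstate": "Available"}}
assert classify_patches(subcloud, ["DC.3"]) == (["DC.5"], ["DC.3"])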
diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/job_data.py b/distributedcloud/dcmanager/orchestrator/states/patch/job_data.py
new file mode 100644
index 000000000..02bca42b8
--- /dev/null
+++ b/distributedcloud/dcmanager/orchestrator/states/patch/job_data.py
@@ -0,0 +1,43 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from dccommon import consts as dccommon_consts
+from dccommon.drivers.openstack import patching_v1
+from dcmanager.common import utils
+from dcmanager.orchestrator.orch_thread import OrchThread
+from oslo_log import log as logging
+
+LOG = logging.getLogger(__name__)
+
+
+class PatchJobData(object):
+    """Job data initialized once and shared across state operators"""
+
+    def __init__(self):
+        self.initialize_data()
+
+    def initialize_data(self):
+        LOG.info("Initializing PatchOrchThread job data")
+
+        loads = OrchThread.get_sysinv_client(
+            dccommon_consts.DEFAULT_REGION_NAME).get_loads()
+
+        installed_loads = utils.get_loads_for_patching(loads)
+
+        self.region_one_patches = OrchThread.get_patching_client(
+            dccommon_consts.DEFAULT_REGION_NAME).query()
+
+        self.region_one_applied_patch_ids = []
+        self.region_one_commited_patch_ids = []
+        for patch_id, patch in self.region_one_patches.items():
+            # Only the patches for the installed loads will be stored
+            if patch["sw_version"] in installed_loads:
+                if patch["repostate"] == patching_v1.PATCH_STATE_APPLIED:
+                    self.region_one_applied_patch_ids.append(patch_id)
+                elif patch["repostate"] == patching_v1.PATCH_STATE_COMMITTED:
+                    self.region_one_commited_patch_ids.append(patch_id)
+                    # A committed patch is also an applied one
+                    self.region_one_applied_patch_ids.append(patch_id)
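Editor's note (illustrative only): a short worked example of the repostate bookkeeping above, with made-up query() data restricted to one installed load; note that a committed patch lands in both lists:

# Made-up RegionOne query() output; values are assumptions, not fixtures.
region_one_patches = {
    "DC.1": {"sw_version": "20.12", "repostate": "Applied"},
    "DC.3": {"sw_version": "20.12", "repostate": "Committed"},
    "DC.4": {"sw_version": "20.12", "repostate": "Available"},
}

applied_ids, committed_ids = [], []
for patch_id, patch in region_one_patches.items():
    if patch["repostate"] == "Applied":
        applied_ids.append(patch_id)
    elif patch["repostate"] == "Committed":
        committed_ids.append(patch_id)
        # A committed patch is also treated as applied downstream.
        applied_ids.append(patch_id)

assert sorted(applied_ids) == ["DC.1", "DC.3"]
assert committed_ids == ["DC.3"]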
+ " Please see /var/log/dcmanager/orchestrator.log for" + " details" % str(e)) + + if message: + raise Exception(message) + + return self.next_state diff --git a/distributedcloud/dcmanager/orchestrator/states/patch/updating_patches.py b/distributedcloud/dcmanager/orchestrator/states/patch/updating_patches.py new file mode 100644 index 000000000..f1c0b0423 --- /dev/null +++ b/distributedcloud/dcmanager/orchestrator/states/patch/updating_patches.py @@ -0,0 +1,159 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import os +import time + +from dccommon.drivers.openstack import patching_v1 +from dcmanager.common import consts +from dcmanager.common.exceptions import StrategyStoppedException +from dcmanager.orchestrator.states.base import BaseState + +# Max time: 1 minute = 6 queries x 10 seconds between +DEFAULT_MAX_QUERIES = 6 +DEFAULT_SLEEP_DURATION = 10 + + +class UpdatingPatchesState(BaseState): + """Patch orchestration state for updating patches""" + + def __init__(self, region_name): + super(UpdatingPatchesState, self).__init__( + next_state=consts.STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY, + region_name=region_name) + self.max_queries = DEFAULT_MAX_QUERIES + self.sleep_duration = DEFAULT_SLEEP_DURATION + + self.region_one_patches = None + self.region_one_applied_patch_ids = None + + def set_job_data(self, job_data): + """Store an orch_thread job data object""" + self.region_one_patches = job_data.region_one_patches + self.region_one_applied_patch_ids = job_data.\ + region_one_applied_patch_ids + + def perform_state_action(self, strategy_step): + """Update patches in this subcloud""" + self.info_log(strategy_step, "Updating patches") + + # Retrieve all subcloud patches + try: + subcloud_patches = self.get_patching_client(self.region_name).\ + query() + except Exception: + message = ("Cannot retrieve subcloud patches. 
Please see logs for"
+                       " details.")
+            self.exception_log(strategy_step, message)
+            raise Exception(message)
+
+        patches_to_upload = []
+        patches_to_apply = []
+        patches_to_remove = []
+
+        subcloud_patch_ids = subcloud_patches.keys()
+
+        # RegionOne applied patches not present on the subcloud need to
+        # be uploaded and applied to the subcloud
+        for patch_id in self.region_one_applied_patch_ids:
+            if patch_id not in subcloud_patch_ids:
+                self.info_log(strategy_step, "Patch %s missing from subcloud" %
+                              patch_id)
+                patches_to_upload.append(patch_id)
+                patches_to_apply.append(patch_id)
+
+        # Check that all applied patches in subcloud match RegionOne
+        for patch_id in subcloud_patch_ids:
+            repostate = subcloud_patches[patch_id]["repostate"]
+            if repostate == patching_v1.PATCH_STATE_APPLIED:
+                if patch_id not in self.region_one_applied_patch_ids:
+                    self.info_log(strategy_step,
+                                  "Patch %s will be removed from subcloud" %
+                                  patch_id)
+                    patches_to_remove.append(patch_id)
+            elif repostate == patching_v1.PATCH_STATE_COMMITTED:
+                if patch_id not in self.region_one_applied_patch_ids:
+                    message = ("Patch %s is committed in subcloud but "
+                               "not applied in SystemController" % patch_id)
+                    self.warn_log(strategy_step, message)
+                    raise Exception(message)
+            elif repostate == patching_v1.PATCH_STATE_AVAILABLE:
+                if patch_id in self.region_one_applied_patch_ids:
+                    patches_to_apply.append(patch_id)
+
+            else:
+                # This patch is in an invalid state
+                message = ("Patch %s in subcloud is in an unexpected state: %s"
+                           % (patch_id, repostate))
+                self.warn_log(strategy_step, message)
+                raise Exception(message)
+
+        if patches_to_upload:
+            self.info_log(strategy_step, "Uploading patches %s to subcloud" %
+                          patches_to_upload)
+            for patch in patches_to_upload:
+                patch_sw_version = self.region_one_patches[patch]["sw_version"]
+                patch_file = "%s/%s/%s.patch" % (consts.PATCH_VAULT_DIR,
+                                                 patch_sw_version, patch)
+                if not os.path.isfile(patch_file):
+                    message = "Patch file %s is missing" % patch_file
+                    self.error_log(strategy_step, message)
+                    raise Exception(message)
+
+                self.get_patching_client(self.region_name).upload([patch_file])
+                if self.stopped():
+                    self.info_log(strategy_step,
+                                  "Exiting because task is stopped")
+                    raise StrategyStoppedException()
+
+        if patches_to_remove:
+            self.info_log(strategy_step, "Removing patches %s from subcloud" %
+                          patches_to_remove)
+            self.get_patching_client(self.region_name).remove(patches_to_remove)
+
+        if patches_to_apply:
+            self.info_log(strategy_step, "Applying patches %s to subcloud" %
+                          patches_to_apply)
+            self.get_patching_client(self.region_name).apply(patches_to_apply)
+
+        # Now that we have applied/removed/uploaded patches, we need to give
+        # the patch controller on this subcloud time to determine whether
+        # each host on that subcloud is patch current.
+        wait_count = 0
+        while True:
+            subcloud_hosts = self.get_patching_client(self.region_name).\
+                query_hosts()
+            self.debug_log(strategy_step,
+                           "query_hosts for subcloud returned %s" %
+                           subcloud_hosts)
+
+            for host in subcloud_hosts:
+                if host["interim_state"]:
+                    # This host is not yet ready.
+                    self.debug_log(strategy_step,
+                                   "Host %s in subcloud in interim state" %
+                                   host["hostname"])
+                    break
+            else:
+                # All hosts in the subcloud are updated
+                break
+
+            wait_count += 1
+            if wait_count >= self.max_queries:
+                # We have waited too long.
+                # We log a warning but do not fail the step
+                message = ("Applying patches to subcloud "
+                           "taking too long to recover. "
+                           "Continuing...")
+                self.warn_log(strategy_step, message)
+                break
+            if self.stopped():
+                self.info_log(strategy_step, "Exiting because task is stopped")
+                raise StrategyStoppedException()
+            # Delay between queries
+            time.sleep(self.sleep_duration)
+
+        return self.next_state
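Editor's note (illustrative only): the reconciliation rules above reduce to a compact decision table over each patch's repostate. The sketch below covers only the classification step, not the patch-file checks or the host interim-state wait; the reconcile helper and its inputs are hypothetical:

# Sketch of UpdatingPatchesState's reconciliation logic.
def reconcile(region_one_applied, subcloud_patches):
    upload, apply_, remove = [], [], []
    for patch_id in region_one_applied:
        if patch_id not in subcloud_patches:
            # Missing entirely from the subcloud
            upload.append(patch_id)
            apply_.append(patch_id)
    for patch_id, data in subcloud_patches.items():
        state = data["repostate"]
        if state == "Applied" and patch_id not in region_one_applied:
            remove.append(patch_id)        # extra on the subcloud
        elif state == "Available" and patch_id in region_one_applied:
            apply_.append(patch_id)        # present but not active
        elif state == "Committed" and patch_id not in region_one_applied:
            raise Exception("committed in subcloud, not in RegionOne")
    return upload, apply_, remove


assert reconcile(["DC.1"], {"DC.2": {"repostate": "Applied"}}) == \
    (["DC.1"], ["DC.1"], ["DC.2"])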
" + "Continuing..") + self.warn_log(strategy_step, message) + break + if self.stopped(): + self.info_log(strategy_step, "Exiting because task is stopped") + raise StrategyStoppedException() + # Delay between queries + time.sleep(self.sleep_duration) + + return self.next_state diff --git a/distributedcloud/dcmanager/orchestrator/sw_update_manager.py b/distributedcloud/dcmanager/orchestrator/sw_update_manager.py index 708acda98..2bc2afaa5 100644 --- a/distributedcloud/dcmanager/orchestrator/sw_update_manager.py +++ b/distributedcloud/dcmanager/orchestrator/sw_update_manager.py @@ -1,5 +1,5 @@ # Copyright 2017 Ericsson AB. -# Copyright (c) 2017-2022 Wind River Systems, Inc. +# Copyright (c) 2017-2023 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -514,19 +514,6 @@ class SwUpdateManager(manager.Manager): consts.SW_UPDATE_STATE_INITIAL, extra_args=extra_args) - # For 'patch', always create a strategy step for the system controller - # A strategy step for the system controller is not added for: - # 'upgrade', 'firmware', 'kube upgrade', 'kube rootca update' - if strategy_type == consts.SW_UPDATE_TYPE_PATCH: - current_stage_counter += 1 - db_api.strategy_step_create( - context, - None, # None means not a subcloud. ie: SystemController - stage=current_stage_counter, - state=consts.STRATEGY_STATE_INITIAL, - details='') - strategy_step_created = True - # Create a strategy step for each subcloud that is managed, online and # out of sync # special cases: diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/__init__.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_applying_vim_patch_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_applying_vim_patch_strategy.py new file mode 100644 index 000000000..d015b257c --- /dev/null +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_applying_vim_patch_strategy.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +from dcmanager.common import consts +from dcmanager.tests.unit.orchestrator.states.patch.test_base import \ + TestPatchState +from dcmanager.tests.unit.orchestrator.states.test_applying_vim_strategy import \ + ApplyingVIMStrategyMixin + + +class TestApplyingVIMPatchStrategyStage(ApplyingVIMStrategyMixin, + TestPatchState): + def setUp(self): + super(TestApplyingVIMPatchStrategyStage, self).setUp() + self.set_state(consts.STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY, + consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY) diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_base.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_base.py new file mode 100644 index 000000000..ed75419ee --- /dev/null +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_base.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. 
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+from dcmanager.common import consts
+from dcmanager.tests.unit.orchestrator.test_base import TestSwUpdate
+
+
+class TestPatchState(TestSwUpdate):
+    DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_PATCH
+
+    def setUp(self):
+        super(TestPatchState, self).setUp()
diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_creating_vim_patch_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_creating_vim_patch_strategy.py
new file mode 100644
index 000000000..8437f2836
--- /dev/null
+++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_creating_vim_patch_strategy.py
@@ -0,0 +1,59 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+from collections import namedtuple
+
+from dccommon.drivers.openstack import vim
+from dcmanager.common import consts
+from dcmanager.tests.unit.fakes import FakeVimStrategy
+from dcmanager.tests.unit.orchestrator.states.patch.test_base import \
+    TestPatchState
+from dcmanager.tests.unit.orchestrator.states.test_creating_vim_strategy import \
+    CreatingVIMStrategyStageMixin
+import mock
+
+
+BuildPhase = namedtuple("BuildPhase", "reason")
+
+
+REASON = "no software patches need to be applied"
+STRATEGY_BUILDING = FakeVimStrategy(state=vim.STATE_BUILDING)
+STRATEGY_FAILED_BUILDING = FakeVimStrategy(state=vim.STATE_BUILD_FAILED,
+                                           build_phase=BuildPhase(REASON))
+
+
+@mock.patch("dcmanager.orchestrator.states.patch.creating_vim_patch_strategy."
+            "DEFAULT_MAX_QUERIES", 3)
+@mock.patch("dcmanager.orchestrator.states.patch.creating_vim_patch_strategy."
+            "DEFAULT_SLEEP_DURATION", 1)
+class TestCreatingVIMPatchStrategyStage(CreatingVIMStrategyStageMixin,
+                                        TestPatchState):
+    def setUp(self):
+        super(TestCreatingVIMPatchStrategyStage, self).setUp()
+        self.set_state(consts.STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY,
+                       consts.STRATEGY_STATE_APPLYING_VIM_PATCH_STRATEGY)
+        self.skip_state = consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY
+
+    def test_skip_if_not_needed(self):
+        """Test creating VIM strategy when no patches need to be applied.
+
+        When VIM returns 'no software patches need to be applied' the state
+        machine should skip the 'applying VIM strategy' state and proceed
+        directly to the 'finishing' state instead.
+        """
+
+        # The first API query happens before the create
+        self.vim_client.get_strategy.side_effect = [None,
+                                                    STRATEGY_BUILDING,
+                                                    STRATEGY_FAILED_BUILDING]
+
+        # API calls act as expected
+        self.vim_client.create_strategy.return_value = STRATEGY_BUILDING
+
+        # invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 self.skip_state)
diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_finishing_patch_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_finishing_patch_strategy.py
new file mode 100644
index 000000000..f7f4f6356
--- /dev/null
+++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_finishing_patch_strategy.py
@@ -0,0 +1,116 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+# +# SPDX-License-Identifier: Apache-2.0 +# +from dcmanager.common import consts +from dcmanager.orchestrator.orch_thread import OrchThread +from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad +from dcmanager.tests.unit.orchestrator.states.patch.test_base import \ + TestPatchState +import mock + +REGION_ONE_PATCHES = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Committed", + "patchstate": "Committed"}, + "DC.4": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.8": {"sw_version": "20.12", + "repostate": "Committed", + "patchstate": "Committed"}} + +SUBCLOUD_PATCHES = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.5": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.8": {"sw_version": "20.12", + "repostate": "Committed", + "patchstate": "Committed"}} + + +class TestPatchFinishingStage(TestPatchState): + def setUp(self): + super(TestPatchFinishingStage, self).setUp() + + self.success_state = consts.STRATEGY_STATE_COMPLETE + + # Add the subcloud being processed by this unit test + self.subcloud = self.setup_subcloud() + + # Add the strategy_step state being processed by this unit test + self.strategy_step = self.setup_strategy_step( + self.subcloud.id, consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY) + + # Add mock API endpoints for patching and sysinv client calls + # invoked by this state + self.patching_client.query = mock.MagicMock() + self.patching_client.delete = mock.MagicMock() + self.patching_client.commit = mock.MagicMock() + self.sysinv_client.get_loads = mock.MagicMock() + + # Mock OrchThread functions used by PatchJobData class + p = mock.patch.object(OrchThread, "get_patching_client") + self.mock_orch_patching_client = p.start() + self.mock_orch_patching_client.return_value = self.patching_client + self.addCleanup(p.stop) + + p = mock.patch.object(OrchThread, "get_sysinv_client") + self.mock_orch_sysinv_client = p.start() + self.mock_orch_sysinv_client.return_value = self.sysinv_client + self.addCleanup(p.stop) + + self.fake_load = FakeLoad(1, software_version="20.12", + state=consts.ACTIVE_LOAD_STATE) + + def test_set_job_data(self): + """Test the 'set_job_data' method""" + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # call determine_state_operator to invoke the set_job_data method + state = self.worker.determine_state_operator(self.strategy_step) + + # Assert that the state has the proper region_one_commited_patch_ids + # attribute + self.assertItemsEqual(["DC.3", "DC.8"], + state.region_one_commited_patch_ids) + + def test_finish(self): + """Test whether the 'finishing' state completes successfully""" + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # invoke the strategy state operation 
on the orch thread + self.worker.perform_state_action(self.strategy_step) + + self.patching_client.delete.assert_called_with(["DC.5"]) + self.patching_client.commit.assert_called_with(["DC.3"]) + + # On success, the state should transition to the next state + self.assert_step_updated(self.strategy_step.subcloud_id, + self.success_state) diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_pre_check.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_pre_check.py new file mode 100644 index 000000000..1f7177305 --- /dev/null +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_pre_check.py @@ -0,0 +1,134 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +from dcmanager.common import consts +from dcmanager.orchestrator.states.patch.pre_check import IGNORED_ALARMS_IDS +from dcmanager.tests.unit.orchestrator.states.fakes import FakeAlarm +from dcmanager.tests.unit.orchestrator.states.patch.test_base import \ + TestPatchState +import mock + + +class TestPatchPreCheckStage(TestPatchState): + def setUp(self): + super(TestPatchPreCheckStage, self).setUp() + + self.success_state = consts.STRATEGY_STATE_UPDATING_PATCHES + + # Add the subcloud being processed by this unit test + self.subcloud = self.setup_subcloud() + + # Add the strategy_step state being processed by this unit test + self.strategy_step = self.setup_strategy_step( + self.subcloud.id, consts.STRATEGY_STATE_PRE_CHECK) + + self.fm_client.get_alarms = mock.MagicMock() + + def test_no_alarms(self): + """Test pre check step where there are no alarms + + The pre-check should transition to the updating patches state + """ + + self.fm_client.get_alarms.return_value = [] + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + # verify the get alarms API call was invoked + self.fm_client.get_alarms.assert_called() + + # verify the expected next state happened + self.assert_step_updated(self.strategy_step.subcloud_id, + self.success_state) + + def test_no_management_affecting_alarm(self): + """Test pre check step where there are no management affecting alarms + + The pre-check should transition to the updating patches state + """ + + self.fm_client.get_alarms.return_value = [FakeAlarm("100.114", "False")] + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + # verify the get alarms API call was invoked + self.fm_client.get_alarms.assert_called() + + # verify the expected next state happened + self.assert_step_updated(self.strategy_step.subcloud_id, + self.success_state) + + def test_management_affected_alarm(self): + """Test pre check step where there is a management affecting alarm + + The pre-check should transition to the failed state + """ + + alarm_list = [FakeAlarm("100.001", "True"), + FakeAlarm("100.002", "True")] + + # also add ignored alarms + for alarm_str in IGNORED_ALARMS_IDS: + alarm_list.append(FakeAlarm(alarm_str, "True")) + + self.fm_client.get_alarms.return_value = alarm_list + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + # verify the get alarms API call was invoked + self.fm_client.get_alarms.assert_called() + + # verify the expected next state happened + self.assert_step_updated(self.strategy_step.subcloud_id, + consts.STRATEGY_STATE_FAILED) + + def test_ignored_alarm(self): + """Test pre check step where 
there is only an ignored alarm
+
+        The pre-check should transition to the updating patches state
+        """
+        # add ignored alarms
+        alarm_list = []
+        for alarm_str in IGNORED_ALARMS_IDS:
+            alarm_list.append(FakeAlarm(alarm_str, "True"))
+
+        self.fm_client.get_alarms.return_value = alarm_list
+
+        # invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        # verify the get alarms API call was invoked
+        self.fm_client.get_alarms.assert_called()
+
+        # verify the expected next state happened
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 self.success_state)
+
+    def test_get_alarms_unexpected_failure(self):
+        """Test pre check step where fm-client get_alarms() fails
+
+        The pre-check should transition to the failed state and the 'details'
+        field should contain the correct message detailing the error
+        """
+
+        self.fm_client.get_alarms.side_effect = Exception('Test error message')
+
+        # invoke the strategy state operation on the orch thread
+        self.worker.perform_state_action(self.strategy_step)
+
+        # verify the get alarms API call was invoked
+        self.fm_client.get_alarms.assert_called()
+
+        # verify the expected next state happened
+        self.assert_step_updated(self.strategy_step.subcloud_id,
+                                 consts.STRATEGY_STATE_FAILED)
+
+        details = ("pre check: Failed to obtain subcloud alarm report due to:"
+                   " (Test error message). Please see /var/log/dcmanager/orche"
+                   "strator.log for details")
+        self.assert_step_details(self.strategy_step.subcloud_id, details)
diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_updating_patches.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_updating_patches.py
new file mode 100644
index 000000000..65e758567
--- /dev/null
+++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/patch/test_updating_patches.py
@@ -0,0 +1,228 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+# +# SPDX-License-Identifier: Apache-2.0 +# +from os import path as os_path + +from dcmanager.common import consts +from dcmanager.orchestrator.orch_thread import OrchThread +from dcmanager.tests.unit.orchestrator.states.fakes import FakeLoad +from dcmanager.tests.unit.orchestrator.states.patch.test_base import \ + TestPatchState +import mock + +REGION_ONE_PATCHES = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Committed", + "patchstate": "Committed"}, + "DC.4": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.8": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}} + +SUBCLOUD_PATCHES_SUCCESS = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Partial-Remove"}, + "DC.5": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.6": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Partial-Apply"}} + +SUBCLOUD_PATCHES_BAD_COMMIT = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Partial-Remove"}, + "DC.5": {"sw_version": "20.12", + "repostate": "Committed", + "patchstate": "Committed"}, + "DC.6": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Partial-Apply"}} + +SUBCLOUD_PATCHES_BAD_STATE = {"DC.1": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Applied"}, + "DC.2": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Available"}, + "DC.3": {"sw_version": "20.12", + "repostate": "Available", + "patchstate": "Partial-Remove"}, + "DC.5": {"sw_version": "20.12", + "repostate": "Unknown", + "patchstate": "Unknown"}, + "DC.6": {"sw_version": "20.12", + "repostate": "Applied", + "patchstate": "Partial-Apply"}} + + +@mock.patch("dcmanager.orchestrator.states.patch.updating_patches." 
+ "DEFAULT_MAX_QUERIES", 3) +@mock.patch("dcmanager.orchestrator.states.patch.updating_patches" + ".DEFAULT_SLEEP_DURATION", 1) +class TestUpdatingPatchesStage(TestPatchState): + def setUp(self): + super(TestUpdatingPatchesStage, self).setUp() + + self.success_state = consts.STRATEGY_STATE_CREATING_VIM_PATCH_STRATEGY + + # Add the subcloud being processed by this unit test + self.subcloud = self.setup_subcloud() + + # Add the strategy_step state being processed by this unit test + self.strategy_step = self.setup_strategy_step( + self.subcloud.id, consts.STRATEGY_STATE_UPDATING_PATCHES) + + # Add mock API endpoints for patching and sysinv client calls + # invoked by this state + self.patching_client.query = mock.MagicMock() + self.sysinv_client.get_loads = mock.MagicMock() + self.patching_client.remove = mock.MagicMock() + self.patching_client.upload = mock.MagicMock() + self.patching_client.apply = mock.MagicMock() + self.patching_client.query_hosts = mock.MagicMock() + + # Mock OrchThread functions used by PatchJobData class + p = mock.patch.object(OrchThread, "get_patching_client") + self.mock_orch_patching_client = p.start() + self.mock_orch_patching_client.return_value = self.patching_client + self.addCleanup(p.stop) + + p = mock.patch.object(OrchThread, "get_sysinv_client") + self.mock_orch_sysinv_client = p.start() + self.mock_orch_sysinv_client.return_value = self.sysinv_client + self.addCleanup(p.stop) + + self.fake_load = FakeLoad(1, software_version="20.12", + state=consts.ACTIVE_LOAD_STATE) + + def test_set_job_data(self): + """Test the 'set_job_data' method""" + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES_SUCCESS] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # call determine_state_operator to invoke the set_job_data method + state = self.worker.determine_state_operator(self.strategy_step) + + # Assert that the state has the proper region_one_patches and + # region_one_applied_patch_ids attributes + self.assertItemsEqual(REGION_ONE_PATCHES, + state.region_one_patches) + self.assertItemsEqual(["DC.1", "DC.2", "DC.3", "DC.8"], + state.region_one_applied_patch_ids) + + @mock.patch.object(os_path, "isfile") + def test_update_subcloud_patches_success(self, mock_os_path_isfile): + """Test update_patches where the API call succeeds.""" + + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES_SUCCESS] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + mock_os_path_isfile.return_value = True + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + self.patching_client.upload.assert_called_with([consts.PATCH_VAULT_DIR + + "/20.12/DC.8.patch"]) + + call_args, _ = self.patching_client.remove.call_args_list[0] + self.assertItemsEqual(["DC.5", "DC.6"], call_args[0]) + + call_args, _ = self.patching_client.apply.call_args_list[0] + self.assertItemsEqual(["DC.2", "DC.3", "DC.8"], call_args[0]) + + # On success, the state should transition to the next state + self.assert_step_updated(self.strategy_step.subcloud_id, + self.success_state) + + self.assert_step_details(self.strategy_step.subcloud_id, "") + + @mock.patch.object(os_path, "isfile") + def test_update_subcloud_patches_bad_committed(self, mock_os_path_isfile): + """Test 
update_patches where the API call fails. + + The update_patches call fails because the patch is 'committed' in + the subcloud but not 'applied' in the System Controller. + """ + + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES_BAD_COMMIT] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + mock_os_path_isfile.return_value = True + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + # Verify it failed and moves to the next step + self.assert_step_updated(self.strategy_step.subcloud_id, + consts.STRATEGY_STATE_FAILED) + + self.assert_step_details(self.strategy_step.subcloud_id, + "updating patches: Patch DC.5 is committed in " + "subcloud but not applied in SystemController") + + @mock.patch.object(os_path, "isfile") + def test_update_subcloud_patches_bad_state(self, mock_os_path_isfile): + """Test update_patches where the API call fails. + + The update_patches call fails because the patch is 'unknown' in + the subcloud which is not a valid state. + """ + + self.patching_client.query.side_effect = [REGION_ONE_PATCHES, + SUBCLOUD_PATCHES_BAD_STATE] + + self.sysinv_client.get_loads.side_effect = [[self.fake_load]] + + mock_os_path_isfile.return_value = True + + # invoke the pre apply setup to create the PatchJobData object + self.worker.pre_apply_setup() + + # invoke the strategy state operation on the orch thread + self.worker.perform_state_action(self.strategy_step) + + # Verify it failed and moves to the next step + self.assert_step_updated(self.strategy_step.subcloud_id, + consts.STRATEGY_STATE_FAILED) + + self.assert_step_details(self.strategy_step.subcloud_id, + "updating patches: Patch DC.5 in subcloud is" + " in an unexpected state: Unknown") diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_finishing_patch_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_finishing_patch_strategy.py index de727895b..c35a9c2c1 100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_finishing_patch_strategy.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_finishing_patch_strategy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, 2022 Wind River Systems, Inc. +# Copyright (c) 2020, 2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -41,11 +41,6 @@ SUBCLOUD_PATCHES = {'DC.1': {'sw_version': '17.07', } -def compare_call_with_unsorted_list(call, unsorted_list): - call_args, _ = call - return call_args[0].sort() == unsorted_list.sort() - - @mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy" ".DEFAULT_MAX_QUERIES", 3) @mock.patch("dcmanager.orchestrator.states.upgrade.finishing_patch_strategy" @@ -81,14 +76,11 @@ class TestSwUpgradeFinishingPatchStrategyStage(TestSwUpgradeState): # invoke the strategy state operation on the orch thread self.worker.perform_state_action(self.strategy_step) - assert(compare_call_with_unsorted_list( - self.patching_client.delete.call_args_list[0], - ['DC.5', 'DC.6'] - )) - assert(compare_call_with_unsorted_list( - self.patching_client.commit.call_args_list[0], - ['DC.2', 'DC.3'] - )) + call_args, _ = self.patching_client.delete.call_args_list[0] + self.assertItemsEqual(['DC.5', 'DC.6'], call_args[0]) + + call_args, _ = self.patching_client.commit.call_args_list[0] + self.assertItemsEqual(['DC.2', 'DC.3'], call_args[0]) # On success, the state should transition to the next state self.assert_step_updated(self.strategy_step.subcloud_id, diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_updating_patches.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_updating_patches.py index 9f2d84e5b..e75796939 100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_updating_patches.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/upgrade/test_updating_patches.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, 2022 Wind River Systems, Inc. +# Copyright (c) 2020, 2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -81,11 +81,6 @@ SUBCLOUD_PATCHES_BAD_STATE = {'DC.1': {'sw_version': '20.12', } -def compare_call_with_unsorted_list(call, unsorted_list): - call_args, _ = call - return call_args[0].sort() == unsorted_list.sort() - - @mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches" ".DEFAULT_MAX_QUERIES", 3) @mock.patch("dcmanager.orchestrator.states.upgrade.updating_patches" @@ -136,14 +131,11 @@ class TestSwUpgradeUpdatingPatchesStage(TestSwUpgradeState): self.patching_client.upload.assert_called_with( [consts.PATCH_VAULT_DIR + '/20.12/DC.8.patch']) - assert(compare_call_with_unsorted_list( - self.patching_client.remove.call_args_list[0], - ['DC.5', 'DC.6'] - )) - assert(compare_call_with_unsorted_list( - self.patching_client.apply.call_args_list[0], - ['DC.2', 'DC.3', 'DC.8'] - )) + call_args, _ = self.patching_client.remove.call_args_list[0] + self.assertItemsEqual(['DC.5', 'DC.6'], call_args[0]) + + call_args, _ = self.patching_client.apply.call_args_list[0] + self.assertItemsEqual(['DC.2', 'DC.3', 'DC.8'], call_args[0]) # On success, the state should transition to the next state self.assert_step_updated(self.strategy_step.subcloud_id, diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/test_base.py b/distributedcloud/dcmanager/tests/unit/orchestrator/test_base.py index caafe264d..182b3dea0 100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/test_base.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/test_base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022 Wind River Systems, Inc. +# Copyright (c) 2017-2023 Wind River Systems, Inc. # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain # a copy of the License at @@ -201,12 +201,16 @@ class TestSwUpdate(base.DCManagerTestCase): def assert_step_updated(self, subcloud_id, update_state): step = db_api.strategy_step_get(self.ctx, subcloud_id) - self.assertEqual(step.state, update_state) + self.assertEqual(update_state, step.state) + + def assert_step_details(self, subcloud_id, details): + step = db_api.strategy_step_get(self.ctx, subcloud_id) + self.assertEqual(details, step.details) # utility methods to help assert the value of any subcloud attribute def assert_subcloud_attribute(self, subcloud_id, attr_name, expected_val): subcloud = db_api.subcloud_get(self.ctx, subcloud_id) - self.assertEqual(subcloud[attr_name], expected_val) + self.assertEqual(expected_val, subcloud[attr_name]) def assert_subcloud_software_version(self, subcloud_id, expected_val): self.assert_subcloud_attribute(subcloud_id, diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/test_sw_update_manager.py b/distributedcloud/dcmanager/tests/unit/orchestrator/test_sw_update_manager.py index d134cdf8e..eebc5acda 100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/test_sw_update_manager.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/test_sw_update_manager.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022 Wind River Systems, Inc. +# Copyright (c) 2017-2023 Wind River Systems, Inc. # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at @@ -15,8 +15,6 @@ import base64 import copy import mock -from os import path as os_path -import threading from oslo_config import cfg @@ -26,13 +24,9 @@ from dcmanager.common import context from dcmanager.common import exceptions from dcmanager.common import prestage from dcmanager.db.sqlalchemy import api as db_api -from dcmanager.orchestrator import patch_orch_thread from dcmanager.orchestrator import sw_update_manager from dcmanager.tests import base -from dcmanager.tests.unit.common import fake_strategy -from dcmanager.tests.unit.common import fake_subcloud -from dcmanager.tests.unit.orchestrator.states.fakes import FakeAlarm from dcmanager.tests import utils @@ -66,15 +60,6 @@ FAKE_SW_PATCH_DATA = { "state": consts.SW_UPDATE_STATE_INITIAL, } -FAKE_STRATEGY_STEP_DATA = { - "id": 1, - "subcloud_id": 1, - "stage": 1, - "state": consts.STRATEGY_STATE_INITIAL, - "details": '', - "subcloud": None -} - health_report_no_mgmt_alarm = \ "System Health:\n \ All hosts are provisioned: [Fail]\n \ @@ -87,11 +72,6 @@ health_report_no_mgmt_alarm = \ All kubernetes control plane pods are ready: [OK]" -def compare_call_with_unsorted_list(call, unsorted_list): - call_args, _ = call - return call_args[0].sort() == unsorted_list.sort() - - class Subcloud(object): def __init__(self, id, name, group_id, is_managed, is_online): self.id = id @@ -108,427 +88,6 @@ class Subcloud(object): self.availability_status = dccommon_consts.AVAILABILITY_OFFLINE -class StrategyStep(object): - def __init__(self, id=1, subcloud_id=1, stage=1, - state=consts.STRATEGY_STATE_INITIAL, details='', - subcloud=None, subcloud_name=None): - self.id = id - self.subcloud_id = subcloud_id - self.stage = stage - self.state = state - self.details = details - self.subcloud = subcloud - self.subcloud_name = subcloud_name - - -class Load(object): - def __init__(self, software_version): - self.software_version = software_version - self.state = consts.ACTIVE_LOAD_STATE - - -class FakePatchingClientOutOfSync(mock.Mock): - 
def __init__(self, region, session, endpoint): - super(FakePatchingClientOutOfSync, self).__init__() - self.region = region - self.session = session - self.endpoint = endpoint - - def query(self, state=None): - if state == 'Committed': - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'} - } - else: - return {} - else: - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.8': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - } - elif self.region == 'subcloud1': - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Partial-Remove'}, - 'DC.5': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.6': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Partial-Apply'}, - } - else: - return {} - - def query_hosts(self): - return [] - - -class FakePatchingClientSubcloudCommitted(mock.Mock): - def __init__(self, region, session, endpoint): - super(FakePatchingClientSubcloudCommitted, self).__init__() - self.region = region - self.session = session - self.endpoint = endpoint - - def query(self, state=None): - if state == 'Committed': - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'} - } - elif self.region == 'subcloud1': - return {'DC.5': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - - } - else: - return {} - else: - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.8': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - } - elif self.region == 'subcloud1': - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Partial-Remove'}, - 'DC.5': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.6': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Partial-Apply'}, - } - else: - return {} - - def query_hosts(self): - return [] - - -class FakePatchingClientSubcloudUnknown(mock.Mock): - def __init__(self, region, session, endpoint): - super(FakePatchingClientSubcloudUnknown, self).__init__() - self.region = region - self.session = session - self.endpoint = endpoint - - def query(self, state=None): - if state == 'Committed': - if self.region == 
dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'} - } - else: - return {} - else: - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.8': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - } - elif self.region == 'subcloud1': - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Partial-Remove'}, - 'DC.5': {'sw_version': '17.07', - 'repostate': 'Unknown', - 'patchstate': 'Unknown'}, - 'DC.6': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Partial-Apply'}, - } - else: - return {} - - def query_hosts(self): - return [] - - -class FakePatchingClientAvailable(mock.Mock): - def __init__(self, region, session, endpoint): - super(FakePatchingClientAvailable, self).__init__() - self.region = region - self.session = session - self.endpoint = endpoint - - def query(self, state=None): - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - if state == 'Committed': - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - } - else: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - } - - elif self.region == 'subcloud1': - if state != 'Committed': - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - } - - else: - return {} - - def query_hosts(self): - return [] - - -class FakePatchingClientFinish(mock.Mock): - def __init__(self, region, session, endpoint): - super(FakePatchingClientFinish, self).__init__() - self.region = region - self.session = session - self.endpoint = endpoint - - def query(self, state=None): - if self.region == dccommon_consts.DEFAULT_REGION_NAME: - if state == 'Committed': - return {'DC.2': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - } - else: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.8': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - } - elif self.region == 'subcloud1': - if state == 'Committed': - return {'DC.4': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - } - else: - return {'DC.1': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.2': {'sw_version': '17.07', - 'repostate': 'Applied', - 'patchstate': 'Applied'}, - 'DC.3': {'sw_version': '17.07', - 'repostate': 
'Applied', - 'patchstate': 'Applied'}, - 'DC.4': {'sw_version': '17.07', - 'repostate': 'Committed', - 'patchstate': 'Committed'}, - 'DC.5': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - 'DC.6': {'sw_version': '17.07', - 'repostate': 'Available', - 'patchstate': 'Available'}, - } - else: - return {} - - def query_hosts(self): - return [] - - -class FakeSysinvClientOneLoad(object): - def __init__(self, region, session, endpoint): - self.region = region - self.session = session - self.endpoint = endpoint - self.loads = [Load('17.07')] - self.health_report = \ - "System Health:\n \ - All hosts are provisioned: [Fail]\n \ - 1 Unprovisioned hosts\n \ - All hosts are unlocked/enabled: [OK]\n \ - All hosts have current configurations: [OK]\n \ - All hosts are patch current: [OK]\n \ - No alarms: [OK]\n \ - All kubernetes nodes are ready: [OK]\n \ - All kubernetes control plane pods are ready: [OK]" - - def get_loads(self): - return self.loads - - def get_system_health(self): - return self.health_report - - -class FakeSysinvClientNoMgmtAffectAlarm(object): - def __init__(self, region, session, endpoint): - self.region = region - self.session = session - self.endpoint = endpoint - self.loads = [Load('17.07')] - self.no_mgmt_alarm = True - - self.health_report = \ - "System Health:\n" \ - "All hosts are provisioned: [OK]\n" \ - "All hosts are unlocked/enabled: [OK]\n" \ - "All hosts have current configurations: [OK]\n" \ - "All hosts are patch current: [OK]\n" \ - "Ceph Storage Healthy: [OK]\n" \ - "No alarms: [Fail]\n" \ - "[1] alarms found, [0] of which are management affecting\n" \ - "All kubernetes nodes are ready: [OK]\n" \ - "All kubernetes control plane pods are ready: [OK]" - - def get_loads(self): - return self.loads - - def get_system_health(self): - return self.health_report - - -class FakeSysinvClientReportTimeOut(object): - def __init__(self, region, session, endpoint): - self.region = region - self.session = session - self.endpoint = endpoint - self.loads = [Load('17.07')] - self.no_mgmt_alarm = True - - def get_loads(self): - return self.loads - - def get_system_health(self): - raise ValueError('Fake Sysinv Time Out') - - -class FakeSysinvClientMgmtAffectAlarm(object): - def __init__(self, region, session, endpoint): - self.region = region - self.session = session - self.endpoint = endpoint - self.loads = [Load('17.07')] - self.no_mgmt_alarm = True - - self.health_report = \ - "System Health:\n" \ - "All hosts are provisioned: [OK]\n" \ - "All hosts are unlocked/enabled: [OK]\n" \ - "All hosts have current configurations: [OK]\n" \ - "All hosts are patch current: [OK]\n" \ - "Ceph Storage Healthy: [OK]\n" \ - "No alarms: [Fail]\n" \ - "[1] alarms found, [1] of which are management affecting\n" \ - "All kubernetes nodes are ready: [OK]\n" \ - "All kubernetes control plane pods are ready: [OK]" - - def get_loads(self): - return self.loads - - def get_system_health(self): - return self.health_report - - -class FakeFMClientIgnoredAlarm(object): - def __init__(self, region, session): - self.region = region - self.session = session - self.alarm_list = [FakeAlarm('900.001', 'True')] - - def get_alarms(self): - return self.alarm_list - - -class FakeFMClientAlarm(object): - def __init__(self, region, session): - self.region = region - self.session = session - self.alarm_list = [FakeAlarm('100.001', 'True'), FakeAlarm('100.002', 'True')] - - def get_alarms(self): - return self.alarm_list - - -class Controller(object): - def __init__(self, hostname): - 
self.hostname = hostname - - # All orch_threads can be mocked the same way class FakeOrchThread(object): def __init__(self): @@ -706,23 +265,8 @@ class TestSwUpdateManager(base.DCManagerTestCase): def test_create_sw_update_strategy_no_subclouds( self, mock_patch_orch_thread): um = sw_update_manager.SwUpdateManager() - response = um.create_sw_update_strategy( - self.ctxt, payload=FAKE_SW_UPDATE_DATA) - - # Verify strategy was created as expected - self.assertEqual(response['type'], - FAKE_SW_UPDATE_DATA['type']) - - # Verify strategy step was created as expected - strategy_steps = db_api.strategy_step_get_all(self.ctx) - self.assertEqual(strategy_steps[0]['state'], - consts.STRATEGY_STATE_INITIAL) - self.assertEqual(strategy_steps[0]['stage'], - 1) - self.assertEqual(strategy_steps[0]['details'], - '') - self.assertEqual(strategy_steps[0]['subcloud_id'], - None) + # No strategy will be created, so it should raise: + # 'Bad strategy request: Strategy has no steps to apply' self.assertRaises(exceptions.BadRequest, um.create_sw_update_strategy, self.ctxt, payload=FAKE_SW_UPDATE_DATA) @@ -765,7 +309,7 @@ class TestSwUpdateManager(base.DCManagerTestCase): self.assertEqual(strategy_steps[0]['details'], '') self.assertEqual(strategy_steps[0]['subcloud_id'], - None) + 1) @mock.patch.object(sw_update_manager, 'PatchOrchThread') def test_create_sw_update_strategy_parallel_for_a_single_group( @@ -1021,8 +565,8 @@ class TestSwUpdateManager(base.DCManagerTestCase): consts.SUBCLOUD_APPLY_TYPE_PARALLEL) # Verify the strategy step list - subcloud_ids = [None, 1, 3, 5, 6, 7] - stage = [1, 2, 2, 3, 4, 4] + subcloud_ids = [1, 3, 5, 6, 7] + stage = [1, 1, 2, 3, 3] strategy_step_list = db_api.strategy_step_get_all(self.ctxt) for index, strategy_step in enumerate(strategy_step_list): self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id) @@ -1083,14 +627,13 @@ class TestSwUpdateManager(base.DCManagerTestCase): self.assertEqual(response['type'], consts.SW_UPDATE_TYPE_PATCH) # Verify the strategy step list - # System Controller denoted as ID None was added to strategy list in patching - # System Controller will be patched prior to all other subclouds - subcloud_ids = [None, 1, 3] - stage = [1, 2, 2] + subcloud_ids = [1, 3] + # Both subclouds are added to the first stage (max-parallel-subclouds=2) + stage = [1, 1] strategy_step_list = db_api.strategy_step_get_all(self.ctxt) subcloud_id_processed = [] stage_processed = [] - for index, strategy_step in enumerate(strategy_step_list): + for strategy_step in strategy_step_list: subcloud_id_processed.append(strategy_step.subcloud_id) stage_processed.append(strategy_step.stage) self.assertEqual(subcloud_ids, subcloud_id_processed) @@ -1213,8 +756,8 @@ class TestSwUpdateManager(base.DCManagerTestCase): consts.SUBCLOUD_APPLY_TYPE_SERIAL) # Verify the strategy step list - subcloud_ids = [None, 1, 3, 5, 6, 7] - stage = [1, 2, 3, 4, 5, 6] + subcloud_ids = [1, 3, 5, 6, 7] + stage = [1, 2, 3, 4, 5] strategy_step_list = db_api.strategy_step_get_all(self.ctxt) for index, strategy_step in enumerate(strategy_step_list): self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id) @@ -1306,8 +849,8 @@ class TestSwUpdateManager(base.DCManagerTestCase): self.assertEqual(strategy_dict['max-parallel-subclouds'], 2) # Verify the strategy step list - subcloud_ids = [None, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13] - stage = [1, 2, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9] + subcloud_ids = [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13] + stage = [1, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8] strategy_step_list = 
         for index, strategy_step in enumerate(strategy_step_list):
             self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
@@ -1400,8 +943,8 @@ class TestSwUpdateManager(base.DCManagerTestCase):
         self.assertEqual(strategy_dict['max-parallel-subclouds'], None)
 
         # Verify the strategy step list
-        subcloud_ids = [None, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13]
-        stage = [1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8]
+        subcloud_ids = [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+        stage = [1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 7]
         strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
         for index, strategy_step in enumerate(strategy_step_list):
             self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
@@ -1491,8 +1034,8 @@ class TestSwUpdateManager(base.DCManagerTestCase):
         self.assertEqual(strategy_dict['subcloud-apply-type'], None)
 
         # Verify the strategy step list
-        subcloud_ids = [None, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13]
-        stage = [1, 2, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9]
+        subcloud_ids = [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+        stage = [1, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8]
         strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
         for index, strategy_step in enumerate(strategy_step_list):
             self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
@@ -1587,7 +1130,7 @@ class TestSwUpdateManager(base.DCManagerTestCase):
                                               is_managed=True, is_online=True)
         self.update_subcloud_status(self.ctxt, fake_subcloud3.id)
 
-        # Subcloud3 will be included in the strategy as it's online
+        # Subcloud4 will be included in the strategy as it's online
         fake_subcloud4 = self.create_subcloud(self.ctxt, 'subcloud4', 1,
                                               is_managed=True, is_online=True)
         self.update_subcloud_status(self.ctxt, fake_subcloud4.id)
@@ -1604,8 +1147,8 @@ class TestSwUpdateManager(base.DCManagerTestCase):
         self.assertEqual(strategy_dict['type'], consts.SW_UPDATE_TYPE_PATCH)
 
         # Verify the strategy step list
-        subcloud_ids = [None, 2, 3, 4]
-        stage = [1, 2, 2, 2]
+        subcloud_ids = [2, 3, 4]
+        stage = [1, 1, 1]
         strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
         for index, strategy_step in enumerate(strategy_step_list):
             self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
@@ -1804,19 +1347,12 @@ class TestSwUpdateManager(base.DCManagerTestCase):
         data = copy.copy(FAKE_SW_UPDATE_DATA)
         data["force"] = True
         data["cloud_name"] = 'subcloud1'
-        strategy_dict = um.create_sw_update_strategy(self.ctxt, payload=data)
-
-        # Assert that values passed through CLI are used instead of group values
-        self.assertEqual(strategy_dict['subcloud-apply-type'],
-                         consts.SUBCLOUD_APPLY_TYPE_PARALLEL)
-
-        # Verify the strategy step list
-        subcloud_ids = [None]
-        stage = [1]
-        strategy_step_list = db_api.strategy_step_get_all(self.ctxt)
-        for index, strategy_step in enumerate(strategy_step_list):
-            self.assertEqual(subcloud_ids[index], strategy_step.subcloud_id)
-            self.assertEqual(stage[index], strategy_step.stage)
+        # No strategy step is created when all subclouds are offline, so it
+        # should raise 'Bad strategy request: Strategy has no steps to apply'
+        self.assertRaises(exceptions.BadRequest,
+                          um.create_sw_update_strategy,
+                          self.ctxt, payload=data)
 
     @mock.patch.object(sw_update_manager, 'PatchOrchThread')
     def test_delete_sw_update_strategy(self, mock_patch_orch_thread):
@@ -1912,423 +1448,3 @@ class TestSwUpdateManager(base.DCManagerTestCase):
         self.assertRaises(exceptions.BadRequest,
                           um.apply_sw_update_strategy,
                           self.ctxt)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_no_management_affected_alarm(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientAvailable
-        mock_sysinv_client.side_effect = FakeSysinvClientNoMgmtAffectAlarm
-
-        FakePatchingClientAvailable.apply = mock.Mock()
-
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_CREATING_STRATEGY)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_no_alarm(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientAvailable
-        mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
-
-        FakePatchingClientAvailable.apply = mock.Mock()
-
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_CREATING_STRATEGY)
-
-    @mock.patch.object(patch_orch_thread, 'FmClient')
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_management_affected_alarm(
-            self, mock_threading, mock_patching_client, mock_os_path_isfile,
-            mock_sysinv_client, mock_fm_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientAvailable
-        mock_sysinv_client.side_effect = FakeSysinvClientMgmtAffectAlarm
-        mock_fm_client.return_value = FakeFMClientAlarm('fake_region', 'fake_session')
-
-        FakePatchingClientAvailable.apply = mock.Mock()
-
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_FAILED)
-
-    @mock.patch.object(patch_orch_thread, 'FmClient')
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_ignored_alarm(
-            self, mock_threading, mock_patching_client, mock_os_path_isfile,
-            mock_sysinv_client, mock_fm_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientAvailable
-        mock_sysinv_client.side_effect = FakeSysinvClientMgmtAffectAlarm
-        mock_fm_client.return_value = FakeFMClientIgnoredAlarm('fake_region', 'fake_session')
-
-        FakePatchingClientAvailable.apply = mock.Mock()
-
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_CREATING_STRATEGY)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_sysinv_get_report_timeout(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientAvailable
-        mock_sysinv_client.side_effect = FakeSysinvClientReportTimeOut
-
-        FakePatchingClientAvailable.apply = mock.Mock()
-
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_FAILED)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_patching_client.side_effect = FakePatchingClientOutOfSync
-        mock_os_path_isfile.return_value = True
-        mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
-        FakePatchingClientOutOfSync.apply = mock.Mock()
-        FakePatchingClientOutOfSync.remove = mock.Mock()
-        FakePatchingClientOutOfSync.upload = mock.Mock()
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        assert(compare_call_with_unsorted_list(
-            FakePatchingClientOutOfSync.remove.call_args_list[0],
-            ['DC.5', 'DC.6']
-        ))
-        FakePatchingClientOutOfSync.upload.assert_called_with(
-            [consts.PATCH_VAULT_DIR + '/17.07/DC.8.patch'])
-        assert(compare_call_with_unsorted_list(
-            FakePatchingClientOutOfSync.apply.call_args_list[0],
-            ['DC.2', 'DC.3', 'DC.8']
-        ))
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_CREATING_STRATEGY)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_bad_committed(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientSubcloudCommitted
-        mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
-        FakePatchingClientOutOfSync.apply = mock.Mock()
-        FakePatchingClientOutOfSync.remove = mock.Mock()
-        FakePatchingClientOutOfSync.upload = mock.Mock()
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_FAILED)
-
-    @mock.patch.object(patch_orch_thread, 'SysinvClient')
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_update_subcloud_patches_bad_state(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile, mock_sysinv_client):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientSubcloudUnknown
-        mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
-        FakePatchingClientOutOfSync.apply = mock.Mock()
-        FakePatchingClientOutOfSync.remove = mock.Mock()
-        FakePatchingClientOutOfSync.upload = mock.Mock()
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.update_subcloud_patches(strategy_step)
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_FAILED)
-
-    @mock.patch.object(os_path, 'isfile')
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    @mock.patch.object(threading, 'Thread')
-    def test_finish(
-            self, mock_threading,
-            mock_patching_client, mock_os_path_isfile):
-
-        subcloud_id = fake_subcloud.create_fake_subcloud(self.ctx).id
-        subcloud = db_api.subcloud_update(
-            self.ctx,
-            subcloud_id,
-            management_state=dccommon_consts.MANAGEMENT_MANAGED,
-            availability_status=dccommon_consts.AVAILABILITY_ONLINE)
-        fake_strategy.create_fake_strategy_step(
-            self.ctx,
-            subcloud_id=subcloud.id,
-            state=consts.STRATEGY_STATE_INITIAL)
-        strategy_step = db_api.strategy_step_get_by_name(self.ctx, subcloud.name)
-
-        mock_os_path_isfile.return_value = True
-        mock_patching_client.side_effect = FakePatchingClientFinish
-        FakePatchingClientFinish.delete = mock.Mock()
-        FakePatchingClientFinish.commit = mock.Mock()
-        sw_update_manager.PatchOrchThread.stopped = lambda x: False
-        mock_strategy_lock = mock.Mock()
-        pot = sw_update_manager.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-        # invoke get_region_one_patches once to update required attributes
-        pot.get_region_one_patches()
-        pot.finish(strategy_step)
-
-        assert(compare_call_with_unsorted_list(
-            FakePatchingClientFinish.delete.call_args_list[0],
-            ['DC.5', 'DC.6']
-        ))
-        assert(compare_call_with_unsorted_list(
-            FakePatchingClientFinish.commit.call_args_list[0],
-            ['DC.2', 'DC.3']
-        ))
-
-        # Verify that strategy step was updated
-        updated_strategy_steps = db_api.strategy_step_get_all(self.ctx)
-        self.assertEqual(updated_strategy_steps[0]['state'],
-                         consts.STRATEGY_STATE_COMPLETE)
-
-    @mock.patch.object(patch_orch_thread, 'PatchingClient')
-    def test_get_region_one_patches(self, mock_patching_client):
-        mock_strategy_lock = mock.Mock()
-        mock_patching_client.side_effect = FakePatchingClientOutOfSync
-        pot = patch_orch_thread.PatchOrchThread(mock_strategy_lock,
-                                                self.fake_dcmanager_audit_api)
-        pot.get_ks_client = mock.Mock()
-        pot.get_region_one_patches()
-
-        regionone_patches = dict()
-        regionone_patches = \
-            FakePatchingClientOutOfSync(
-                dccommon_consts.DEFAULT_REGION_NAME, mock.Mock(), mock.Mock()).query()
-        regionone_applied_patch_ids = [
-            patch_id for patch_id in regionone_patches.keys()
-            if regionone_patches[patch_id]['repostate'] in [
-                'Applied', 'Committed']]
-
-        # Verify the update of regionone_patches attribute
-        self.assertEqual(pot.regionone_patches, regionone_patches)
-        # Verify the update of regionone_applied_patch_ids attribute
-        self.assertEqual(pot.regionone_applied_patch_ids,
-                         regionone_applied_patch_ids)
-
-        regionone_committed_patches = \
-            FakePatchingClientOutOfSync(
-                dccommon_consts.DEFAULT_REGION_NAME, mock.Mock(), mock.Mock()
-            ).query('Committed')
-        regionone_committed_patch_ids = [
-            patch_id for patch_id in regionone_committed_patches]
-        # Verify the update of regionone_committed_patch_ids attribute
-        self.assertEqual(pot.regionone_committed_patch_ids,
-                         regionone_committed_patch_ids)
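
Note (illustrative, not part of the diff): the monolithic
update_subcloud_patches()/finish() flow deleted above is replaced by one
state object per strategy step. A minimal sketch of the happy-path
ordering, inferred from the states/patch/ modules added by this patch and
from the commit message (the authoritative transition logic lives in
patch_orch_thread.py, not here):

    # Sketch only: approximate happy-path order of the new patch states.
    PATCH_STATE_ORDER = [
        "pre check",                    # alarm checks moved out of 'updating'
        "updating patches",
        "creating VIM patch strategy",  # may skip straight to 'finishing
                                        # patch strategy' when the subcloud
                                        # is already patched (test plan 5)
        "applying VIM patch strategy",
        "finishing patch strategy",
    ]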
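The reworked stage expectations above (e.g. subcloud_ids = [1, 3, 5, 6, 7]
with stage = [1, 1, 2, 3, 3] at max-parallel-subclouds=2) are consistent
with stages being assigned per subcloud group, in chunks of
max_parallel_subclouds, now that no stage is reserved for the system
controller. A minimal sketch, assuming that chunking behaviour (the helper
name is illustrative, not dcmanager code):

    def assign_stages(groups, max_parallel):
        """groups: lists of subcloud ids per group, in orchestration order."""
        stages = {}
        stage = 0
        for group in groups:
            for index, subcloud_id in enumerate(group):
                if index % max_parallel == 0:
                    stage += 1  # start a new stage for each chunk
                stages[subcloud_id] = stage
        return stages

    # Reproduces the parallel expectation above:
    # assign_stages([[1, 3], [5], [6, 7]], 2) -> {1: 1, 3: 1, 5: 2, 6: 3, 7: 3}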
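The deleted FakeSysinvClient* classes differ mainly in the health report
line '[N] alarms found, [M] of which are management affecting', and the
FakeFMClient* classes in whether the raised alarm is one that orchestration
tolerates (900.001, which test plan item 4 expects to pass). A minimal
sketch of that style of check, assuming the new pre-check state parses the
report and then filters ignorable alarms (the regex, names and alarm_id
attribute are assumptions, not the actual pre_check.py code):

    import re

    MGMT_AFFECTING = re.compile(
        r"\[(\d+)\] alarms found, \[(\d+)\] of which are management affecting")
    IGNORED_ALARMS = ("900.001",)  # e.g. patch operation in progress

    def has_blocking_alarms(health_report, alarms):
        match = MGMT_AFFECTING.search(health_report)
        if not match or int(match.group(2)) == 0:
            return False  # healthy report, nothing management affecting
        # Cross-check with FM and ignore alarms expected during patching
        return any(a.alarm_id not in IGNORED_ALARMS for a in alarms)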
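Finally, the deleted test_get_region_one_patches documents the RegionOne
bookkeeping the orchestrator relies on: every patch whose repostate is
Applied or Committed counts as applied, and committed ids come from a
separate query('Committed'). In essence, assuming a patching client with
the query() semantics mocked above (the helper below is illustrative):

    def region_one_patch_ids(patching_client):
        regionone_patches = patching_client.query()
        applied_ids = [
            patch_id for patch_id, info in regionone_patches.items()
            if info['repostate'] in ['Applied', 'Committed']]
        committed_ids = list(patching_client.query('Committed'))
        return regionone_patches, applied_ids, committed_ids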