diff --git a/distributedcloud/dccommon/drivers/openstack/software_v1.py b/distributedcloud/dccommon/drivers/openstack/software_v1.py index 2e92d5f92..47b92b847 100644 --- a/distributedcloud/dccommon/drivers/openstack/software_v1.py +++ b/distributedcloud/dccommon/drivers/openstack/software_v1.py @@ -18,6 +18,7 @@ ABORTING = "aborting" AVAILABLE = "available" COMMITTED = "committed" DEPLOYED = "deployed" +DEPLOYING = "deploying" REMOVING = "removing" UNAVAILABLE = "unavailable" @@ -70,6 +71,12 @@ class SoftwareClient(base.DriverBase): response = requests.post(url, headers=self.headers, timeout=timeout) return self._handle_response(response, operation="Deploy precheck") + def deploy_delete(self, timeout=REST_DEFAULT_TIMEOUT): + """Deploy delete""" + url = self.endpoint + "/deploy" + response = requests.delete(url, headers=self.headers, timeout=timeout) + return self._handle_response(response, operation="Deploy delete") + def commit_patch(self, releases, timeout=REST_DEFAULT_TIMEOUT): """Commit patch""" release_str = "/".join(releases) @@ -86,5 +93,5 @@ class SoftwareClient(base.DriverBase): if isinstance(data, dict) and data.get("error"): message = f"{operation} failed with error: {data.get('error')}" LOG.error(message) - raise Exception(message) + raise exceptions.SoftwareDataException(endpoint=response.url, error=message) return data diff --git a/distributedcloud/dccommon/exceptions.py b/distributedcloud/dccommon/exceptions.py index af79a48a2..4f19720d6 100644 --- a/distributedcloud/dccommon/exceptions.py +++ b/distributedcloud/dccommon/exceptions.py @@ -121,6 +121,10 @@ class ApiException(DCCommonException): message = _("%(endpoint)s failed with status code: %(rc)d") +class SoftwareDataException(DCCommonException): + message = _("%(endpoint)s failed with data error: %(error)s") + + class SystemPeerNotFound(NotFound): message = _("System Peer %(system_peer)s not found") diff --git a/distributedcloud/dcmanager/common/exceptions.py 
b/distributedcloud/dcmanager/common/exceptions.py index d5e9fdab9..863ba0479 100644 --- a/distributedcloud/dcmanager/common/exceptions.py +++ b/distributedcloud/dcmanager/common/exceptions.py @@ -250,6 +250,26 @@ class PreCheckFailedException(DCManagerException): message = _("Subcloud %(subcloud)s upgrade precheck failed: %(details)s") +class SoftwarePreCheckFailedException(DCManagerException): + message = _("Subcloud %(subcloud)s software deploy precheck failed: %(details)s") + + +class SoftwareListFailedException(DCManagerException): + message = _("Subcloud %(subcloud)s software list failed: %(details)s") + + +class SoftwareDeleteFailedException(DCManagerException): + message = _("Subcloud %(subcloud)s software delete failed: %(details)s") + + +class SoftwareDeployCommitFailedException(DCManagerException): + message = _("Subcloud %(subcloud)s software deploy commit failed: %(details)s") + + +class SoftwareDeployDeleteFailedException(DCManagerException): + message = _("Subcloud %(subcloud)s software deploy delete failed: %(details)s") + + class PrestagePreCheckFailedException(DCManagerException): """PrestagePreCheckFailedException diff --git a/distributedcloud/dcmanager/orchestrator/states/creating_vim_strategy.py b/distributedcloud/dcmanager/orchestrator/states/creating_vim_strategy.py index 65b96c2c6..0ca458a1a 100644 --- a/distributedcloud/dcmanager/orchestrator/states/creating_vim_strategy.py +++ b/distributedcloud/dcmanager/orchestrator/states/creating_vim_strategy.py @@ -44,9 +44,11 @@ class CreatingVIMStrategyState(BaseState): extra_args = utils.get_sw_update_strategy_extra_args(self.context) release_id = extra_args.get(consts.EXTRA_ARGS_RELEASE_ID) opts_dict["release_id"] = release_id + # Create rollback = False since DC orchestration does not support rollback + opts_dict["rollback"] = False # Call the API to build the VIM strategy - # release will be sent as a **kwargs value for sw-deploy strategy + # release and rollback will be sent as a **kwargs value for 
sw-deploy strategy subcloud_strategy = self.get_vim_client(region).create_strategy( self.strategy_name, opts_dict['storage-apply-type'], @@ -54,7 +56,9 @@ class CreatingVIMStrategyState(BaseState): opts_dict['max-parallel-workers'], opts_dict['default-instance-action'], opts_dict['alarm-restriction-type'], - release=opts_dict.get('release_id'),) + release=opts_dict.get('release_id'), + rollback=opts_dict.get('rollback'), + ) # a successful API call to create MUST set the state be 'building' if subcloud_strategy.state != vim.STATE_BUILDING: diff --git a/distributedcloud/dcmanager/orchestrator/states/software/finish_strategy.py b/distributedcloud/dcmanager/orchestrator/states/software/finish_strategy.py index 58e509d38..f04470dc0 100644 --- a/distributedcloud/dcmanager/orchestrator/states/software/finish_strategy.py +++ b/distributedcloud/dcmanager/orchestrator/states/software/finish_strategy.py @@ -3,12 +3,15 @@ # # SPDX-License-Identifier: Apache-2.0 # + from dccommon.drivers.openstack import software_v1 from dcmanager.common import consts -from dcmanager.common.exceptions import StrategyStoppedException +from dcmanager.common import exceptions +from dcmanager.db import api as db_api from dcmanager.orchestrator.states.base import BaseState -from dcmanager.orchestrator.states.software.cache.cache_specifications import \ - REGION_ONE_RELEASE_USM_CACHE_TYPE +from dcmanager.orchestrator.states.software.cache.cache_specifications import ( + REGION_ONE_RELEASE_USM_CACHE_TYPE, +) class FinishStrategyState(BaseState): @@ -25,68 +28,129 @@ class FinishStrategyState(BaseState): self.info_log(strategy_step, "Finishing software strategy") - regionone_committed_releases = self._read_from_cache( - REGION_ONE_RELEASE_USM_CACHE_TYPE, - state=software_v1.COMMITTED + subcloud = db_api.subcloud_get(self.context, strategy_step.subcloud.id) + + regionone_deployed_releases = self._read_from_cache( + REGION_ONE_RELEASE_USM_CACHE_TYPE, state=software_v1.DEPLOYED ) self.debug_log( 
strategy_step, - f"regionone_committed_releases: {regionone_committed_releases}" + f"regionone_deployed_releases: {regionone_deployed_releases}", ) try: software_client = self.get_software_client(self.region_name) subcloud_releases = software_client.list() except Exception: - message = ("Cannot retrieve subcloud releases. Please see logs for " - "details.") + message = "Cannot retrieve subcloud releases. Please see logs for details." self.exception_log(strategy_step, message) - raise Exception(message) + raise exceptions.SoftwareListFailedException( + subcloud=subcloud.name, + details=message, + ) - self.debug_log(strategy_step, - f"Releases for subcloud: {subcloud_releases}") + self.debug_log(strategy_step, f"Releases for subcloud: {subcloud_releases}") - releases_to_commit = list() - releases_to_delete = list() - - # For this subcloud, determine which releases should be committed and - # which should be deleted. + # For this subcloud, determine which releases should be committed, + # which should be deleted and which should finish the deploy. 
releases_to_delete = [ - release["release_id"] for release in subcloud_releases + release["release_id"] + for release in subcloud_releases if release["state"] in (software_v1.AVAILABLE, software_v1.UNAVAILABLE) ] - releases_to_commit = [ - release["release_id"] for release in subcloud_releases - if release["state"] == software_v1.DEPLOYED - and any( - release["release_id"] == release_regionone["release_id"] - for release_regionone in regionone_committed_releases - ) + + # TODO(nicodemos): Update releases_to_commit and handle it after + # `software commit` is implemented + releases_to_commit = [] + + releases_to_deploy_delete = [ + release["release_id"] + for release in subcloud_releases + if release["state"] == software_v1.DEPLOYING ] if releases_to_delete: - self.info_log(strategy_step, f"Deleting releases {releases_to_delete}") - try: - software_client.delete(releases_to_delete) - except Exception: - message = ("Cannot delete releases from subcloud. Please see " - "logs for details.") - self.exception_log(strategy_step, message) - raise Exception(message) + self._handle_release_delete( + strategy_step, software_client, subcloud, releases_to_delete + ) if self.stopped(): - raise StrategyStoppedException() + raise exceptions.StrategyStoppedException() if releases_to_commit: - self.info_log(strategy_step, - f"Committing releases {releases_to_commit} to subcloud") - try: - software_client.commit_patch(releases_to_commit) - except Exception: - message = ("Cannot commit releases to subcloud. 
Please see logs for " - "details.") - self.exception_log(strategy_step, message) - raise Exception(message) + self._handle_deploy_commit( + strategy_step, software_client, subcloud, releases_to_commit + ) + + if releases_to_deploy_delete: + self._handle_deploy_delete( + strategy_step, + software_client, + subcloud, + releases_to_deploy_delete, + regionone_deployed_releases, + ) return self.next_state + + def _handle_release_delete( + self, strategy_step, software_client, subcloud, releases_to_delete + ): + self.info_log(strategy_step, f"Deleting releases {releases_to_delete}") + try: + software_client.delete(releases_to_delete) + except Exception: + message = ( + "Cannot delete releases from subcloud. Please see logs for details." + ) + self.exception_log(strategy_step, message) + raise exceptions.SoftwareDeleteFailedException( + subcloud=subcloud.name, + details=message, + ) + + def _handle_deploy_commit( + self, strategy_step, software_client, subcloud, releases_to_commit + ): + raise NotImplementedError() + + # If there are releases in deploying state and it's deployed in the regionone, + # they should be finished executing the deploy delete operation. + def _handle_deploy_delete( + self, + strategy_step, + software_client, + subcloud, + releases_to_deploy_delete, + regionone_deployed_releases, + ): + if not any( + release_id == release_regionone["release_id"] + for release_id in releases_to_deploy_delete + for release_regionone in regionone_deployed_releases + ): + message = ( + f"There is a deploying release on subcloud {subcloud.name} " + "that is not deployed in System Controller. Aborting." + ) + self.error_log(strategy_step, message) + raise exceptions.SoftwareDeployDeleteFailedException( + subcloud=subcloud.name, + details=message, + ) + self.info_log( + strategy_step, + f"Finishing releases {releases_to_deploy_delete} to subcloud", + ) + try: + software_client.deploy_delete() + except Exception: + message = ( + "Cannot finish deploy delete on subcloud. 
Please see logs for details." + ) + self.exception_log(strategy_step, message) + raise exceptions.SoftwareDeployDeleteFailedException( + subcloud=subcloud.name, + details=message, + ) diff --git a/distributedcloud/dcmanager/orchestrator/states/software/pre_check.py b/distributedcloud/dcmanager/orchestrator/states/software/pre_check.py index d6dcbb0df..0eb1578c3 100644 --- a/distributedcloud/dcmanager/orchestrator/states/software/pre_check.py +++ b/distributedcloud/dcmanager/orchestrator/states/software/pre_check.py @@ -58,7 +58,7 @@ class PreCheckState(BaseState): except Exception: details = f"Get current strategy failed on subcloud: {subcloud.name}" self.error_log(strategy_step, details) - raise exceptions.PreCheckFailedException( + raise exceptions.SoftwarePreCheckFailedException( subcloud=subcloud.name, details=details, ) @@ -78,7 +78,7 @@ class PreCheckState(BaseState): f"subcloud {subcloud.name}. Aborting." ) self.error_log(strategy_step, details) - raise exceptions.PreCheckFailedException( + raise exceptions.SoftwarePreCheckFailedException( subcloud=subcloud.name, details=details, ) @@ -102,7 +102,10 @@ class PreCheckState(BaseState): except Exception as exc: message = f"Cannot retrieve release list : {exc}." self.exception_log(strategy_step, message) - raise Exception(message) + raise exceptions.SoftwareListFailedException( + subcloud=subcloud.name, + details=message, + ) self._check_prestaged_data( strategy_step, @@ -129,7 +132,7 @@ class PreCheckState(BaseState): f"subcloud: {subcloud_name}" ) self.error_log(strategy_step, details) - raise exceptions.PreCheckFailedException( + raise exceptions.SoftwarePreCheckFailedException( subcloud=subcloud_name, details=details, ) @@ -140,7 +143,7 @@ class PreCheckState(BaseState): f"{strategy_state}. Aborting." 
) self.error_log(strategy_step, details) - raise exceptions.PreCheckFailedException( + raise exceptions.SoftwarePreCheckFailedException( subcloud=subcloud_name, details=details, ) @@ -163,7 +166,7 @@ class PreCheckState(BaseState): if not (is_available_in_subcloud and regionone_deployed_release): details = f"Release {release_id} is not prestaged. Aborting." self.error_log(strategy_step, details) - raise exceptions.PreCheckFailedException( + raise exceptions.SoftwarePreCheckFailedException( subcloud=subcloud_name, details=details, ) diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_create_vim_software_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_create_vim_software_strategy.py index d8304c587..32736be2a 100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_create_vim_software_strategy.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_create_vim_software_strategy.py @@ -10,8 +10,9 @@ from dccommon.drivers.openstack import vim from dcmanager.common import consts from dcmanager.tests.unit.common import fake_strategy from dcmanager.tests.unit.fakes import FakeVimStrategy -from dcmanager.tests.unit.orchestrator.states.software.test_base import \ - TestSoftwareOrchestrator +from dcmanager.tests.unit.orchestrator.states.software.test_base import ( + TestSoftwareOrchestrator, +) STRATEGY_BUILDING = FakeVimStrategy(state=vim.STATE_BUILDING) STRATEGY_DONE_BUILDING = FakeVimStrategy(state=vim.STATE_READY_TO_APPLY) @@ -66,6 +67,7 @@ class TestCreateVIMSoftwareStrategyState(TestSoftwareOrchestrator): "migrate", "relaxed", release=RELEASE_ID, + rollback=False, ) # On success, the state should transition to the next state diff --git a/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_finish_strategy.py b/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_finish_strategy.py index 37ec47b3f..6976168fd 
100644 --- a/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_finish_strategy.py +++ b/distributedcloud/dcmanager/tests/unit/orchestrator/states/software/test_finish_strategy.py @@ -17,22 +17,22 @@ from dcmanager.tests.unit.orchestrator.states.software.test_base import \ REGION_ONE_RELEASES = [ { "release_id": "starlingx-9.0.0", - "state": "committed", + "state": "deployed", "sw_version": "9.0.0", }, { "release_id": "starlingx-9.0.1", - "state": "committed", + "state": "deployed", "sw_version": "9.0.1", }, { "release_id": "starlingx-9.0.2", - "state": "committed", + "state": "deployed", "sw_version": "9.0.2", }, { "release_id": "starlingx-9.0.3", - "state": "committed", + "state": "deployed", "sw_version": "9.0.3", }, ] @@ -40,12 +40,12 @@ REGION_ONE_RELEASES = [ SUBCLOUD_RELEASES = [ { "release_id": "starlingx-9.0.0", - "state": "committed", + "state": "deployed", "sw_version": "9.0.0", }, { "release_id": "starlingx-9.0.1", - "state": "committed", + "state": "deployed", "sw_version": "9.0.1", }, { @@ -53,16 +53,16 @@ SUBCLOUD_RELEASES = [ "state": "deployed", "sw_version": "9.0.2", }, + { + "release_id": "starlingx-9.0.3", + "state": "deploying", + "sw_version": "9.0.3", + }, { "release_id": "starlingx-9.0.4", "state": "available", "sw_version": "9.0.4", }, - { - "release_id": "starlingx-9.0.5", - "state": "deployed", - "sw_version": "9.0.5", - }, ] @@ -86,6 +86,7 @@ class TestFinishStrategyState(TestSoftwareOrchestrator): self.software_client.list = mock.MagicMock() self.software_client.delete = mock.MagicMock() self.software_client.commit_patch = mock.MagicMock() + self.software_client.deploy_delete = mock.MagicMock() self._read_from_cache = mock.MagicMock() def test_finish_strategy_success(self): @@ -101,9 +102,8 @@ class TestFinishStrategyState(TestSoftwareOrchestrator): call_args, _ = self.software_client.delete.call_args_list[0] self.assertItemsEqual(["starlingx-9.0.4"], call_args[0]) - 
self.software_client.commit_patch.assert_called_once_with( - ["starlingx-9.0.2"] - ) + self.software_client.commit_patch.assert_not_called() + self.software_client.deploy_delete.assert_called_once() # On success, the state should transition to the next state self.assert_step_updated(self.strategy_step.subcloud_id, @@ -169,8 +169,8 @@ class TestFinishStrategyState(TestSoftwareOrchestrator): self.strategy_step.subcloud_id, consts.STRATEGY_STATE_FAILED ) - def test_finish_strategy_fails_when_commit_patch_exception(self): - """Test finish strategy fails when software client commit_patch + def test_finish_strategy_fails_when_deploy_delete_exception(self): + """Test finish strategy fails when software client deploy_delete raises exception """ @@ -178,7 +178,7 @@ class TestFinishStrategyState(TestSoftwareOrchestrator): self.mock_read_from_cache.return_value = REGION_ONE_RELEASES self.software_client.list.side_effect = [SUBCLOUD_RELEASES] - self.software_client.commit_patch.side_effect = Exception() + self.software_client.deploy_delete.side_effect = Exception() self.worker.perform_state_action(self.strategy_step)