diff --git a/distributedcloud/dcmanager/__init__.py b/distributedcloud/dcmanager/__init__.py index d0f82b54b..4d604bdbd 100644 --- a/distributedcloud/dcmanager/__init__.py +++ b/distributedcloud/dcmanager/__init__.py @@ -1,5 +1,4 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2017, 2019, 2021 Wind River Systems, Inc. +# Copyright (c) 2017, 2019, 2021, 2024 Wind River Systems, Inc. # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at @@ -16,4 +15,4 @@ import pbr.version -__version__ = pbr.version.VersionInfo('distributedcloud').version_string() +__version__ = pbr.version.VersionInfo("distributedcloud").version_string() diff --git a/distributedcloud/dcmanager/cmd/api.py b/distributedcloud/dcmanager/cmd/api.py index 5b203d999..336b6f6b4 100644 --- a/distributedcloud/dcmanager/cmd/api.py +++ b/distributedcloud/dcmanager/cmd/api.py @@ -23,6 +23,7 @@ import logging as std_logging import sys import eventlet + eventlet.monkey_patch(os=False) # pylint: disable=wrong-import-position @@ -36,11 +37,12 @@ from dcmanager.api import app # noqa: E402 from dcmanager.common import config # noqa: E402 from dcmanager.common import messaging # noqa: E402 from dcorch.common import messaging as dcorch_messaging # noqa: E402 + # pylint: enable=wrong-import-position CONF = cfg.CONF config.register_options() -LOG = logging.getLogger('dcmanager.api') +LOG = logging.getLogger("dcmanager.api") def main(): @@ -56,8 +58,10 @@ def main(): LOG.warning("Wrong worker number, worker = %(workers)s", workers) workers = 1 - LOG.info("Server on http://%(host)s:%(port)s with %(workers)s", - {'host': host, 'port': port, 'workers': workers}) + LOG.info( + "Server on http://%(host)s:%(port)s with %(workers)s", + {"host": host, "port": port, "workers": workers}, + ) messaging.setup() dcorch_messaging.setup() systemd.notify_once() @@ -72,5 +76,5 @@ def main(): app.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/audit.py b/distributedcloud/dcmanager/cmd/audit.py index 1d863aef9..ff100c722 100644 --- a/distributedcloud/dcmanager/cmd/audit.py +++ b/distributedcloud/dcmanager/cmd/audit.py @@ -19,6 +19,7 @@ DC Manager Audit Service. 
""" import eventlet + eventlet.monkey_patch() # pylint: disable=wrong-import-position @@ -29,28 +30,28 @@ from oslo_service import service # noqa: E402 from dcmanager.common import config # noqa: E402 from dcmanager.common import messaging # noqa: E402 + # pylint: enable=wrong-import-position _lazy.enable_lazy() config.register_options() config.register_keystone_options() -LOG = logging.getLogger('dcmanager.audit') +LOG = logging.getLogger("dcmanager.audit") CONF = cfg.CONF def main(): logging.register_options(CONF) - CONF(project='dcmanager', prog='dcmanager-audit') - logging.setup(cfg.CONF, 'dcmanager-audit') + CONF(project="dcmanager", prog="dcmanager-audit") + logging.setup(cfg.CONF, "dcmanager-audit") logging.set_defaults() messaging.setup() from dcmanager.audit import service as audit srv = audit.DCManagerAuditService() - launcher = service.launch(cfg.CONF, - srv, workers=CONF.audit_workers) + launcher = service.launch(cfg.CONF, srv, workers=CONF.audit_workers) LOG.info("Starting...") LOG.debug("Configuration:") @@ -59,5 +60,5 @@ def main(): launcher.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/audit_worker.py b/distributedcloud/dcmanager/cmd/audit_worker.py index 01c24a287..8f2fb5d8d 100644 --- a/distributedcloud/dcmanager/cmd/audit_worker.py +++ b/distributedcloud/dcmanager/cmd/audit_worker.py @@ -19,6 +19,7 @@ DC Manager Audit Worker Service. """ import eventlet + eventlet.monkey_patch() # pylint: disable=wrong-import-position @@ -29,28 +30,28 @@ from oslo_service import service # noqa: E402 from dcmanager.common import config # noqa: E402 from dcmanager.common import messaging # noqa: E402 + # pylint: enable=wrong-import-position _lazy.enable_lazy() config.register_options() config.register_keystone_options() -LOG = logging.getLogger('dcmanager.audit-worker') +LOG = logging.getLogger("dcmanager.audit-worker") CONF = cfg.CONF def main(): logging.register_options(CONF) - CONF(project='dcmanager', prog='dcmanager-audit-worker') - logging.setup(cfg.CONF, 'dcmanager-audit-worker') + CONF(project="dcmanager", prog="dcmanager-audit-worker") + logging.setup(cfg.CONF, "dcmanager-audit-worker") logging.set_defaults() messaging.setup() from dcmanager.audit import service as audit srv = audit.DCManagerAuditWorkerService() - launcher = service.launch(cfg.CONF, - srv, workers=CONF.audit_worker_workers) + launcher = service.launch(cfg.CONF, srv, workers=CONF.audit_worker_workers) LOG.info("Starting...") LOG.debug("Configuration:") @@ -59,5 +60,5 @@ def main(): launcher.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/manage.py b/distributedcloud/dcmanager/cmd/manage.py index 605e32da7..885afa7bf 100644 --- a/distributedcloud/dcmanager/cmd/manage.py +++ b/distributedcloud/dcmanager/cmd/manage.py @@ -45,32 +45,37 @@ def do_db_sync(): def add_command_parsers(subparsers): - parser = subparsers.add_parser('db_version') + parser = subparsers.add_parser("db_version") parser.set_defaults(func=do_db_version) - parser = subparsers.add_parser('db_sync') + parser = subparsers.add_parser("db_sync") parser.set_defaults(func=do_db_sync) - parser.add_argument('version', nargs='?') - parser.add_argument('current_version', nargs='?') + parser.add_argument("version", nargs="?") + parser.add_argument("current_version", nargs="?") -command_opt = cfg.SubCommandOpt('command', - title='Commands', - help='Show available commands.', - handler=add_command_parsers) +command_opt = 
cfg.SubCommandOpt( + "command", + title="Commands", + help="Show available commands.", + handler=add_command_parsers, +) def main(): logging.register_options(CONF) - logging.setup(CONF, 'dcmanager-manage') + logging.setup(CONF, "dcmanager-manage") CONF.register_cli_opt(command_opt) try: - default_config_files = cfg.find_config_files('dcmanager', - 'dcmanager-engine') - CONF(sys.argv[1:], project='dcmanager', prog='dcmanager-manage', - version=version.version_info.version_string(), - default_config_files=default_config_files) + default_config_files = cfg.find_config_files("dcmanager", "dcmanager-engine") + CONF( + sys.argv[1:], + project="dcmanager", + prog="dcmanager-manage", + version=version.version_info.version_string(), + default_config_files=default_config_files, + ) except RuntimeError as e: sys.exit("ERROR: %s" % e) @@ -80,5 +85,5 @@ def main(): sys.exit("ERROR: %s" % e) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/manager.py b/distributedcloud/dcmanager/cmd/manager.py index 4df473d29..17f3807fa 100644 --- a/distributedcloud/dcmanager/cmd/manager.py +++ b/distributedcloud/dcmanager/cmd/manager.py @@ -33,28 +33,27 @@ from dcmanager.common import config # noqa: E402 from dcmanager.common import consts # noqa: E402 from dcmanager.common import messaging # noqa: E402 from dcorch.common import messaging as dcorch_messaging # noqa: E402 + # pylint: enable=wrong-import-position _lazy.enable_lazy() config.register_options() config.register_keystone_options() -LOG = logging.getLogger('dcmanager.engine') +LOG = logging.getLogger("dcmanager.engine") def main(): logging.register_options(cfg.CONF) - cfg.CONF(project='dcmanager', prog='dcmanager-engine') - logging.setup(cfg.CONF, 'dcmanager-engine') + cfg.CONF(project="dcmanager", prog="dcmanager-engine") + logging.setup(cfg.CONF, "dcmanager-engine") logging.set_defaults() messaging.setup() dcorch_messaging.setup() from dcmanager.manager import service as manager - srv = manager.DCManagerService(cfg.CONF.host, - consts.TOPIC_DC_MANAGER) - launcher = service.launch(cfg.CONF, - srv, workers=cfg.CONF.workers) + srv = manager.DCManagerService(cfg.CONF.host, consts.TOPIC_DC_MANAGER) + launcher = service.launch(cfg.CONF, srv, workers=cfg.CONF.workers) LOG.info("Starting...") LOG.debug("Configuration:") @@ -65,5 +64,5 @@ def main(): launcher.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/orchestrator.py b/distributedcloud/dcmanager/cmd/orchestrator.py index f17640dcd..d4ca0ad3b 100644 --- a/distributedcloud/dcmanager/cmd/orchestrator.py +++ b/distributedcloud/dcmanager/cmd/orchestrator.py @@ -19,6 +19,7 @@ DC Manager Orchestrator Service. 
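# Self-contained sketch of the oslo.config sub-command pattern that the reformatted
# dcmanager-manage code above relies on. The command name "db_version" comes from
# the patch; the handler body and the direct CONF.command.func() dispatch are
# simplified stand-ins for what dcmanager-manage actually does.
import sys

from oslo_config import cfg

CONF = cfg.CONF


def do_db_version():
    print("db version")  # placeholder for printing the real migration version


def add_command_parsers(subparsers):
    parser = subparsers.add_parser("db_version")
    parser.set_defaults(func=do_db_version)


command_opt = cfg.SubCommandOpt(
    "command",
    title="Commands",
    help="Show available commands.",
    handler=add_command_parsers,
)

CONF.register_cli_opt(command_opt)
CONF(sys.argv[1:], project="dcmanager")  # e.g. run as: python sketch.py db_version
CONF.command.func()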
""" import eventlet + eventlet.monkey_patch() # pylint: disable=wrong-import-position @@ -29,10 +30,11 @@ from oslo_service import service # noqa: E402 from dcmanager.common import config # noqa: E402 from dcmanager.common import messaging # noqa: E402 + # pylint: enable=wrong-import-position CONF = cfg.CONF -LOG = logging.getLogger('dcmanager.orchestrator') +LOG = logging.getLogger("dcmanager.orchestrator") def main(): @@ -40,16 +42,15 @@ def main(): config.register_options() config.register_keystone_options() logging.register_options(CONF) - CONF(project='dcmanager', prog='dcmanager-orchestrator') - logging.setup(CONF, 'dcmanager-orchestrator') + CONF(project="dcmanager", prog="dcmanager-orchestrator") + logging.setup(CONF, "dcmanager-orchestrator") logging.set_defaults() messaging.setup() from dcmanager.orchestrator import service as orchestrator srv = orchestrator.DCManagerOrchestratorService() - launcher = service.launch(CONF, - srv, workers=cfg.CONF.orch_workers) + launcher = service.launch(CONF, srv, workers=cfg.CONF.orch_workers) LOG.info("Starting...") LOG.debug("Configuration:") @@ -58,5 +59,5 @@ def main(): launcher.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/cmd/state.py b/distributedcloud/dcmanager/cmd/state.py index f8d3cac91..30bc8f52c 100644 --- a/distributedcloud/dcmanager/cmd/state.py +++ b/distributedcloud/dcmanager/cmd/state.py @@ -24,6 +24,7 @@ DC Manager State Engine Server. """ import eventlet + eventlet.monkey_patch() # pylint: disable=wrong-import-position @@ -35,18 +36,19 @@ from oslo_service import service # noqa: E402 from dcmanager.common import config # noqa: E402 from dcmanager.common import messaging # noqa: E402 from dcorch.common import messaging as dcorch_messaging # noqa: E402 + # pylint: enable=wrong-import-position _lazy.enable_lazy() config.register_options() config.register_keystone_options() -LOG = logging.getLogger('dcmanager.state') +LOG = logging.getLogger("dcmanager.state") def main(): logging.register_options(cfg.CONF) - cfg.CONF(project='dcmanager', prog='dcmanager-state') - logging.setup(cfg.CONF, 'dcmanager-state') + cfg.CONF(project="dcmanager", prog="dcmanager-state") + logging.setup(cfg.CONF, "dcmanager-state") logging.set_defaults() messaging.setup() dcorch_messaging.setup() @@ -55,18 +57,21 @@ def main(): # Override values from /etc/dcmanager/dcmanager.conf specific # to dcmanager-state: - cfg.CONF.set_override('max_pool_size', 10, group='database') - cfg.CONF.set_override('max_overflow', 100, group='database') + cfg.CONF.set_override("max_pool_size", 10, group="database") + cfg.CONF.set_override("max_overflow", 100, group="database") LOG.info("Starting...") LOG.debug("Configuration:") cfg.CONF.log_opt_values(LOG, logging.DEBUG) - LOG.info("Launching service, host=%s, state_workers=%s ...", - cfg.CONF.host, cfg.CONF.state_workers) + LOG.info( + "Launching service, host=%s, state_workers=%s ...", + cfg.CONF.host, + cfg.CONF.state_workers, + ) srv = state.DCManagerStateService(cfg.CONF.host) launcher = service.launch(cfg.CONF, srv, workers=cfg.CONF.state_workers) launcher.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/distributedcloud/dcmanager/common/exceptions.py b/distributedcloud/dcmanager/common/exceptions.py index 129608136..c64b94f98 100644 --- a/distributedcloud/dcmanager/common/exceptions.py +++ b/distributedcloud/dcmanager/common/exceptions.py @@ -183,8 +183,7 @@ class PeerGroupAssociationCombinationNotFound(NotFound): class 
PeerGroupAssociationTargetNotMatch(NotFound): message = _( - "Peer Group Association with peer site controller " - "UUID %(uuid)s doesn't match." + "Peer Group Association with peer site controller UUID %(uuid)s doesn't match." ) @@ -237,8 +236,7 @@ class CertificateUploadError(DCManagerException): class LicenseInstallError(DCManagerException): message = _( - "Error while installing license on subcloud: " - "%(subcloud_id)s. %(error_message)s" + "Error while installing license on subcloud: %(subcloud_id)s. %(error_message)s" ) diff --git a/distributedcloud/dcmanager/common/phased_subcloud_deploy.py b/distributedcloud/dcmanager/common/phased_subcloud_deploy.py index 6e190ca48..40c34bcd9 100644 --- a/distributedcloud/dcmanager/common/phased_subcloud_deploy.py +++ b/distributedcloud/dcmanager/common/phased_subcloud_deploy.py @@ -623,10 +623,7 @@ def validate_install_values(payload, ip_version=None, subcloud=None): # the expected value is less than the default. so throw an error. pecan.abort( 400, - _( - "persistent_size of %s MB is less than " - "the permitted minimum %s MB " - ) + _("persistent_size of %s MB is less than the permitted minimum %s MB") % (str(persistent_size), consts.DEFAULT_PERSISTENT_SIZE), ) diff --git a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py index 5e15def99..c312218f8 100644 --- a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py +++ b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py @@ -6,12 +6,10 @@ import threading -from oslo_config import cfg -from oslo_log import log as logging - - from fm_api import constants as fm_const from fm_api import fm_api +from oslo_config import cfg +from oslo_log import log as logging from dccommon import consts as dccommon_consts from dcmanager.common import consts @@ -22,7 +20,6 @@ from dcmanager.common import utils from dcmanager.db import api as db_api from dcmanager.manager.system_peer_manager import SystemPeerManager - CONF = cfg.CONF LOG = logging.getLogger(__name__) @@ -31,9 +28,8 @@ class PeerGroupAuditManager(manager.Manager): """Manages audit related tasks.""" def __init__(self, subcloud_manager, peer_group_id, *args, **kwargs): - LOG.debug(_('PeerGroupAuditManager initialization...')) - super().__init__(service_name="peer_group_audit_manager", - *args, **kwargs) + LOG.debug(_("PeerGroupAuditManager initialization...")) + super().__init__(service_name="peer_group_audit_manager", *args, **kwargs) self.context = context.get_admin_context() self.fm_api = fm_api.FaultAPIs() self.subcloud_manager = subcloud_manager @@ -42,118 +38,121 @@ class PeerGroupAuditManager(manager.Manager): self.thread = None self.thread_lock = threading.Lock() - def _get_subclouds_by_peer_group_from_system_peer(self, - dc_client, - system_peer, - peer_group_name): + def _get_subclouds_by_peer_group_from_system_peer( + self, dc_client, system_peer, peer_group_name + ): try: - subclouds = dc_client.get_subcloud_list_by_peer_group( - peer_group_name) + subclouds = dc_client.get_subcloud_list_by_peer_group(peer_group_name) return subclouds except Exception: - LOG.exception(f"Failed to get subclouds of peer group " - f"{peer_group_name} from DC: " - f"{system_peer.peer_name}") + LOG.exception( + f"Failed to get subclouds of peer group {peer_group_name} " + f"from DC: {system_peer.peer_name}" + ) @staticmethod - def _get_association_sync_status_from_peer_site(dc_client, - system_peer, - peer_group_id): + def 
_get_association_sync_status_from_peer_site( + dc_client, system_peer, peer_group_id + ): try: # Get peer site system peer dc_peer_system_peer = dc_client.get_system_peer( - utils.get_local_system().uuid) + utils.get_local_system().uuid + ) association = dc_client.get_peer_group_association_with_peer_id_and_pg_id( dc_peer_system_peer.get("id"), peer_group_id ) return association.get("sync-status") except Exception: - LOG.exception(f"Failed to get subclouds of peer group " - f"{peer_group_id} from DC: {system_peer.peer_name}") + LOG.exception( + f"Failed to get subclouds of peer group {peer_group_id} " + f"from DC: {system_peer.peer_name}" + ) - def _update_remote_peer_group_migration_status(self, - system_peer, - peer_group_name, - migration_status): + def _update_remote_peer_group_migration_status( + self, system_peer, peer_group_name, migration_status + ): dc_client = SystemPeerManager.get_peer_dc_client(system_peer) - peer_group_kwargs = { - 'migration_status': migration_status - } - dc_client.update_subcloud_peer_group(peer_group_name, - **peer_group_kwargs) - LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on " - f"peer site {system_peer.peer_name}, set migration_status " - f"to: {migration_status}") + peer_group_kwargs = {"migration_status": migration_status} + dc_client.update_subcloud_peer_group(peer_group_name, **peer_group_kwargs) + LOG.info( + f"Updated Subcloud Peer Group {peer_group_name} on peer site " + f"{system_peer.peer_name}, set migration_status to: {migration_status}" + ) - def _get_local_subclouds_to_update_and_delete(self, - local_peer_group, - remote_subclouds, - remote_sync_status): + def _get_local_subclouds_to_update_and_delete( + self, local_peer_group, remote_subclouds, remote_sync_status + ): local_subclouds_to_update = list() local_subclouds_to_delete = list() any_rehome_failed = False - remote_subclouds_dict = {remote_subcloud.get('region-name'): - remote_subcloud for remote_subcloud - in remote_subclouds} + remote_subclouds_dict = { + remote_subcloud.get("region-name"): remote_subcloud + for remote_subcloud in remote_subclouds + } local_subclouds = db_api.subcloud_get_for_peer_group( - self.context, local_peer_group.id) + self.context, local_peer_group.id + ) for local_subcloud in local_subclouds: - remote_subcloud = remote_subclouds_dict.get( - local_subcloud.region_name) + remote_subcloud = remote_subclouds_dict.get(local_subcloud.region_name) if remote_subcloud: # Check if the remote subcloud meets the conditions for update # if it is 'managed' and the local subcloud is not # in 'secondary' status - if (remote_subcloud.get('management-state') == - dccommon_consts.MANAGEMENT_MANAGED and - not utils.subcloud_is_secondary_state( - local_subcloud.deploy_status)): + MANAGED = dccommon_consts.MANAGEMENT_MANAGED + if remote_subcloud.get( + "management-state" + ) == MANAGED and not utils.subcloud_is_secondary_state( + local_subcloud.deploy_status + ): local_subclouds_to_update.append(local_subcloud) # Sync rehome_data from remote to local subcloud if the remote # PGA sync_status is out-of-sync once migration completes, # indicating any bootstrap values/address updates to # the subcloud on the remote site. 
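# Simplified restatement (not from the patch) of the classification performed by
# _get_local_subclouds_to_update_and_delete above, with illustrative names and the
# literal "managed" standing in for dccommon_consts.MANAGEMENT_MANAGED: remote
# subclouds are indexed by region name, then each local subcloud is either kept for
# update (still managed on the peer site) or marked for deletion (no longer present
# there).
def classify_local_subclouds(local_subclouds, remote_subclouds):
    remote_by_region = {
        remote.get("region-name"): remote for remote in remote_subclouds
    }
    to_update, to_delete = [], []
    for local in local_subclouds:
        remote = remote_by_region.get(local.region_name)
        if remote is None:
            to_delete.append(local)  # gone from the peer group on the peer site
        elif remote.get("management-state") == "managed":
            to_update.append(local)  # still managed remotely, keep it in sync
    return to_update, to_delete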
- if remote_sync_status == \ - consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC: + if remote_sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC: self._sync_rehome_data( - local_subcloud.id, remote_subcloud.get('rehome_data')) - elif remote_subcloud.get('deploy-status') in \ - (consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED): + local_subcloud.id, remote_subcloud.get("rehome_data") + ) + elif remote_subcloud.get("deploy-status") in ( + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ): # Set local subcloud to rehome-failed if the remote is # rehome-failed or rehome-prep-failed, otherwise, the # deploy_status will remain rehome-pending, which will # block the correction of the bootstrap values/address. db_api.subcloud_update( - self.context, local_subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOME_FAILED) + self.context, + local_subcloud.id, + deploy_status=consts.DEPLOY_STATE_REHOME_FAILED, + ) any_rehome_failed = True else: local_subclouds_to_delete.append(local_subcloud) - return local_subclouds_to_update, local_subclouds_to_delete, \ - any_rehome_failed + return local_subclouds_to_update, local_subclouds_to_delete, any_rehome_failed def _set_local_subcloud_to_secondary(self, subcloud): try: LOG.info("Set local subcloud %s to secondary" % subcloud.name) # There will be an exception when unmanage # a subcloud in 'unamaged' state. - if subcloud.management_state != \ - dccommon_consts.MANAGEMENT_UNMANAGED: + if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED: self.subcloud_manager.update_subcloud( self.context, subcloud.id, - management_state=dccommon_consts. - MANAGEMENT_UNMANAGED) + management_state=dccommon_consts.MANAGEMENT_UNMANAGED, + ) self.subcloud_manager.update_subcloud( - self.context, - subcloud.id, - deploy_status=consts.DEPLOY_STATE_SECONDARY) + self.context, subcloud.id, deploy_status=consts.DEPLOY_STATE_SECONDARY + ) except Exception as e: - LOG.exception(f"Failed to update local non-secondary " - f"and offline subcloud [{subcloud.name}], err: {e}") + LOG.exception( + "Failed to update local non-secondary and offline subcloud " + f"[{subcloud.name}], err: {e}" + ) raise e def _sync_rehome_data(self, subcloud_id, rehome_data): @@ -164,86 +163,99 @@ class PeerGroupAuditManager(manager.Manager): LOG.info("Local peer group in migrating state, quit audit") return - LOG.info("Auditing remote subcloud peer group:[%s] " - "migration_status:[%s] group_priority[%s], " - "local subcloud peer group:[%s] " - "migration_status:[%s] group_priority[%s]" % - (remote_peer_group.get("peer_group_name"), - remote_peer_group.get("migration_status"), - remote_peer_group.get("group_priority"), - local_peer_group.peer_group_name, - local_peer_group.migration_status, - local_peer_group.group_priority)) + LOG.info( + "Auditing remote subcloud peer group:[%s] migration_status:[%s] " + "group_priority[%s], local subcloud peer group:[%s] " + "migration_status:[%s] group_priority[%s]" + % ( + remote_peer_group.get("peer_group_name"), + remote_peer_group.get("migration_status"), + remote_peer_group.get("group_priority"), + local_peer_group.peer_group_name, + local_peer_group.migration_status, + local_peer_group.group_priority, + ) + ) # if remote subcloud peer group's migration_status is 'migrating', # 'unmanaged' all local subclouds in local peer group and change its # deploy status to consts.DEPLOY_STATE_REHOME_PENDING to stop cert-mon # audits. 
- if remote_peer_group.get("migration_status") == \ - consts.PEER_GROUP_MIGRATING: + if remote_peer_group.get("migration_status") == consts.PEER_GROUP_MIGRATING: # Unmanaged all local subclouds of peer group - LOG.info(f"Unmanaged all local subclouds of peer group " - f"{local_peer_group.peer_group_name} " - f"since remote is in migrating state") - subclouds = db_api.subcloud_get_for_peer_group(self.context, - local_peer_group.id) + LOG.info( + "Unmanaged all local subclouds of peer group " + f"{local_peer_group.peer_group_name} since remote is in migrating state" + ) + subclouds = db_api.subcloud_get_for_peer_group( + self.context, local_peer_group.id + ) for subcloud in subclouds: try: # update_subcloud raises an exception when trying to umanage # an already unmanaged subcloud, so the deploy status # update must be done separately - if subcloud.management_state != \ - dccommon_consts.MANAGEMENT_UNMANAGED: + if ( + subcloud.management_state + != dccommon_consts.MANAGEMENT_UNMANAGED + ): # Unmanage and update the deploy-status - LOG.info("Unmanaging and setting the local subcloud " - f"{subcloud.name} deploy status to " - f"{consts.DEPLOY_STATE_REHOME_PENDING}") + LOG.info( + "Unmanaging and setting the local subcloud " + f"{subcloud.name} deploy status to " + f"{consts.DEPLOY_STATE_REHOME_PENDING}" + ) self.subcloud_manager.update_subcloud( self.context, subcloud.id, - management_state=dccommon_consts. - MANAGEMENT_UNMANAGED, - deploy_status=consts.DEPLOY_STATE_REHOME_PENDING) + management_state=dccommon_consts.MANAGEMENT_UNMANAGED, + deploy_status=consts.DEPLOY_STATE_REHOME_PENDING, + ) else: # Already unmanaged, just update the deploy-status - LOG.info(f"Setting the local subcloud {subcloud.name} " - "deploy status to " - f"{consts.DEPLOY_STATE_REHOME_PENDING}") + LOG.info( + f"Setting the local subcloud {subcloud.name} " + f"deploy status to {consts.DEPLOY_STATE_REHOME_PENDING}" + ) self.subcloud_manager.update_subcloud( self.context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOME_PENDING) + deploy_status=consts.DEPLOY_STATE_REHOME_PENDING, + ) except Exception as e: - LOG.exception(f"Fail to unmanage local subcloud " - f"{subcloud.name}, err: {e}") + LOG.exception( + f"Fail to unmanage local subcloud {subcloud.name}, err: {e}" + ) raise e SystemPeerManager.update_sync_status( - self.context, system_peer, + self.context, + system_peer, consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, - local_peer_group, remote_peer_group) + local_peer_group, + remote_peer_group, + ) self.require_audit_flag = False # if remote subcloud peer group's migration_status is 'complete', # get remote subclouds. 
For 'managed+online' subclouds, # set 'unmanaged+secondary' to local on same subclouds - elif remote_peer_group.get("migration_status") == \ - consts.PEER_GROUP_MIGRATION_COMPLETE: + elif ( + remote_peer_group.get("migration_status") + == consts.PEER_GROUP_MIGRATION_COMPLETE + ): dc_client = SystemPeerManager.get_peer_dc_client(system_peer) - remote_subclouds = \ - self._get_subclouds_by_peer_group_from_system_peer( - dc_client, - system_peer, - remote_peer_group.get("peer_group_name")) - remote_sync_status = \ - self._get_association_sync_status_from_peer_site( - dc_client, - system_peer, - remote_peer_group.get("id")) + remote_subclouds = self._get_subclouds_by_peer_group_from_system_peer( + dc_client, system_peer, remote_peer_group.get("peer_group_name") + ) + remote_sync_status = self._get_association_sync_status_from_peer_site( + dc_client, system_peer, remote_peer_group.get("id") + ) - local_subclouds_to_update, local_subclouds_to_delete, \ - any_rehome_failed = \ + local_subclouds_to_update, local_subclouds_to_delete, any_rehome_failed = ( self._get_local_subclouds_to_update_and_delete( - local_peer_group, remote_subclouds, remote_sync_status) + local_peer_group, remote_subclouds, remote_sync_status + ) + ) for subcloud in local_subclouds_to_update: self._set_local_subcloud_to_secondary(subcloud) @@ -253,85 +265,90 @@ class PeerGroupAuditManager(manager.Manager): for subcloud in local_subclouds_to_delete: self._set_local_subcloud_to_secondary(subcloud) try: - self.subcloud_manager.delete_subcloud( - self.context, subcloud.id) + self.subcloud_manager.delete_subcloud(self.context, subcloud.id) LOG.info(f"Deleted local subcloud {subcloud.name}") except Exception as e: SystemPeerManager.update_sync_status( - self.context, system_peer, + self.context, + system_peer, consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, - local_peer_group, remote_peer_group) - LOG.exception(f"Failed to delete local subcloud " - f"[{subcloud.name}] that does not exist " - f"under the same subcloud_peer_group on " - f"peer site, err: {e}") + local_peer_group, + remote_peer_group, + ) + LOG.exception( + f"Failed to delete local subcloud [{subcloud.name}] that does " + "not exist under the same subcloud_peer_group on peer site, " + f"err: {e}" + ) raise e if remote_peer_group.get("system_leader_id") == system_peer.peer_uuid: - self._clear_or_raise_alarm(system_peer, - local_peer_group, - remote_peer_group) + self._clear_or_raise_alarm( + system_peer, local_peer_group, remote_peer_group + ) db_api.subcloud_peer_group_update( self.context, local_peer_group.id, system_leader_id=system_peer.peer_uuid, - system_leader_name=system_peer.peer_name) + system_leader_name=system_peer.peer_name, + ) self._update_remote_peer_group_migration_status( - system_peer, - remote_peer_group.get("peer_group_name"), - None) + system_peer, remote_peer_group.get("peer_group_name"), None + ) - if not (remote_sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC - and any_rehome_failed): + if not ( + remote_sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC + and any_rehome_failed + ): SystemPeerManager.update_sync_status( - self.context, system_peer, + self.context, + system_peer, consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, - local_peer_group, remote_peer_group) + local_peer_group, + remote_peer_group, + ) self.require_audit_flag = False else: # If remote peer group migration_status is 'None' self.require_audit_flag = False - def _clear_or_raise_alarm(self, - system_peer, - local_peer_group, - remote_peer_group): + def 
_clear_or_raise_alarm(self, system_peer, local_peer_group, remote_peer_group): # If local subcloud peer group's group_priority is # lower than remote subcloud peer group's group_priority, # an alarm will be raised. # lower number means higher priority - entity_instance_id = "peer_group=%s,peer=%s" % \ - (local_peer_group.peer_group_name, system_peer.peer_uuid) - if local_peer_group.group_priority < remote_peer_group.get('group_priority'): - LOG.warning("Alarm: local subcloud peer group [" - f"{local_peer_group.peer_group_name}] " - f"is managed by remote system [" - f"{system_peer.peer_name}]") + entity_instance_id = "peer_group=%s,peer=%s" % ( + local_peer_group.peer_group_name, + system_peer.peer_uuid, + ) + if local_peer_group.group_priority < remote_peer_group.get("group_priority"): + LOG.warning( + f"Alarm: local subcloud peer group [{local_peer_group.peer_group_name}]" + f" is managed by remote system [{system_peer.peer_name}]" + ) try: fault = fm_api.Fault( - alarm_id=fm_const. - FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED, + alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED, alarm_state=fm_const.FM_ALARM_STATE_SET, - entity_type_id=fm_const. - FM_ENTITY_TYPE_SUBCLOUD_PEER_GROUP, + entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD_PEER_GROUP, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MAJOR, - reason_text=("Subcloud peer group " - "(peer_group_name=%s) " - "is managed by remote " - "system (peer_uuid=%s) " - "with a lower priority." % - (local_peer_group.peer_group_name, - system_peer.peer_uuid)), + reason_text=( + "Subcloud peer group (peer_group_name=%s) is managed by " + "remote system (peer_uuid=%s) with a lower priority." + % (local_peer_group.peer_group_name, system_peer.peer_uuid) + ), alarm_type=fm_const.FM_ALARM_TYPE_0, - probable_cause=fm_const. - ALARM_PROBABLE_CAUSE_UNKNOWN, - proposed_repair_action="Check the reported peer group " - "state. Migrate it back to the current system if the " - "state is 'rehomed' and the current system is stable. " - "Otherwise, wait until these conditions are met.", - service_affecting=False) + probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN, + proposed_repair_action=( + "Check the reported peer group state. Migrate it back to the " + "current system if the state is 'rehomed' and the current " + "system is stable. Otherwise, wait until these conditions " + "are met." 
+ ), + service_affecting=False, + ) self.fm_api.set_fault(fault) except Exception as e: LOG.exception(e) @@ -339,17 +356,19 @@ class PeerGroupAuditManager(manager.Manager): try: fault = self.fm_api.get_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED, - entity_instance_id) + entity_instance_id, + ) if fault: LOG.info(f"Clear alarm: {entity_instance_id}") self.fm_api.clear_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED, - entity_instance_id) + entity_instance_id, + ) except Exception: LOG.exception( - f"Problem clearing fault [{entity_instance_id}], " - f"alarm_id=" - f"{fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED}") + f"Problem clearing fault [{entity_instance_id}], alarm_id=" + f"{fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED}" + ) def _do_audit(self, system_peer, remote_peer_group, local_peer_group): with self.thread_lock: @@ -367,20 +386,24 @@ class PeerGroupAuditManager(manager.Manager): def start(self, system_peer, remote_peer_group, local_peer_group): if self.thread_lock.locked(): - LOG.warning(f"Audit thread for {local_peer_group.peer_group_name} " - f"has already started") + LOG.warning( + f"Audit thread for {local_peer_group.peer_group_name} " + "has already started" + ) else: self.thread = threading.Thread( target=self._do_audit, - args=(system_peer, remote_peer_group, local_peer_group)) + args=(system_peer, remote_peer_group, local_peer_group), + ) self.thread.start() - def audit_peer_group_from_system(self, - system_peer, - remote_peer_group, - local_peer_group): - LOG.info(f"Audit peer group [{local_peer_group.peer_group_name}] " - f"with remote system {system_peer.peer_name}") + def audit_peer_group_from_system( + self, system_peer, remote_peer_group, local_peer_group + ): + LOG.info( + f"Audit peer group [{local_peer_group.peer_group_name}] " + f"with remote system {system_peer.peer_name}" + ) self.start(system_peer, remote_peer_group, local_peer_group) @staticmethod @@ -391,21 +414,23 @@ class PeerGroupAuditManager(manager.Manager): for system in system_peers: try: dc_client = SystemPeerManager.get_peer_dc_client(system) - payload = db_api.subcloud_peer_group_db_model_to_dict( - peer_group) - if 'created-at' in payload: - del payload['created-at'] - if 'updated-at' in payload: - del payload['updated-at'] - payload['peer_uuid'] = local_system.uuid - LOG.info("Send audit payload [%s] of peer group %s" % - (payload, peer_group.peer_group_name)) + payload = db_api.subcloud_peer_group_db_model_to_dict(peer_group) + if "created-at" in payload: + del payload["created-at"] + if "updated-at" in payload: + del payload["updated-at"] + payload["peer_uuid"] = local_system.uuid + LOG.info( + "Send audit payload [%s] of peer group %s" + % (payload, peer_group.peer_group_name) + ) response = dc_client.audit_subcloud_peer_group( - peer_group.peer_group_name, - **payload) + peer_group.peer_group_name, **payload + ) if response: return response except Exception: - LOG.exception("Failed to send audit request for peer group " - f"{peer_group.peer_group_name} to DC: " - f"{system.peer_name}") + LOG.exception( + "Failed to send audit request for peer group " + f"{peer_group.peer_group_name} to DC: {system.peer_name}" + ) diff --git a/distributedcloud/dcmanager/manager/peer_monitor_manager.py b/distributedcloud/dcmanager/manager/peer_monitor_manager.py index 186202b05..9f698f94e 100644 --- a/distributedcloud/dcmanager/manager/peer_monitor_manager.py +++ b/distributedcloud/dcmanager/manager/peer_monitor_manager.py @@ -19,7 +19,6 @@ from 
dcmanager.db import api as db_api from dcmanager.manager import peer_group_audit_manager as pgam from dcmanager.manager.system_peer_manager import SystemPeerManager - CONF = cfg.CONF LOG = logging.getLogger(__name__) @@ -46,27 +45,35 @@ class PeerMonitor(object): self.fm_api.clear_fault(alarm_id, entity_instance_id) except Exception as e: LOG.exception( - "Problem clearing fault for peer %s, alarm_id=%s " - "error: %s" % (self.peer.peer_uuid, alarm_id, e)) + "Problem clearing fault for peer %s, alarm_id=%s error: %s" + % (self.peer.peer_uuid, alarm_id, e) + ) def _raise_failure(self): alarm_id = fm_const.FM_ALARM_ID_DC_SYSTEM_PEER_HEARTBEAT_FAILED entity_instance_id = "peer=%s" % self.peer.peer_uuid - reason_text = ("Peer %s (peer_uuid=%s) connections in " - "disconnected state." % (self.peer.peer_name, - self.peer.peer_uuid)) + reason_text = "Peer %s (peer_uuid=%s) connections in disconnected state." % ( + self.peer.peer_name, + self.peer.peer_uuid, + ) severity = fm_const.FM_ALARM_SEVERITY_MAJOR peer_groups = db_api.subcloud_peer_group_get_by_leader_id( - self.context, self.peer.peer_uuid) + self.context, self.peer.peer_uuid + ) if len(peer_groups) > 0: - peer_group_names = [peer_group.peer_group_name - for peer_group in peer_groups] - reason_text = ("Peer %s (peer_uuid=%s) is in disconnected " - "state. The following subcloud peer groups " - "are impacted: %s." % - (self.peer.peer_name, self.peer.peer_uuid, - ", ".join(peer_group_names))) + peer_group_names = [ + peer_group.peer_group_name for peer_group in peer_groups + ] + reason_text = ( + "Peer %s (peer_uuid=%s) is in disconnected state. The following " + "subcloud peer groups are impacted: %s." + % ( + self.peer.peer_name, + self.peer.peer_uuid, + ", ".join(peer_group_names), + ) + ) severity = fm_const.FM_ALARM_SEVERITY_CRITICAL try: @@ -79,18 +86,22 @@ class PeerMonitor(object): reason_text=reason_text, alarm_type=fm_const.FM_ALARM_TYPE_1, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN, - proposed_repair_action="Check the connectivity between " - "the current system and the reported peer site. If the " - "peer system is down, migrate the affected peer group(s) " - "to the current system for continued subcloud management.", - service_affecting=False) + proposed_repair_action=( + "Check the connectivity between the current system and the " + "reported peer site. If the peer system is down, migrate the " + "affected peer group(s) to the current system for continued " + "subcloud management." 
+ ), + service_affecting=False, + ) self.fm_api.set_fault(fault) except Exception as e: LOG.exception( - "Problem setting fault for peer %s, alarm_id=%s, " - "error: %s" % (self.peer.peer_uuid, alarm_id, e)) + "Problem setting fault for peer %s, alarm_id=%s, error: %s" + % (self.peer.peer_uuid, alarm_id, e) + ) def _heartbeat_check_via_get_peer_group_list(self): """Checking the heartbeat of system peer.""" @@ -98,29 +109,28 @@ class PeerMonitor(object): dc_peer_subcloud_peer_group_list = list() try: dc_client = SystemPeerManager.get_peer_dc_client(self.peer) - dc_peer_subcloud_peer_group_list = \ - dc_client.get_subcloud_peer_group_list() + dc_peer_subcloud_peer_group_list = dc_client.get_subcloud_peer_group_list() failed = False if not dc_peer_subcloud_peer_group_list: - LOG.warning("Resource subcloud peer group of dc:%s " - "not found" % self.peer.manager_endpoint) + LOG.warning( + "Resource subcloud peer group of dc:%s not found" + % self.peer.manager_endpoint + ) except Exception: - LOG.exception("Failed to access the dc: %s" % - self.peer.peer_name) + LOG.exception("Failed to access the dc: %s" % self.peer.peer_name) return failed, dc_peer_subcloud_peer_group_list def _update_sync_status_secondary_site_becomes_unreachable(self): # Get associations by system peer - associations = SystemPeerManager.get_local_associations(self.context, - self.peer) + associations = SystemPeerManager.get_local_associations(self.context, self.peer) for association in associations: # If the association is not primary, skip it. - if association.association_type == consts.\ - ASSOCIATION_TYPE_NON_PRIMARY: - LOG.debug("Skip update the Association sync_status as " - "it is not primary.") + if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY: + LOG.debug( + "Skip update the Association sync_status as it is not primary." + ) continue # If the secondary site is down, set the association sync status # "in-sync" -> "unknown" @@ -131,24 +141,27 @@ class PeerMonitor(object): sync_status = consts.ASSOCIATION_SYNC_STATUS_UNKNOWN message = f"Peer site ({self.peer.peer_name}) is unreachable." if association.sync_status not in [ - consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, - consts.ASSOCIATION_SYNC_STATUS_UNKNOWN]: + consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, + consts.ASSOCIATION_SYNC_STATUS_UNKNOWN, + ]: sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED db_api.peer_group_association_update( - self.context, association.id, + self.context, + association.id, sync_status=sync_status, - sync_message=message) + sync_message=message, + ) def _update_sync_status_secondary_site_becomes_reachable(self): # Get associations by system peer - associations = SystemPeerManager.get_local_associations(self.context, - self.peer) + associations = SystemPeerManager.get_local_associations(self.context, self.peer) for association in associations: # If the association is not primary, skip it. - if association.association_type == consts.\ - ASSOCIATION_TYPE_NON_PRIMARY: - LOG.debug("Skip update Peer Site Association sync_status as " - "current site Association is not primary.") + if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY: + LOG.debug( + "Skip update Peer Site Association sync_status as " + "current site Association is not primary." 
+ ) continue # Upon detecting that the secondary site is reachable again, # the PGA sync_status will be set for both sites by the primary @@ -156,37 +169,43 @@ class PeerMonitor(object): # "unknown" -> "in-sync" # "failed" -> "out-of-sync" sync_status = consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC - if association.sync_status == \ - consts.ASSOCIATION_SYNC_STATUS_UNKNOWN: + if association.sync_status == consts.ASSOCIATION_SYNC_STATUS_UNKNOWN: sync_status = consts.ASSOCIATION_SYNC_STATUS_IN_SYNC dc_local_pg = db_api.subcloud_peer_group_get( - self.context, association.peer_group_id) + self.context, association.peer_group_id + ) SystemPeerManager.update_sync_status( - self.context, self.peer, sync_status, dc_local_pg, - association=association) + self.context, + self.peer, + sync_status, + dc_local_pg, + association=association, + ) def _do_monitor_peer(self): failure_count = 0 - LOG.info("Start monitoring thread for peer %s" % - self.peer.peer_name) + LOG.info("Start monitoring thread for peer %s" % self.peer.peer_name) + UNAVAILABLE_STATE = consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE + AVAILABLE_STATE = consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE # Do the actual peer monitor. while not self.exit_flag.wait(timeout=self.peer.heartbeat_interval): try: # Get system peer from DB self.peer = db_api.system_peer_get(self.context, self.peer.id) - failed, remote_pg_list = \ - self._heartbeat_check_via_get_peer_group_list() + failed, remote_pg_list = self._heartbeat_check_via_get_peer_group_list() if failed: failure_count += 1 if failure_count >= self.peer.heartbeat_failure_threshold: # heartbeat_failure_threshold reached. - LOG.warning("DC %s heartbeat failed, Raising alarm" % - self.peer.peer_name) + LOG.warning( + "DC %s heartbeat failed, Raising alarm" + % self.peer.peer_name + ) self._raise_failure() db_api.system_peer_update( - self.context, self.peer.id, - availability_state= # noqa: E251 - consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE + self.context, + self.peer.id, + availability_state=UNAVAILABLE_STATE, ) # pylint: disable=line-too-long self._update_sync_status_secondary_site_becomes_unreachable() @@ -195,23 +214,24 @@ class PeerMonitor(object): else: failure_count = 0 self._audit_local_peer_groups(remote_pg_list) - if self.peer.availability_state != \ - consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE: + if self.peer.availability_state != AVAILABLE_STATE: db_api.system_peer_update( - self.context, self.peer.id, - availability_state= # noqa: E251 - consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE + self.context, + self.peer.id, + availability_state=AVAILABLE_STATE, ) # pylint: disable=line-too-long self._update_sync_status_secondary_site_becomes_reachable() - LOG.info("DC %s back online, clear alarm" % - self.peer.peer_name) + LOG.info("DC %s back online, clear alarm" % self.peer.peer_name) self._clear_failure() except Exception as e: - LOG.exception("Got exception monitoring peer %s error: %s" % - (self.peer.peer_name, e)) - LOG.info("Caught graceful exit signal for peer monitor %s" % - self.peer.peer_name) + LOG.exception( + "Got exception monitoring peer %s error: %s" + % (self.peer.peer_name, e) + ) + LOG.info( + "Caught graceful exit signal for peer monitor %s" % self.peer.peer_name + ) def _audit_local_peer_groups(self, remote_pg_list): # Generate a dict index by remote peer group name @@ -222,21 +242,25 @@ class PeerMonitor(object): # Only audit peer groups existing on both side for peer_group_id, pgam_obj in self.peer_group_audit_obj_map.items(): - peer_group = 
db_api.subcloud_peer_group_get(self.context, - peer_group_id) + peer_group = db_api.subcloud_peer_group_get(self.context, peer_group_id) if peer_group.peer_group_name in remote_pg_dict: remote_peer_group = remote_pg_dict[peer_group.peer_group_name] # Audit for require_audit_flag is True or # Remote peer group is in 'complete' state. - if (pgam_obj.require_audit_flag - or remote_peer_group.get("migration_status") == - consts.PEER_GROUP_MIGRATION_COMPLETE): + if ( + pgam_obj.require_audit_flag + or remote_peer_group.get("migration_status") + == consts.PEER_GROUP_MIGRATION_COMPLETE + ): pgam_obj.audit_peer_group_from_system( - self.peer, remote_peer_group, peer_group) + self.peer, remote_peer_group, peer_group + ) else: - LOG.warning("peer group %s not found on remote DC %s " - "nothing to audit, need sync operation" % - (peer_group.peer_group_name, self.peer.peer_name)) + LOG.warning( + "peer group %s not found on remote DC %s " + "nothing to audit, need sync operation" + % (peer_group.peer_group_name, self.peer.peer_name) + ) def _set_require_audit_flag_to_associated_peer_groups(self): for pgam_obj in self.peer_group_audit_obj_map.values(): @@ -248,7 +272,7 @@ class PeerMonitor(object): pgam_obj = self.peer_group_audit_obj_map[peer_group.id] pgam_obj.audit(self.peer, remote_peer_group, peer_group) else: - msg = ("No peer group id %s found" % peer_group.peer_group_name) + msg = "No peer group id %s found" % peer_group.peer_group_name return msg def _clean_peer_group_audit_threads(self): @@ -262,25 +286,30 @@ class PeerMonitor(object): # destroy removed peer_group audit object for peer_group_id in removed_peer_groups: - LOG.info("Peer group [%s] removed from peer [%s]" % - (peer_group_id, self.peer.peer_name)) + LOG.info( + "Peer group [%s] removed from peer [%s]" + % (peer_group_id, self.peer.peer_name) + ) if peer_group_id in self.peer_group_audit_obj_map: self.peer_group_audit_obj_map[peer_group_id].stop() del self.peer_group_audit_obj_map[peer_group_id] # Add new peer_group audit object for peer_group_id in new_peer_groups: - LOG.info("New peer group [%s] found for peer [%s]" % - (peer_group_id, self.peer.peer_name)) - self.peer_group_audit_obj_map[peer_group_id] = \ - pgam.PeerGroupAuditManager(self.subcloud_manager, - peer_group_id) + LOG.info( + "New peer group [%s] found for peer [%s]" + % (peer_group_id, self.peer.peer_name) + ) + self.peer_group_audit_obj_map[peer_group_id] = pgam.PeerGroupAuditManager( + self.subcloud_manager, peer_group_id + ) self.peer_group_id_set = peer_group_id_set self._set_require_audit_flag_to_associated_peer_groups() def start(self): if self.thread is not None: - LOG.error('Peer monitor thread for %s has already started' % - self.peer.peer_name) + LOG.error( + "Peer monitor thread for %s has already started" % self.peer.peer_name + ) else: self.thread = threading.Thread(target=self._do_monitor_peer) self.thread.start() @@ -296,10 +325,9 @@ class PeerMonitorManager(manager.Manager): """Manages tasks related to peer monitor.""" def __init__(self, subcloud_manager): - LOG.debug('PeerMonitorManager initialization...') + LOG.debug("PeerMonitorManager initialization...") - super(PeerMonitorManager, self).__init__( - service_name="peer_monitor_manager") + super(PeerMonitorManager, self).__init__(service_name="peer_monitor_manager") self.peer_monitor = dict() self.context = context.get_admin_context() self.subcloud_manager = subcloud_manager @@ -314,12 +342,11 @@ class PeerMonitorManager(manager.Manager): del self.peer_monitor_thread_map[system_peer_id] def 
_create_peer_monitor_task(self, system_peer_id): - peer = db_api.system_peer_get(self.context, - system_peer_id) - LOG.info("Create monitoring thread for peer: %s" % - peer.peer_name) + peer = db_api.system_peer_get(self.context, system_peer_id) + LOG.info("Create monitoring thread for peer: %s" % peer.peer_name) self.peer_monitor_thread_map[system_peer_id] = PeerMonitor( - peer, self.context, self.subcloud_manager) + peer, self.context, self.subcloud_manager + ) self.peer_monitor_thread_map[system_peer_id].start() @staticmethod @@ -327,10 +354,12 @@ class PeerMonitorManager(manager.Manager): return {key: value for key, value in dict1.items() if key not in dict2} def _create_or_destroy_peer_monitor_task(self, peer_system_peer_group_map): - new_peers = self._diff_dict(peer_system_peer_group_map, - self.peer_monitor_thread_map) - removed_peers = self._diff_dict(self.peer_monitor_thread_map, - peer_system_peer_group_map) + new_peers = self._diff_dict( + peer_system_peer_group_map, self.peer_monitor_thread_map + ) + removed_peers = self._diff_dict( + self.peer_monitor_thread_map, peer_system_peer_group_map + ) for peer_id in new_peers: self._create_peer_monitor_task(peer_id) for peer_id in removed_peers: @@ -338,8 +367,7 @@ class PeerMonitorManager(manager.Manager): # Update peer_group_id set for peer_id, pm_obj in self.peer_monitor_thread_map.items(): - pm_obj.update_peer_group_id_set( - peer_system_peer_group_map[peer_id]) + pm_obj.update_peer_group_id_set(peer_system_peer_group_map[peer_id]) def peer_monitor_notify(self, context): LOG.info("Caught peer monitor notify...") @@ -348,31 +376,32 @@ class PeerMonitorManager(manager.Manager): associations = db_api.peer_group_association_get_all(context) for association in associations: peer_system_peer_group_map[association.system_peer_id].add( - association.peer_group_id) + association.peer_group_id + ) self._create_or_destroy_peer_monitor_task(peer_system_peer_group_map) def peer_group_audit_notify(self, context, peer_group_name, payload): - LOG.info("Caught peer group audit notification for peer group %s" % - peer_group_name) + LOG.info( + "Caught peer group audit notification for peer group %s" % peer_group_name + ) msg = None try: peer_group = db_api.subcloud_peer_group_get_by_name( - context, peer_group_name) - system_uuid = payload.get('peer_uuid') - system_peer = db_api.system_peer_get_by_uuid(context, - system_uuid) + context, peer_group_name + ) + system_uuid = payload.get("peer_uuid") + system_peer = db_api.system_peer_get_by_uuid(context, system_uuid) if system_peer.id in self.peer_monitor_thread_map: pmobj = self.peer_monitor_thread_map[system_peer.id] - msg = pmobj.audit_specific_local_peer_group(peer_group, - payload) + msg = pmobj.audit_specific_local_peer_group(peer_group, payload) else: - msg = ("System peer with UUID=%s is not under monitoring. " - "Skipping audit for peer group %s" % - (system_uuid, peer_group_name)) + msg = ( + "System peer with UUID=%s is not under monitoring. 
" + "Skipping audit for peer group %s" % (system_uuid, peer_group_name) + ) LOG.warning(msg) return msg except Exception as e: - LOG.exception('Handling peer group audit notify error: %s' % - str(e)) + LOG.exception("Handling peer group audit notify error: %s" % str(e)) return str(e) diff --git a/distributedcloud/dcmanager/manager/service.py b/distributedcloud/dcmanager/manager/service.py index 91e32e9a0..8b77241c1 100644 --- a/distributedcloud/dcmanager/manager/service.py +++ b/distributedcloud/dcmanager/manager/service.py @@ -46,9 +46,11 @@ LOG = logging.getLogger(__name__) # run multiple operations in parallel past the RPC limit. def run_in_thread(fn): """Decorator to run a function in a separate thread.""" + def wrapper(*args, **kwargs): thread = threading.Thread(target=fn, args=args, kwargs=kwargs) thread.start() + return wrapper @@ -101,9 +103,9 @@ class DCManagerService(service.Service): utils.set_open_file_limit(cfg.CONF.worker_rlimit_nofile) self.dcmanager_id = uuidutils.generate_uuid() self.init_managers() - target = oslo_messaging.Target(version=self.rpc_api_version, - server=self.host, - topic=self.topic) + target = oslo_messaging.Target( + version=self.rpc_api_version, server=self.host, topic=self.topic + ) self.target = target self._rpc_server = rpc_messaging.get_rpc_server(self.target, self) self._rpc_server.start() @@ -127,14 +129,15 @@ class DCManagerService(service.Service): @request_context def add_subcloud(self, context, subcloud_id, payload): # Adds a subcloud - LOG.info("Handling add_subcloud request for: %s" % payload.get('name')) + LOG.info("Handling add_subcloud request for: %s" % payload.get("name")) return self.subcloud_manager.add_subcloud(context, subcloud_id, payload) @request_context def add_secondary_subcloud(self, context, subcloud_id, payload): # Adds a secondary subcloud - LOG.info("Handling add_secondary_subcloud request for: %s" % - payload.get('name')) + LOG.info( + "Handling add_secondary_subcloud request for: %s" % payload.get("name") + ) return self.subcloud_manager.add_subcloud(context, subcloud_id, payload) @request_context @@ -144,22 +147,23 @@ class DCManagerService(service.Service): return self.subcloud_manager.delete_subcloud(context, subcloud_id) @request_context - def rename_subcloud(self, context, subcloud_id, curr_subcloud_name, - new_subcloud_name=None): + def rename_subcloud( + self, context, subcloud_id, curr_subcloud_name, new_subcloud_name=None + ): # Rename a subcloud - LOG.info("Handling rename_subcloud request for: %s" % - curr_subcloud_name) - subcloud = self.subcloud_manager.rename_subcloud(context, - subcloud_id, - curr_subcloud_name, - new_subcloud_name) + LOG.info("Handling rename_subcloud request for: %s" % curr_subcloud_name) + subcloud = self.subcloud_manager.rename_subcloud( + context, subcloud_id, curr_subcloud_name, new_subcloud_name + ) return subcloud @request_context def get_subcloud_name_by_region_name(self, context, subcloud_region): # get subcloud by region name - LOG.debug("Handling get_subcloud_name_by_region_name request for " - "region: %s" % subcloud_region) + LOG.debug( + "Handling get_subcloud_name_by_region_name request for region: %s" + % subcloud_region + ) subcloud = self.subcloud_manager.get_subcloud_name_by_region_name( context, subcloud_region ) @@ -167,128 +171,156 @@ class DCManagerService(service.Service): @request_context def update_subcloud( - self, context, subcloud_id, management_state=None, description=None, - location=None, group_id=None, data_install=None, force=None, - deploy_status=None, 
peer_group_id=None, bootstrap_values=None, - bootstrap_address=None + self, + context, + subcloud_id, + management_state=None, + description=None, + location=None, + group_id=None, + data_install=None, + force=None, + deploy_status=None, + peer_group_id=None, + bootstrap_values=None, + bootstrap_address=None, ): # Updates a subcloud LOG.info("Handling update_subcloud request for: %s" % subcloud_id) - subcloud = self.subcloud_manager.update_subcloud(context, subcloud_id, - management_state, - description, - location, - group_id, - data_install, - force, - deploy_status, - peer_group_id, - bootstrap_values, - bootstrap_address) + subcloud = self.subcloud_manager.update_subcloud( + context, + subcloud_id, + management_state, + description, + location, + group_id, + data_install, + force, + deploy_status, + peer_group_id, + bootstrap_values, + bootstrap_address, + ) return subcloud @request_context def update_subcloud_with_network_reconfig(self, context, subcloud_id, payload): - LOG.info("Handling update_subcloud_with_network_reconfig request for: %s", - subcloud_id) + LOG.info( + "Handling update_subcloud_with_network_reconfig request for: %s", + subcloud_id, + ) return self.subcloud_manager.update_subcloud_with_network_reconfig( - context, subcloud_id, payload) + context, subcloud_id, payload + ) @run_in_thread @request_context def redeploy_subcloud(self, context, subcloud_id, payload): # Redeploy a subcloud LOG.info("Handling redeploy_subcloud request for: %s" % subcloud_id) - return self.subcloud_manager.redeploy_subcloud(context, - subcloud_id, - payload) + return self.subcloud_manager.redeploy_subcloud(context, subcloud_id, payload) @request_context def backup_subclouds(self, context, payload): # Backup a subcloud or group of subclouds - entity = 'subcloud' if payload.get('subcloud') else 'group' - LOG.info("Handling backup_subclouds request for %s ID: %s" % - (entity, (payload.get('subcloud') or payload.get('group')))) + entity = "subcloud" if payload.get("subcloud") else "group" + LOG.info( + "Handling backup_subclouds request for %s ID: %s" + % (entity, (payload.get("subcloud") or payload.get("group"))) + ) return self.subcloud_manager.create_subcloud_backups(context, payload) @request_context def delete_subcloud_backups(self, context, release_version, payload): # Delete backup on subcloud or group of subclouds - entity = 'subcloud' if payload.get('subcloud') else 'group' - LOG.info("Handling delete_subcloud_backups request for %s ID: %s" % - (entity, (payload.get('subcloud') or payload.get('group')))) - return self.subcloud_manager.delete_subcloud_backups(context, - release_version, - payload) + entity = "subcloud" if payload.get("subcloud") else "group" + LOG.info( + "Handling delete_subcloud_backups request for %s ID: %s" + % (entity, (payload.get("subcloud") or payload.get("group"))) + ) + return self.subcloud_manager.delete_subcloud_backups( + context, release_version, payload + ) @request_context def restore_subcloud_backups(self, context, payload): # Restore a subcloud backup or a group of subclouds backups - entity = 'subcloud' if payload.get('subcloud') else 'group' - LOG.info("Handling restore_subcloud_backups request for %s ID: %s" % - (entity, (payload.get('subcloud') or payload.get('group')))) + entity = "subcloud" if payload.get("subcloud") else "group" + LOG.info( + "Handling restore_subcloud_backups request for %s ID: %s" + % (entity, (payload.get("subcloud") or payload.get("group"))) + ) return self.subcloud_manager.restore_subcloud_backups(context, payload) 
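# Runnable sketch (outside the patch) of the run_in_thread decorator that the
# reformatting above touches: the decorated RPC handler returns immediately and the
# real work continues on a separate thread, which is why the long-running deploy
# handlers in this service are decorated with it. Note that the wrapper discards the
# wrapped function's return value.
import threading
import time


def run_in_thread(fn):
    """Decorator to run a function in a separate thread."""

    def wrapper(*args, **kwargs):
        thread = threading.Thread(target=fn, args=args, kwargs=kwargs)
        thread.start()

    return wrapper


@run_in_thread
def long_running_deploy(subcloud_id):
    time.sleep(5)  # stands in for running an ansible playbook
    print(f"deploy finished for subcloud {subcloud_id}")


long_running_deploy(42)  # returns at once; the deploy continues in the background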
@request_context - def update_subcloud_sync_endpoint_type(self, context, subcloud_name, - endpoint_type_list, - openstack_installed): + def update_subcloud_sync_endpoint_type( + self, context, subcloud_name, endpoint_type_list, openstack_installed + ): # Updates subcloud sync endpoint type - LOG.info("Handling update_subcloud_sync_endpoint_type request for: %s" - % subcloud_name) + LOG.info( + "Handling update_subcloud_sync_endpoint_type request for: %s" + % subcloud_name + ) self.subcloud_manager.update_subcloud_sync_endpoint_type( - context, subcloud_name, endpoint_type_list, openstack_installed) + context, subcloud_name, endpoint_type_list, openstack_installed + ) @request_context def prestage_subcloud(self, context, payload): - LOG.info("Handling prestage_subcloud request for: %s", - payload['subcloud_name']) + LOG.info("Handling prestage_subcloud request for: %s", payload["subcloud_name"]) return self.subcloud_manager.prestage_subcloud(context, payload) @request_context def subcloud_deploy_create(self, context, subcloud_id, payload): # Adds a subcloud - LOG.info("Handling subcloud_deploy_create request for: %s" % - payload.get('name')) - return self.subcloud_manager.subcloud_deploy_create(context, - subcloud_id, - payload) + LOG.info( + "Handling subcloud_deploy_create request for: %s" % payload.get("name") + ) + return self.subcloud_manager.subcloud_deploy_create( + context, subcloud_id, payload + ) @run_in_thread @request_context - def subcloud_deploy_bootstrap(self, context, subcloud_id, payload, - initial_deployment): + def subcloud_deploy_bootstrap( + self, context, subcloud_id, payload, initial_deployment + ): # Bootstraps a subcloud - LOG.info("Handling subcloud_deploy_bootstrap request for: %s" % - payload.get('name')) + LOG.info( + "Handling subcloud_deploy_bootstrap request for: %s" % payload.get("name") + ) return self.subcloud_manager.subcloud_deploy_bootstrap( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) @run_in_thread @request_context - def subcloud_deploy_config(self, context, subcloud_id, payload, - initial_deployment): + def subcloud_deploy_config(self, context, subcloud_id, payload, initial_deployment): # Configures a subcloud LOG.info("Handling subcloud_deploy_config request for: %s" % subcloud_id) return self.subcloud_manager.subcloud_deploy_config( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) @run_in_thread @request_context - def subcloud_deploy_install(self, context, subcloud_id, payload, - initial_deployment): + def subcloud_deploy_install( + self, context, subcloud_id, payload, initial_deployment + ): # Install a subcloud LOG.info("Handling subcloud_deploy_install request for: %s" % subcloud_id) return self.subcloud_manager.subcloud_deploy_install( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) @run_in_thread @request_context def subcloud_deploy_enroll(self, context, subcloud_id, payload): # Enroll a subcloud - LOG.info(f'Handling subcloud_deploy_enroll request for: {subcloud_id}') + LOG.info(f"Handling subcloud_deploy_enroll request for: {subcloud_id}") return self.subcloud_manager.subcloud_deploy_enroll( - context, subcloud_id, payload) + context, subcloud_id, payload + ) @request_context def subcloud_deploy_complete(self, context, subcloud_id): @@ -301,26 +333,27 @@ class DCManagerService(service.Service): def subcloud_deploy_abort(self, context, subcloud_id, 
deploy_status): # Abort the subcloud deployment LOG.info("Handling subcloud_deploy_abort request for: %s" % subcloud_id) - return self.subcloud_manager.subcloud_deploy_abort(context, - subcloud_id, - deploy_status) + return self.subcloud_manager.subcloud_deploy_abort( + context, subcloud_id, deploy_status + ) @run_in_thread @request_context - def subcloud_deploy_resume(self, context, subcloud_id, subcloud_name, - payload, deploy_states_to_run): + def subcloud_deploy_resume( + self, context, subcloud_id, subcloud_name, payload, deploy_states_to_run + ): # Adds a subcloud LOG.info("Handling subcloud_deploy_resume request for: %s" % subcloud_name) - return self.subcloud_manager.subcloud_deploy_resume(context, - subcloud_id, - subcloud_name, - payload, - deploy_states_to_run) + return self.subcloud_manager.subcloud_deploy_resume( + context, subcloud_id, subcloud_name, payload, deploy_states_to_run + ) @request_context def batch_migrate_subcloud(self, context, payload): - LOG.info("Handling batch_migrate_subcloud request for peer_group: %s", - payload['peer_group']) + LOG.info( + "Handling batch_migrate_subcloud request for peer_group: %s", + payload["peer_group"], + ) return self.subcloud_manager.batch_migrate_subcloud(context, payload) @request_context @@ -330,44 +363,62 @@ class DCManagerService(service.Service): @request_context def peer_group_audit_notify(self, context, peer_group_name, payload): - LOG.info("Handling peer group audit notify of peer group " - f"{peer_group_name}") + LOG.info(f"Handling peer group audit notify of peer group {peer_group_name}") return self.peer_monitor_manager.peer_group_audit_notify( - context, peer_group_name, payload) + context, peer_group_name, payload + ) @request_context - def sync_subcloud_peer_group(self, context, association_id, - sync_subclouds=True): - LOG.info("Handling sync_subcloud_peer_group request for: %s", - association_id) + def sync_subcloud_peer_group(self, context, association_id, sync_subclouds=True): + LOG.info("Handling sync_subcloud_peer_group request for: %s", association_id) return self.system_peer_manager.sync_subcloud_peer_group( - context, association_id, sync_subclouds) + context, association_id, sync_subclouds + ) @request_context - def update_subcloud_peer_group(self, context, peer_group_id, - group_state, max_subcloud_rehoming, - group_name, new_group_name=None): - LOG.info("Handling update_subcloud_peer_group request for " - "peer group %s" % peer_group_id) + def update_subcloud_peer_group( + self, + context, + peer_group_id, + group_state, + max_subcloud_rehoming, + group_name, + new_group_name=None, + ): + LOG.info( + "Handling update_subcloud_peer_group request for peer group %s" + % peer_group_id + ) return self.system_peer_manager.update_subcloud_peer_group( - context, peer_group_id, group_state, max_subcloud_rehoming, - group_name, new_group_name) + context, + peer_group_id, + group_state, + max_subcloud_rehoming, + group_name, + new_group_name, + ) @request_context def delete_peer_group_association(self, context, association_id): - LOG.info("Handling delete_peer_group_association request for: %s", - association_id) + LOG.info( + "Handling delete_peer_group_association request for: %s", association_id + ) return self.system_peer_manager.delete_peer_group_association( - context, association_id) + context, association_id + ) @request_context - def update_association_sync_status(self, context, peer_group_id, - sync_status, sync_message=None): + def update_association_sync_status( + self, context, peer_group_id,
sync_status, sync_message=None + ): # Updates peer group association sync_status - LOG.info("Handling update_peer_association_sync_status request for: %s" - % peer_group_id) + LOG.info( + "Handling update_peer_association_sync_status request for: %s" + % peer_group_id + ) return self.system_peer_manager.update_association_sync_status( - context, peer_group_id, sync_status, sync_message) + context, peer_group_id, sync_status, sync_message + ) def _stop_rpc_server(self): # Stop RPC connection to prevent new requests @@ -375,9 +426,9 @@ class DCManagerService(service.Service): try: self._rpc_server.stop() self._rpc_server.wait() - LOG.info('RPC service stopped successfully') + LOG.info("RPC service stopped successfully") except Exception as ex: - LOG.error('Failed to stop RPC service: %s', str(ex)) + LOG.error("Failed to stop RPC service: %s", str(ex)) def stop(self): SubprocessCleanup.shutdown_cleanup(origin="service") diff --git a/distributedcloud/dcmanager/manager/subcloud_manager.py b/distributedcloud/dcmanager/manager/subcloud_manager.py index aada8dea0..faa60a3b3 100644 --- a/distributedcloud/dcmanager/manager/subcloud_manager.py +++ b/distributedcloud/dcmanager/manager/subcloud_manager.py @@ -77,38 +77,43 @@ CONF = cfg.CONF # Name of our distributed cloud addn_hosts file for dnsmasq # to read. This file is referenced in dnsmasq.conf -ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc' +ADDN_HOSTS_DC = "dnsmasq.addn_hosts_dc" # Subcloud configuration paths -ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/create_subcloud_backup.yml' -ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/delete_subcloud_backup.yml' -ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/restore_subcloud_backup.yml' -ANSIBLE_SUBCLOUD_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml' -ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml' -ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK = \ - '/usr/share/ansible/stx-ansible/playbooks/update_subcloud.yml' +ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK = ( + "/usr/share/ansible/stx-ansible/playbooks/create_subcloud_backup.yml" +) +ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK = ( + "/usr/share/ansible/stx-ansible/playbooks/delete_subcloud_backup.yml" +) +ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK = ( + "/usr/share/ansible/stx-ansible/playbooks/restore_subcloud_backup.yml" +) +ANSIBLE_SUBCLOUD_PLAYBOOK = "/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml" +ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = ( + "/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml" +) +ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK = ( + "/usr/share/ansible/stx-ansible/playbooks/update_subcloud.yml" +) # TODO(yuxing) Remove the ANSIBLE_VALIDATE_KEYSTONE_PASSWORD_SCRIPT when end # the support of rehoming a subcloud with a software version below 22.12 -ANSIBLE_VALIDATE_KEYSTONE_PASSWORD_SCRIPT = \ - consts.ANSIBLE_CURRENT_VERSION_BASE_PATH + \ - '/roles/rehome-subcloud/update-keystone-data/files/' + \ - 'validate_keystone_passwords.sh' +ANSIBLE_VALIDATE_KEYSTONE_PASSWORD_SCRIPT = ( + consts.ANSIBLE_CURRENT_VERSION_BASE_PATH + + "/roles/rehome-subcloud/update-keystone-data/files/" + + "validate_keystone_passwords.sh" +) USERS_TO_REPLICATE = [ - 'sysinv', - 'patching', - 'usm', - 'vim', - 'mtce', - 'fm', - 'barbican', - 'dcmanager' + "sysinv", + "patching", + "usm", + "vim", + "mtce", + "fm", + "barbican", + "dcmanager", ] # The timeout of the rehome playbook is 
set to 180 seconds as it takes a @@ -144,24 +149,24 @@ TRANSITORY_STATES = { consts.DEPLOY_STATE_PRE_ENROLL: consts.DEPLOY_STATE_PRE_ENROLL_FAILED, consts.DEPLOY_STATE_ENROLLING: consts.DEPLOY_STATE_ENROLL_FAILED, consts.DEPLOY_STATE_PRE_INIT_ENROLL: consts.DEPLOY_STATE_PRE_INIT_ENROLL_FAILED, - consts.DEPLOY_STATE_INITIATING_ENROLL: consts.DEPLOY_STATE_INIT_ENROLL_FAILED + consts.DEPLOY_STATE_INITIATING_ENROLL: consts.DEPLOY_STATE_INIT_ENROLL_FAILED, } TRANSITORY_BACKUP_STATES = { consts.BACKUP_STATE_VALIDATING: consts.BACKUP_STATE_VALIDATE_FAILED, consts.BACKUP_STATE_PRE_BACKUP: consts.BACKUP_STATE_PREP_FAILED, - consts.BACKUP_STATE_IN_PROGRESS: consts.BACKUP_STATE_FAILED + consts.BACKUP_STATE_IN_PROGRESS: consts.BACKUP_STATE_FAILED, } TRANSITORY_PRESTAGE_STATES = { consts.PRESTAGE_STATE_PACKAGES: consts.PRESTAGE_STATE_FAILED, - consts.PRESTAGE_STATE_IMAGES: consts.PRESTAGE_STATE_FAILED + consts.PRESTAGE_STATE_IMAGES: consts.PRESTAGE_STATE_FAILED, } MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE = 250 MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE = 250 MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE = 100 -CENTRAL_BACKUP_DIR = '/opt/dc-vault/backups' +CENTRAL_BACKUP_DIR = "/opt/dc-vault/backups" ENDPOINT_URLS = { dccommon_consts.ENDPOINT_TYPE_PLATFORM: "https://{}:6386/v1", @@ -181,31 +186,31 @@ MIN_WAIT_BEFORE_RETRY_KUBE_REQUEST = 1 # subcloud_deploy_create. They should not be deleted from # the overrides if it's needed to recreate the file. GENERATED_OVERRIDES_VALUES = [ - 'region_config', - 'distributed_cloud_role', - 'system_controller_subnet', - 'system_controller_floating_address', - 'system_controller_oam_subnet', - 'system_controller_oam_floating_address', - 'system_controller_keystone_admin_user_id', - 'system_controller_keystone_admin_project_id', - 'system_controller_keystone_services_project_id', - 'system_controller_keystone_sysinv_user_id', - 'system_controller_keystone_dcmanager_user_id', - 'users', - 'dc_root_ca_cert', - 'sc_ca_cert', - 'sc_ca_key' + "region_config", + "distributed_cloud_role", + "system_controller_subnet", + "system_controller_floating_address", + "system_controller_oam_subnet", + "system_controller_oam_floating_address", + "system_controller_keystone_admin_user_id", + "system_controller_keystone_admin_project_id", + "system_controller_keystone_services_project_id", + "system_controller_keystone_sysinv_user_id", + "system_controller_keystone_dcmanager_user_id", + "users", + "dc_root_ca_cert", + "sc_ca_cert", + "sc_ca_key", ] VALUES_TO_DELETE_OVERRIDES = [ - 'deploy_playbook', - 'deploy_values', - 'deploy_config', - 'deploy_chart', - 'deploy_overrides', - 'install_values', - 'sysadmin_password' + "deploy_playbook", + "deploy_values", + "deploy_config", + "deploy_chart", + "deploy_overrides", + "install_values", + "sysadmin_password", ] @@ -215,10 +220,11 @@ class SubcloudManager(manager.Manager): regionone_data = collections.defaultdict(dict) def __init__(self, *args, **kwargs): - LOG.debug(_('SubcloudManager initialization...')) + LOG.debug(_("SubcloudManager initialization...")) - super(SubcloudManager, self).__init__(service_name="subcloud_manager", - *args, **kwargs) + super(SubcloudManager, self).__init__( + service_name="subcloud_manager", *args, **kwargs + ) self.context = dcmanager_context.get_admin_context() self.dcorch_rpc_client = dcorch_rpc_client.EngineWorkerClient() self.fm_api = fm_api.FaultAPIs() @@ -240,25 +246,18 @@ class SubcloudManager(manager.Manager): def _create_intermediate_ca_cert(payload): subcloud_region = payload["region_name"] cert_name = 
SubcloudManager._get_subcloud_cert_name(subcloud_region) - secret_name = SubcloudManager._get_subcloud_cert_secret_name( - subcloud_region) + secret_name = SubcloudManager._get_subcloud_cert_secret_name(subcloud_region) cert = { - "apiVersion": "%s/%s" % (kubeoperator.CERT_MANAGER_GROUP, - kubeoperator.CERT_MANAGER_VERSION), + "apiVersion": "%s/%s" + % (kubeoperator.CERT_MANAGER_GROUP, kubeoperator.CERT_MANAGER_VERSION), "kind": "Certificate", - "metadata": { - "namespace": CERT_NAMESPACE, - "name": cert_name - }, + "metadata": {"namespace": CERT_NAMESPACE, "name": cert_name}, "spec": { "secretName": secret_name, "duration": SC_INTERMEDIATE_CERT_DURATION, "renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE, - "issuerRef": { - "kind": "Issuer", - "name": "dc-adminep-root-ca-issuer" - }, + "issuerRef": {"kind": "Issuer", "name": "dc-adminep-root-ca-issuer"}, "commonName": cert_name, "isCA": True, }, @@ -284,166 +283,241 @@ class SubcloudManager(manager.Manager): # 15th retry: 1*2**(14*0.3 + 1) + 1, max wait time ~37.76s. for count in range(MAX_ATTEMPTS_TO_GET_INTERMEDIATE_CA_CERT): secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE) - wait_per_request = \ - MIN_WAIT_BEFORE_RETRY_KUBE_REQUEST * 2 ** (count * 0.3 + 1) \ - + random.uniform(0, MIN_WAIT_BEFORE_RETRY_KUBE_REQUEST) - if not hasattr(secret, 'data'): + wait_per_request = MIN_WAIT_BEFORE_RETRY_KUBE_REQUEST * 2 ** ( + count * 0.3 + 1 + ) + random.uniform(0, MIN_WAIT_BEFORE_RETRY_KUBE_REQUEST) + if not hasattr(secret, "data"): time.sleep(wait_per_request) - LOG.debug('Wait for %s ... %s' % (secret_name, count)) + LOG.debug("Wait for %s ... %s" % (secret_name, count)) continue data = secret.data - if ('ca.crt' not in data or - 'tls.crt' not in data or 'tls.key' not in data) or \ - not (data['ca.crt'] and data['tls.crt'] and data['tls.key']): + if ( + "ca.crt" not in data or "tls.crt" not in data or "tls.key" not in data + ) or not (data["ca.crt"] and data["tls.crt"] and data["tls.key"]): # ca cert, certificate and key pair are needed and must exist # for creating an intermediate ca. If not, certificate is not # ready yet. time.sleep(wait_per_request) - LOG.debug('Wait for %s ... %s' % (secret_name, count)) + LOG.debug("Wait for %s ... %s" % (secret_name, count)) continue - payload['dc_root_ca_cert'] = data['ca.crt'] - payload['sc_ca_cert'] = data['tls.crt'] - payload['sc_ca_key'] = data['tls.key'] + payload["dc_root_ca_cert"] = data["ca.crt"] + payload["sc_ca_cert"] = data["tls.crt"] + payload["sc_ca_key"] = data["tls.key"] return - raise Exception("Secret for certificate %s is not ready." 
% cert_name) # TODO(kmacleod) switch to using utils.get_ansible_filename @staticmethod - def _get_ansible_filename(subcloud_name, postfix='.yml'): + def _get_ansible_filename(subcloud_name, postfix=".yml"): ansible_filename = os.path.join( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name + postfix) + dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + postfix + ) return ansible_filename - def compose_install_command(self, subcloud_name, - ansible_subcloud_inventory_file, - software_version=None): + def compose_install_command( + self, subcloud_name, ansible_subcloud_inventory_file, software_version=None + ): install_command = [ - "ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + - subcloud_name + '/' + "install_values.yml", - "-e", "install_release_version=%s" % - software_version if software_version else SW_VERSION, - "-e", "rvmc_config_file=%s" % - os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name, - dccommon_consts.RVMC_CONFIG_FILE_NAME)] + "ansible-playbook", + dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK, + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "-e", + "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "/" + + "install_values.yml", + "-e", + ( + "install_release_version=%s" % software_version + if software_version + else SW_VERSION + ), + "-e", + "rvmc_config_file=%s" + % os.path.join( + dccommon_consts.ANSIBLE_OVERRIDES_PATH, + subcloud_name, + dccommon_consts.RVMC_CONFIG_FILE_NAME, + ), + ] return install_command # TODO(glyraper): software_version will be used in the future - def compose_enroll_command(self, subcloud_name, - subcloud_region, - ansible_subcloud_inventory_file, - software_version, - state): + def compose_enroll_command( + self, + subcloud_name, + subcloud_region, + ansible_subcloud_inventory_file, + software_version, + state, + ): if state == "init": enroll_command = [ "ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + - subcloud_name + '/' + "enroll_overrides.yml", - "-e", "install_release_version=%s" % - software_version if software_version else SW_VERSION, - "-e", "rvmc_config_file=%s" % - os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name, - dccommon_consts.RVMC_CONFIG_FILE_NAME)] + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "-e", + "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "/" + + "enroll_overrides.yml", + "-e", + ( + "install_release_version=%s" % software_version + if software_version + else SW_VERSION + ), + "-e", + "rvmc_config_file=%s" + % os.path.join( + dccommon_consts.ANSIBLE_OVERRIDES_PATH, + subcloud_name, + dccommon_consts.RVMC_CONFIG_FILE_NAME, + ), + ] return enroll_command elif state == "enroll": extra_vars = "override_files_dir='%s' region_name=%s" % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_region) + dccommon_consts.ANSIBLE_OVERRIDES_PATH, + subcloud_region, + ) enroll_command = [ "ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_ENROLL_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "-e", extra_vars] + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "-e", + extra_vars, + ] return enroll_command 
else: raise exceptions.InvalidInputError - def compose_bootstrap_command(self, subcloud_name, - subcloud_region, - ansible_subcloud_inventory_file, - software_version=None): + def compose_bootstrap_command( + self, + subcloud_name, + subcloud_region, + ansible_subcloud_inventory_file, + software_version=None, + ): bootstrap_command = [ "ansible-playbook", utils.get_playbook_for_software_version( - ANSIBLE_SUBCLOUD_PLAYBOOK, software_version), - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name + ANSIBLE_SUBCLOUD_PLAYBOOK, software_version + ), + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, ] # Add the overrides dir and region_name so the playbook knows # which overrides to load bootstrap_command += [ - "-e", str("override_files_dir='%s' region_name=%s") % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_region), - "-e", "install_release_version=%s" % - software_version if software_version else SW_VERSION] + "-e", + str("override_files_dir='%s' region_name=%s") + % (dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_region), + "-e", + ( + "install_release_version=%s" % software_version + if software_version + else SW_VERSION + ), + ] return bootstrap_command def compose_config_command( - self, subcloud_name, ansible_subcloud_inventory_file, payload): + self, subcloud_name, ansible_subcloud_inventory_file, payload + ): config_command = [ - "ansible-playbook", payload[consts.DEPLOY_PLAYBOOK], - "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + - subcloud_name + '_deploy_values.yml', - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name + "ansible-playbook", + payload[consts.DEPLOY_PLAYBOOK], + "-e", + "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "_deploy_values.yml", + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, ] return config_command def compose_backup_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ - "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, + "ansible-playbook", + ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK, + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, "-e", - "subcloud_bnr_overrides=%s" % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + - "_backup_create_values.yml" - ) + "subcloud_bnr_overrides=%s" + % ( + dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "_backup_create_values.yml" + ), ] return backup_command - def compose_backup_delete_command(self, subcloud_name, - ansible_subcloud_inventory_file=None): + def compose_backup_delete_command( + self, subcloud_name, ansible_subcloud_inventory_file=None + ): backup_command = [ - "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK, - "-e", "subcloud_bnr_overrides=%s" % - dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + - subcloud_name + "_backup_delete_values.yml" + "ansible-playbook", + ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK, + "-e", + "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "_backup_delete_values.yml", ] if ansible_subcloud_inventory_file: # Backup stored in subcloud storage - backup_command.extend(("-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name)) + backup_command.extend( + ("-i", ansible_subcloud_inventory_file, "--limit", subcloud_name) + ) else: # Backup stored in central storage backup_command.extend(("-e", "inventory_hostname=%s" % 
subcloud_name)) return backup_command def compose_backup_restore_command( - self, subcloud_name, ansible_subcloud_inventory_file): + self, subcloud_name, ansible_subcloud_inventory_file + ): backup_command = [ - "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "-e", "subcloud_bnr_overrides=%s" % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + - "_backup_restore_values.yml" - ) + "ansible-playbook", + ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK, + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "-e", + "subcloud_bnr_overrides=%s" + % ( + dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "_backup_restore_values.yml" + ), ] return backup_command @@ -451,51 +525,79 @@ class SubcloudManager(manager.Manager): self, subcloud_name, ansible_subcloud_inventory_file, software_version=None ): subcloud_update_command = [ - "ansible-playbook", ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK, - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "--timeout", UPDATE_PLAYBOOK_TIMEOUT, - "-e", "install_release_version=%s" % - software_version if software_version else SW_VERSION, - "-e", "subcloud_update_overrides=%s" % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + - "_update_values.yml" - ) + "ansible-playbook", + ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK, + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "--timeout", + UPDATE_PLAYBOOK_TIMEOUT, + "-e", + ( + "install_release_version=%s" % software_version + if software_version + else SW_VERSION + ), + "-e", + "subcloud_update_overrides=%s" + % ( + dccommon_consts.ANSIBLE_OVERRIDES_PATH + + "/" + + subcloud_name + + "_update_values.yml" + ), ] return subcloud_update_command - def compose_rehome_command(self, subcloud_name, subcloud_region, - ansible_subcloud_inventory_file, - software_version): + def compose_rehome_command( + self, + subcloud_name, + subcloud_region, + ansible_subcloud_inventory_file, + software_version, + ): extra_vars = "override_files_dir='%s' region_name=%s" % ( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_region) + dccommon_consts.ANSIBLE_OVERRIDES_PATH, + subcloud_region, + ) # TODO(yuxing) Remove the validate_keystone_passwords_script when end # the support of rehoming a subcloud with a software version below 22.12 if software_version <= LAST_SW_VERSION_IN_CENTOS: - extra_vars += (" validate_keystone_passwords_script='%s'" % - ANSIBLE_VALIDATE_KEYSTONE_PASSWORD_SCRIPT) + extra_vars += ( + " validate_keystone_passwords_script='%s'" + % ANSIBLE_VALIDATE_KEYSTONE_PASSWORD_SCRIPT + ) rehome_command = [ "ansible-playbook", utils.get_playbook_for_software_version( - ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK, software_version), - "-i", ansible_subcloud_inventory_file, - "--limit", subcloud_name, - "--timeout", REHOME_PLAYBOOK_TIMEOUT, - "-e", extra_vars] + ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK, software_version + ), + "-i", + ansible_subcloud_inventory_file, + "--limit", + subcloud_name, + "--timeout", + REHOME_PLAYBOOK_TIMEOUT, + "-e", + extra_vars, + ] return rehome_command def _migrate_manage_subcloud( - self, context, payload, available_system_peers, subcloud): + self, context, payload, available_system_peers, subcloud + ): success = True # Try to unmanage the subcloud on peer system if available_system_peers: - if self._unmanage_system_peer_subcloud(available_system_peers, - subcloud): + if self._unmanage_system_peer_subcloud(available_system_peers, subcloud): 
success = False - LOG.warning("Unmanged subcloud: %s error on peer system, " - "exit migration" % subcloud.name) + LOG.warning( + "Unmanged subcloud: %s error on peer system, exit migration" + % subcloud.name + ) return subcloud, success # migrate and set managed for @@ -511,28 +613,29 @@ class SubcloudManager(manager.Manager): job_done_ts = time.monotonic() while True: offline_seconds = time.monotonic() - job_done_ts - if subcloud.availability_status == \ - dccommon_consts.AVAILABILITY_OFFLINE: + if subcloud.availability_status == dccommon_consts.AVAILABILITY_OFFLINE: if offline_seconds >= consts.BATCH_REHOME_MGMT_STATES_TIMEOUT: - LOG.warning("Skip trying to manage subcloud: %s, " - "wait online timeout [%d]" % - (subcloud.name, offline_seconds)) + LOG.warning( + "Skip trying to manage subcloud: %s, " + "wait online timeout [%d]" + % (subcloud.name, offline_seconds) + ) success = False break time.sleep(20) else: try: self.update_subcloud( - context, subcloud.id, - dccommon_consts.MANAGEMENT_MANAGED) + context, subcloud.id, dccommon_consts.MANAGEMENT_MANAGED + ) except Exception: - LOG.exception("Unable to manage subcloud %s " - "after migration operation" % - subcloud.name) + LOG.exception( + "Unable to manage subcloud %s after migration operation" + % subcloud.name + ) success = False return subcloud, success - LOG.info("Set manage of subcloud: %s success" - % subcloud.name) + LOG.info("Set manage of subcloud: %s success" % subcloud.name) break subcloud = db_api.subcloud_get(context, subcloud.id) @@ -542,19 +645,25 @@ class SubcloudManager(manager.Manager): system_peers = list() # Get associations by peer group associations = db_api.peer_group_association_get_by_peer_group_id( - self.context, peer_group.id) + self.context, peer_group.id + ) if not associations: - LOG.info("No association found for peer group %s" % - peer_group.peer_group_name) + LOG.info( + "No association found for peer group %s" % peer_group.peer_group_name + ) return system_peers for association in associations: system_peer = db_api.system_peer_get( - self.context, association.system_peer_id) + self.context, association.system_peer_id + ) # Get 'available' system peer - if system_peer.availability_state != \ - consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE: - LOG.warning("Peer system %s offline, skip checking" % - system_peer.peer_name) + if ( + system_peer.availability_state + != consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE + ): + LOG.warning( + "Peer system %s offline, skip checking" % system_peer.peer_name + ) continue else: system_peers.append(system_peer) @@ -564,48 +673,56 @@ class SubcloudManager(manager.Manager): def _unmanage_system_peer_subcloud(self, system_peers, subcloud): unmanaged_error = False for system_peer in system_peers: - LOG.debug("Get subcloud: %s from system_peer: %s" % - (subcloud.name, system_peer.peer_name)) + LOG.debug( + "Get subcloud: %s from system_peer: %s" + % (subcloud.name, system_peer.peer_name) + ) for attempt in range(3): try: - dc_client = \ - SystemPeerManager.get_peer_dc_client(system_peer) + dc_client = SystemPeerManager.get_peer_dc_client(system_peer) # Get remote subcloud by region_name from system peer remote_subcloud = dc_client.get_subcloud( - subcloud.region_name, is_region_name=True) - is_unmanaged = remote_subcloud.get('management-state') == \ - dccommon_consts.MANAGEMENT_UNMANAGED - is_rehome_pending = remote_subcloud.get('deploy-status') == \ - consts.DEPLOY_STATE_REHOME_PENDING + subcloud.region_name, is_region_name=True + ) + is_unmanaged = ( + 
remote_subcloud.get("management-state") + == dccommon_consts.MANAGEMENT_UNMANAGED + ) + is_rehome_pending = ( + remote_subcloud.get("deploy-status") + == consts.DEPLOY_STATE_REHOME_PENDING + ) # Check if it's already in the correct state if is_unmanaged and is_rehome_pending: LOG.info( - f"Remote subcloud {remote_subcloud.get('name')} " - f"from system peer {system_peer.peer_name} is " - "already unmanaged and rehome-pending, " - "skipping unmanage attempt" + f"Remote subcloud {remote_subcloud.get('name')} from " + f"system peer {system_peer.peer_name} is already " + "unmanaged and rehome-pending, skipping unmanage attempt" ) break try: + UNMANAGED = dccommon_consts.MANAGEMENT_UNMANAGED if not is_unmanaged: # Unmanage and update the deploy-status payload = { - "management-state": - dccommon_consts.MANAGEMENT_UNMANAGED, - "migrate": "true"} + "management-state": UNMANAGED, + "migrate": "true", + } remote_subcloud = dc_client.update_subcloud( subcloud.region_name, files=None, data=payload, - is_region_name=True) - LOG.info("Successfully updated subcloud: " - f"{remote_subcloud.get('name')} on peer " - f"system {system_peer.peer_name} to " - f"{dccommon_consts.MANAGEMENT_UNMANAGED} " - f"and {consts.DEPLOY_STATE_REHOME_PENDING}" - " state.") + is_region_name=True, + ) + LOG.info( + "Successfully updated subcloud: " + f"{remote_subcloud.get('name')} on peer " + f"system {system_peer.peer_name} to " + f"{dccommon_consts.MANAGEMENT_UNMANAGED} " + f"and {consts.DEPLOY_STATE_REHOME_PENDING} state." + ) else: # Already unmanaged, just update the deploy-status payload = {"migrate": "true"} @@ -613,53 +730,58 @@ class SubcloudManager(manager.Manager): subcloud.region_name, files=None, data=payload, - is_region_name=True) - LOG.info("Successfully updated subcloud: " - f"{remote_subcloud.get('name')} on peer " - f"system {system_peer.peer_name} to " - f"{consts.DEPLOY_STATE_REHOME_PENDING}" - " state.") + is_region_name=True, + ) + LOG.info( + "Successfully updated subcloud: " + f"{remote_subcloud.get('name')} on peer " + f"system {system_peer.peer_name} to " + f"{consts.DEPLOY_STATE_REHOME_PENDING} state." 
+ ) return unmanaged_error except Exception as e: raise exceptions.SubcloudNotUnmanaged() from e except SubcloudNotFound: - LOG.info("No identical subcloud: %s found on " - "peer system: %s" % - (subcloud.region_name, system_peer.peer_name)) + LOG.info( + "No identical subcloud: %s found on peer system: %s" + % (subcloud.region_name, system_peer.peer_name) + ) break except exceptions.SubcloudNotUnmanaged: - LOG.exception("Unmanaged error on subcloud: %s " - "on system %s" % - (subcloud.region_name, - system_peer.peer_name)) + LOG.exception( + "Unmanaged error on subcloud: %s on system %s" + % (subcloud.region_name, system_peer.peer_name) + ) unmanaged_error = True except Exception: - LOG.exception("Failed to set unmanged for " - "subcloud: %s on system %s attempt: %d" - % (subcloud.region_name, - system_peer.peer_name, attempt)) + LOG.exception( + "Failed to set unmanged for subcloud: %s on system %s attempt: " + "%d" % (subcloud.region_name, system_peer.peer_name, attempt) + ) time.sleep(1) return unmanaged_error def _clear_alarm_for_peer_group(self, peer_group): # Get alarms related to peer group faults = self.fm_api.get_faults_by_id( - fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED) + fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED + ) if not faults: return for fault in faults: - entity_instance_id_str = "peer_group=%s,peer=" % \ - (peer_group.peer_group_name) + entity_instance_id_str = "peer_group=%s,peer=" % ( + peer_group.peer_group_name + ) if entity_instance_id_str in fault.entity_instance_id: - LOG.info("Clear alarm for peer group %s" % - peer_group.peer_group_name) + LOG.info("Clear alarm for peer group %s" % peer_group.peer_group_name) self.fm_api.clear_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED, - fault.entity_instance_id) + fault.entity_instance_id, + ) def migrate_subcloud(self, context, subcloud_ref, payload): - '''migrate_subcloud function is for day-2's rehome purpose. + """migrate_subcloud function is for day-2's rehome purpose. This is called by 'dcmanager subcloud migrate '. This function is used to migrate those 'secondary' subcloud. @@ -667,48 +789,50 @@ class SubcloudManager(manager.Manager): :param context: request context object :param subcloud_ref: id or name of the subcloud :param payload: subcloud configuration - ''' + """ subcloud = None try: # subcloud_ref could be int type id. 
subcloud = utils.subcloud_get_by_ref(context, str(subcloud_ref)) if not subcloud: - LOG.error( - "Failed to migrate, non-existent subcloud %s" % subcloud_ref - ) + LOG.error("Failed to migrate, non-existent subcloud %s" % subcloud_ref) return - if 'sysadmin_password' not in payload: - LOG.error("Failed to migrate subcloud: %s, must provide " - "sysadmin_password" % subcloud.name) + if "sysadmin_password" not in payload: + LOG.error( + "Failed to migrate subcloud: %s, must provide sysadmin_password" + % subcloud.name + ) return if subcloud.deploy_status not in [ - consts.DEPLOY_STATE_SECONDARY, consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED + consts.DEPLOY_STATE_SECONDARY, + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, ]: - LOG.error("Failed to migrate subcloud: %s, " - "must be in secondary or rehome failure state" % - subcloud.name) + LOG.error( + "Failed to migrate subcloud: %s, " + "must be in secondary or rehome failure state" % subcloud.name + ) return db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_PRE_REHOME) + context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_REHOME + ) rehome_data = json.loads(subcloud.rehome_data) - saved_payload = rehome_data['saved_payload'] + saved_payload = rehome_data["saved_payload"] # Update sysadmin_password - sysadmin_password = \ - base64.b64decode(payload['sysadmin_password']).decode('utf-8') - saved_payload['sysadmin_password'] = sysadmin_password + sysadmin_password = base64.b64decode(payload["sysadmin_password"]).decode( + "utf-8" + ) + saved_payload["sysadmin_password"] = sysadmin_password # Decode admin_password - if 'admin_password' in saved_payload: - saved_payload['admin_password'] = base64.b64decode( - saved_payload['admin_password']).decode('utf-8') + if "admin_password" in saved_payload: + saved_payload["admin_password"] = base64.b64decode( + saved_payload["admin_password"] + ).decode("utf-8") # Re-generate ansible config based on latest rehome_data - subcloud = self.generate_subcloud_ansible_config( - subcloud, - saved_payload) + subcloud = self.generate_subcloud_ansible_config(subcloud, saved_payload) self.rehome_subcloud(context, subcloud) except Exception: # If we failed to migrate the subcloud, update the @@ -716,36 +840,38 @@ class SubcloudManager(manager.Manager): if subcloud: LOG.exception("Failed to migrate subcloud %s" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED) + context, + subcloud.id, + deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ) return def batch_migrate_subcloud(self, context, payload): - if 'peer_group' not in payload: - LOG.error("Failed to migrate subcloud peer group, " - "missing peer_group") + if "peer_group" not in payload: + LOG.error("Failed to migrate subcloud peer group, missing peer_group") return - if 'sysadmin_password' not in payload: - LOG.error("Failed to migrate subcloud peer group, " - "missing sysadmin_password") + if "sysadmin_password" not in payload: + LOG.error( + "Failed to migrate subcloud peer group, missing sysadmin_password" + ) return if self.batch_rehome_lock.locked(): LOG.warning("Batch migrate is already running.") return with self.batch_rehome_lock: try: - peer_group = \ - utils.subcloud_peer_group_get_by_ref( - context, - payload['peer_group']) + peer_group = utils.subcloud_peer_group_get_by_ref( + context, payload["peer_group"] + ) self.run_batch_migrate( - context, peer_group, - 
payload['sysadmin_password']) + context, peer_group, payload["sysadmin_password"] + ) except Exception as e: - LOG.exception("Failed to batch migrate subcloud peer " - "group: %s error: %s" % - (payload['peer_group'], e)) + LOG.exception( + "Failed to batch migrate subcloud peer group: %s error: %s" + % (payload["peer_group"], e) + ) def run_batch_migrate(self, context, peer_group, sysadmin_password): subclouds = db_api.subcloud_get_for_peer_group(context, peer_group.id) @@ -755,62 +881,68 @@ class SubcloudManager(manager.Manager): # Verify rehome data rehome_data_json_str = tmp_subcloud.rehome_data if not rehome_data_json_str: - LOG.error("Unable to migrate subcloud: %s " - "no rehome data" % tmp_subcloud.name) + LOG.error( + "Unable to migrate subcloud: %s no rehome data" % tmp_subcloud.name + ) db_api.subcloud_update( - context, tmp_subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED) + context, + tmp_subcloud.id, + deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ) continue tmp_rehome_data = json.loads(rehome_data_json_str) # Verify saved_payload in _rehome_data - if 'saved_payload' not in tmp_rehome_data: - LOG.error("Unable to migrate subcloud: %s " - "no saved_payload" % tmp_subcloud.name) + if "saved_payload" not in tmp_rehome_data: + LOG.error( + "Unable to migrate subcloud: %s no saved_payload" + % tmp_subcloud.name + ) db_api.subcloud_update( - context, tmp_subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED) + context, + tmp_subcloud.id, + deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ) continue - if (tmp_subcloud.deploy_status in - [consts.DEPLOY_STATE_SECONDARY, - consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED]): + if tmp_subcloud.deploy_status in [ + consts.DEPLOY_STATE_SECONDARY, + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ]: subclouds_ready_to_migrate.append(tmp_subcloud) else: - LOG.info("Skipping subcloud %s from batch migration: " - "subcloud deploy_status is not in " - "secondary, rehome-failed or rehome-prep-failed" % - tmp_subcloud.name) + LOG.info( + "Skipping subcloud %s from batch migration: subcloud deploy_status " + "is not in secondary, rehome-failed or rehome-prep-failed" + % tmp_subcloud.name + ) # If no subcloud need to rehome, exit if not subclouds_ready_to_migrate: - LOG.info("No subclouds to be migrated in peer group: %s" - " ending migration attempt" - % str(peer_group.peer_group_name)) + LOG.info( + "No subclouds to be migrated in peer group: %s " + "ending migration attempt" % str(peer_group.peer_group_name) + ) return # Set migration_status to migrating db_api.subcloud_peer_group_update( - self.context, - peer_group.id, - migration_status=consts.PEER_GROUP_MIGRATING) + self.context, peer_group.id, migration_status=consts.PEER_GROUP_MIGRATING + ) # Try to get peer system by peer group system_peers = self._get_peer_system_list(peer_group) # Use thread pool to limit number of operations in parallel - migrate_pool = greenpool.GreenPool( - size=peer_group.max_subcloud_rehoming) + migrate_pool = greenpool.GreenPool(size=peer_group.max_subcloud_rehoming) # Spawn threads to migrate each applicable subcloud - tmp_payload = {'sysadmin_password': sysadmin_password} - migrate_function = functools.partial(self._migrate_manage_subcloud, - context, - tmp_payload, - system_peers) + tmp_payload = {"sysadmin_password": sysadmin_password} + migrate_function = functools.partial( + self._migrate_manage_subcloud, context, tmp_payload, system_peers + ) - 
self._run_parallel_group_operation('migrate', - migrate_function, - migrate_pool, - subclouds_ready_to_migrate) + self._run_parallel_group_operation( + "migrate", migrate_function, migrate_pool, subclouds_ready_to_migrate + ) # Set migration_status to complete, # Update system leader id and name @@ -820,14 +952,15 @@ class SubcloudManager(manager.Manager): peer_group.id, system_leader_id=local_system.uuid, system_leader_name=local_system.name, - migration_status=consts.PEER_GROUP_MIGRATION_COMPLETE) + migration_status=consts.PEER_GROUP_MIGRATION_COMPLETE, + ) # Try to send audit request to system peer - resp = PeerGroupAuditManager.send_audit_peer_group( - system_peers, peer_group) + resp = PeerGroupAuditManager.send_audit_peer_group(system_peers, peer_group) if resp: - LOG.warning("Audit peer group %s response: %s" % - (peer_group.peer_group_name, resp)) + LOG.warning( + "Audit peer group %s response: %s" % (peer_group.peer_group_name, resp) + ) # Try to clear existing alarm if we rehomed a '0' priority peer group if peer_group.group_priority == 0: @@ -838,7 +971,8 @@ class SubcloudManager(manager.Manager): def rehome_subcloud(self, context, subcloud): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) @@ -849,35 +983,40 @@ class SubcloudManager(manager.Manager): subcloud.name, subcloud.region_name, ansible_subcloud_inventory_file, - subcloud.software_version) + subcloud.software_version, + ) # Update the deploy status to rehoming db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_REHOMING) + context, subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOMING + ) # Run the rehome-subcloud playbook try: ansible = AnsiblePlaybook(subcloud.name) ansible.run_playbook(log_file, rehome_command) except PlaybookExecutionFailed: - msg = "Failed to run the subcloud rehome playbook" \ - f" for subcloud {subcloud.name}, check individual log at " \ - f"{log_file} for detailed output." + msg = ( + "Failed to run the subcloud rehome playbook for subcloud " + f"{subcloud.name}, check individual log at {log_file} " + "for detailed output." + ) LOG.error(msg) msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_REHOMING) + subcloud.name, log_file, consts.DEPLOY_STATE_REHOMING + ) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOME_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) return # Update the deploy status to complete and rehomed flag to true only # after playbook execution succeeded. db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_DONE, - rehomed=True) + context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, rehomed=True + ) LOG.info("Successfully rehomed subcloud %s" % subcloud.name) def add_subcloud(self, context, subcloud_id, payload): @@ -887,20 +1026,25 @@ class SubcloudManager(manager.Manager): :param subcloud_id: id of the subcloud :param payload: subcloud configuration """ - LOG.info(f"Adding subcloud {payload['name']} with region " - f"{payload['region_name']}.") + LOG.info( + f"Adding subcloud {payload['name']} with region {payload['region_name']}." 
+ ) - rehoming = payload.get('migrate', '').lower() == "true" - secondary = (payload.get('secondary', '').lower() == "true") - enroll = payload.get('enroll', '').lower() == "true" + rehoming = payload.get("migrate", "").lower() == "true" + secondary = payload.get("secondary", "").lower() == "true" + enroll = payload.get("enroll", "").lower() == "true" initial_deployment = True if not (rehoming or enroll) else False # Create the subcloud - subcloud = self.subcloud_deploy_create(context, subcloud_id, - payload, rehoming, - initial_deployment, - return_as_dict=False, - enroll=enroll) + subcloud = self.subcloud_deploy_create( + context, + subcloud_id, + payload, + rehoming, + initial_deployment, + return_as_dict=False, + enroll=enroll, + ) # return if 'secondary' subcloud if secondary: @@ -934,8 +1078,12 @@ class SubcloudManager(manager.Manager): # Finish adding the subcloud by running the deploy phases succeeded = self.run_deploy_phases( - context, subcloud_id, payload, phases_to_run, - initial_deployment=initial_deployment) + context, + subcloud_id, + payload, + phases_to_run, + initial_deployment=initial_deployment, + ) if succeeded: LOG.info(f"Finished adding subcloud {subcloud['name']}.") @@ -954,15 +1102,15 @@ class SubcloudManager(manager.Manager): LOG.info("Redeploying subcloud %s." % subcloud.name) # Define which deploy phases to run - phases_to_run = [consts.DEPLOY_PHASE_INSTALL, - consts.DEPLOY_PHASE_BOOTSTRAP] + phases_to_run = [consts.DEPLOY_PHASE_INSTALL, consts.DEPLOY_PHASE_BOOTSTRAP] if consts.DEPLOY_CONFIG in payload: phases_to_run.append(consts.DEPLOY_PHASE_CONFIG) else: phases_to_run.append(consts.DEPLOY_PHASE_COMPLETE) - succeeded = self.run_deploy_phases(context, subcloud_id, payload, - phases_to_run, initial_deployment=True) + succeeded = self.run_deploy_phases( + context, subcloud_id, payload, phases_to_run, initial_deployment=True + ) if succeeded: LOG.info(f"Finished redeploying subcloud {subcloud['name']}.") @@ -974,23 +1122,25 @@ class SubcloudManager(manager.Manager): :param payload: subcloud backup create detail """ - subcloud_id = payload.get('subcloud') - group_id = payload.get('group') + subcloud_id = payload.get("subcloud") + group_id = payload.get("group") # Retrieve either a single subcloud or all subclouds in a group - subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id \ + subclouds = ( + [db_api.subcloud_get(context, subcloud_id)] + if subcloud_id else db_api.subcloud_get_for_group(context, group_id) + ) self._filter_subclouds_with_ongoing_backup(subclouds) - self._update_backup_status(context, subclouds, - consts.BACKUP_STATE_INITIAL) + self._update_backup_status(context, subclouds, consts.BACKUP_STATE_INITIAL) # Validate the subclouds and filter the ones applicable for backup - self._update_backup_status(context, subclouds, - consts.BACKUP_STATE_VALIDATING) + self._update_backup_status(context, subclouds, consts.BACKUP_STATE_VALIDATING) - subclouds_to_backup, invalid_subclouds = \ - self._validate_subclouds_for_backup(subclouds, 'create') + subclouds_to_backup, invalid_subclouds = self._validate_subclouds_for_backup( + subclouds, "create" + ) self._mark_invalid_subclouds_for_backup(context, invalid_subclouds) @@ -998,13 +1148,11 @@ class SubcloudManager(manager.Manager): backup_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE) # Spawn threads to back up each applicable subcloud - backup_function = functools.partial(self._backup_subcloud, context, - payload) + backup_function = functools.partial(self._backup_subcloud, 
context, payload) - self._run_parallel_group_operation('backup create', - backup_function, - backup_pool, - subclouds_to_backup) + self._run_parallel_group_operation( + "backup create", backup_function, backup_pool, subclouds_to_backup + ) LOG.info("Subcloud backup operation finished") @@ -1016,27 +1164,33 @@ class SubcloudManager(manager.Manager): :param payload: subcloud backup delete detail """ - local_delete = payload.get('local_only') + local_delete = payload.get("local_only") - subclouds_to_delete_backup, invalid_subclouds = \ + subclouds_to_delete_backup, invalid_subclouds = ( self._filter_subclouds_for_backup_delete(context, payload, local_delete) + ) # Spawn threads to back up each applicable subcloud backup_delete_function = functools.partial( - self._delete_subcloud_backup, context, payload, release_version) + self._delete_subcloud_backup, context, payload, release_version + ) # Use thread pool to limit number of operations in parallel max_parallel_operations = MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE backup_delete_pool = greenpool.GreenPool(size=max_parallel_operations) failed_subclouds = self._run_parallel_group_operation( - 'backup delete', backup_delete_function, backup_delete_pool, - subclouds_to_delete_backup) + "backup delete", + backup_delete_function, + backup_delete_pool, + subclouds_to_delete_backup, + ) LOG.info("Subcloud backup delete operation finished") - return self._subcloud_operation_notice('delete', subclouds_to_delete_backup, - failed_subclouds, invalid_subclouds) + return self._subcloud_operation_notice( + "delete", subclouds_to_delete_backup, failed_subclouds, invalid_subclouds + ) def restore_subcloud_backups(self, context, payload): """Restore a subcloud or group of subclouds from backup data @@ -1045,55 +1199,65 @@ class SubcloudManager(manager.Manager): :param payload: restore backup subcloud detail """ - subcloud_id = payload.get('subcloud') - group_id = payload.get('group') + subcloud_id = payload.get("subcloud") + group_id = payload.get("group") # Initialize subclouds lists restore_subclouds, invalid_subclouds, failed_subclouds = ( - list(), list(), list()) + list(), + list(), + list(), + ) # Retrieve either a single subcloud or all subclouds in a group subclouds = ( - [db_api.subcloud_get(context, subcloud_id)] if subcloud_id + [db_api.subcloud_get(context, subcloud_id)] + if subcloud_id else db_api.subcloud_get_for_group(context, group_id) ) - bootstrap_address_dict = \ - payload.get('restore_values', {}).get('bootstrap_address', {}) + bootstrap_address_dict = payload.get("restore_values", {}).get( + "bootstrap_address", {} + ) - restore_subclouds, invalid_subclouds = ( - self._validate_subclouds_for_backup(subclouds, - 'restore', - bootstrap_address_dict) + restore_subclouds, invalid_subclouds = self._validate_subclouds_for_backup( + subclouds, "restore", bootstrap_address_dict ) if restore_subclouds: # Use thread pool to limit number of operations in parallel restore_pool = greenpool.GreenPool( - size=MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE) + size=MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE + ) # Spawn threads to back up each applicable subcloud restore_function = functools.partial( - self._restore_subcloud_backup, context, payload) + self._restore_subcloud_backup, context, payload + ) failed_subclouds = self._run_parallel_group_operation( - 'backup restore', restore_function, - restore_pool, restore_subclouds + "backup restore", restore_function, restore_pool, restore_subclouds ) restored_subclouds = len(restore_subclouds) - len(failed_subclouds) - 
LOG.info("Subcloud restore backup operation finished.\n" - "Restored subclouds: %s. Invalid subclouds: %s. " - "Failed subclouds: %s." % (restored_subclouds, - len(invalid_subclouds), - len(failed_subclouds))) + LOG.info( + "Subcloud restore backup operation finished.\nRestored subclouds: %s. " + "Invalid subclouds: %s. Failed subclouds: %s." + % (restored_subclouds, len(invalid_subclouds), len(failed_subclouds)) + ) - return self._subcloud_operation_notice('restore', restore_subclouds, - failed_subclouds, invalid_subclouds) + return self._subcloud_operation_notice( + "restore", restore_subclouds, failed_subclouds, invalid_subclouds + ) - def _deploy_bootstrap_prep(self, context, subcloud, payload: dict, - ansible_subcloud_inventory_file, - initial_deployment=False): + def _deploy_bootstrap_prep( + self, + context, + subcloud, + payload: dict, + ansible_subcloud_inventory_file, + initial_deployment=False, + ): """Run the preparation steps needed to run the bootstrap operation :param context: target request context object @@ -1105,8 +1269,9 @@ class SubcloudManager(manager.Manager): """ network_reconfig = utils.has_network_reconfig(payload, subcloud) if network_reconfig: - self._configure_system_controller_network(context, payload, subcloud, - update_db=False) + self._configure_system_controller_network( + context, payload, subcloud, update_db=False + ) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) @@ -1116,20 +1281,18 @@ class SubcloudManager(manager.Manager): subcloud.id, description=payload.get("description"), management_subnet=utils.get_management_subnet(payload), - management_gateway_ip=utils.get_management_gateway_address( - payload), - management_start_ip=utils.get_management_start_address( - payload), + management_gateway_ip=utils.get_management_gateway_address(payload), + management_start_ip=utils.get_management_start_address(payload), management_end_ip=utils.get_management_end_address(payload), - systemcontroller_gateway_ip=payload.get( - "systemcontroller_gateway_address"), + systemcontroller_gateway_ip=payload.get("systemcontroller_gateway_address"), location=payload.get("location"), - deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP) + deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP, + ) # Populate payload with passwords - payload['ansible_become_pass'] = payload['sysadmin_password'] - payload['ansible_ssh_pass'] = payload['sysadmin_password'] - payload['admin_password'] = str(keyring.get_password('CGCS', 'admin')) + payload["ansible_become_pass"] = payload["sysadmin_password"] + payload["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["admin_password"] = str(keyring.get_password("CGCS", "admin")) payload_for_overrides_file = payload.copy() for key in VALUES_TO_DELETE_OVERRIDES: @@ -1137,38 +1300,47 @@ class SubcloudManager(manager.Manager): del payload_for_overrides_file[key] # Update the ansible overrides file - overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud.name + '.yml') + overrides_file = os.path.join( + dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud.name + ".yml" + ) overrides_file_exists = utils.update_values_on_yaml_file( - overrides_file, payload_for_overrides_file, - values_to_keep=GENERATED_OVERRIDES_VALUES) + overrides_file, + payload_for_overrides_file, + values_to_keep=GENERATED_OVERRIDES_VALUES, + ) if not overrides_file_exists: # Overrides file doesn't exist, so we generate a new one self.generate_subcloud_ansible_config( - subcloud, payload, initial_deployment=initial_deployment) + 
subcloud, payload, initial_deployment=initial_deployment + ) else: # Since we generate an inventory already when generating the # new Ansible overrides, only create the inventory here when # the overrides already existed - utils.create_subcloud_inventory(payload, - ansible_subcloud_inventory_file, - initial_deployment) + utils.create_subcloud_inventory( + payload, ansible_subcloud_inventory_file, initial_deployment + ) - utils.update_install_values_with_new_bootstrap_address(context, - payload, - subcloud) + utils.update_install_values_with_new_bootstrap_address( + context, payload, subcloud + ) bootstrap_command = self.compose_bootstrap_command( subcloud.name, subcloud.region_name, ansible_subcloud_inventory_file, - subcloud.software_version) + subcloud.software_version, + ) return bootstrap_command - def _deploy_config_prep(self, subcloud, payload: dict, - ansible_subcloud_inventory_file, - initial_deployment=False): + def _deploy_config_prep( + self, + subcloud, + payload: dict, + ansible_subcloud_inventory_file, + initial_deployment=False, + ): """Run the preparation steps needed to run the config operation :param subcloud: target subcloud model object @@ -1181,21 +1353,27 @@ class SubcloudManager(manager.Manager): # Update the ansible inventory for the subcloud bootstrap_address = payload[consts.BOOTSTRAP_ADDRESS] - subcloud_params = {'name': subcloud.name, - consts.BOOTSTRAP_ADDRESS: bootstrap_address} - utils.create_subcloud_inventory(subcloud_params, - ansible_subcloud_inventory_file, - initial_deployment) + subcloud_params = { + "name": subcloud.name, + consts.BOOTSTRAP_ADDRESS: bootstrap_address, + } + utils.create_subcloud_inventory( + subcloud_params, ansible_subcloud_inventory_file, initial_deployment + ) config_command = self.compose_config_command( - subcloud.name, - ansible_subcloud_inventory_file, - payload) + subcloud.name, ansible_subcloud_inventory_file, payload + ) return config_command - def _deploy_install_prep(self, subcloud, payload: dict, - ansible_subcloud_inventory_file, - initial_deployment=False, init_enroll=False): + def _deploy_install_prep( + self, + subcloud, + payload: dict, + ansible_subcloud_inventory_file, + initial_deployment=False, + init_enroll=False, + ): """Run preparation steps for install or init enroll operations :param subcloud: target subcloud model object @@ -1206,45 +1384,45 @@ class SubcloudManager(manager.Manager): :return: ansible command needed to run the install playbook """ - payload['install_values']['ansible_ssh_pass'] = \ - payload['sysadmin_password'] - payload['install_values']['ansible_become_pass'] = \ - payload['sysadmin_password'] + payload["install_values"]["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["install_values"]["ansible_become_pass"] = payload["sysadmin_password"] # If all update_values already exists on override file or are # the same as the existing ones, the update won't happen # and the file will remain untouched - bootstrap_file = psd_common.get_config_file_path(subcloud.name, - consts.BOOTSTRAP_VALUES) - update_values = {'software_version': payload['software_version'], - 'bmc_password': payload['bmc_password'], - 'ansible_ssh_pass': payload['sysadmin_password'], - 'ansible_become_pass': payload['sysadmin_password'] - } - utils.update_values_on_yaml_file(bootstrap_file, - update_values) + bootstrap_file = psd_common.get_config_file_path( + subcloud.name, consts.BOOTSTRAP_VALUES + ) + update_values = { + "software_version": payload["software_version"], + "bmc_password": payload["bmc_password"], + 
"ansible_ssh_pass": payload["sysadmin_password"], + "ansible_become_pass": payload["sysadmin_password"], + } + utils.update_values_on_yaml_file(bootstrap_file, update_values) # Update the ansible inventory for the subcloud - bootstrap_address = payload['install_values']['bootstrap_address'] - subcloud_params = {'name': subcloud.name, - consts.BOOTSTRAP_ADDRESS: bootstrap_address} - utils.create_subcloud_inventory(subcloud_params, - ansible_subcloud_inventory_file, - initial_deployment) + bootstrap_address = payload["install_values"]["bootstrap_address"] + subcloud_params = { + "name": subcloud.name, + consts.BOOTSTRAP_ADDRESS: bootstrap_address, + } + utils.create_subcloud_inventory( + subcloud_params, ansible_subcloud_inventory_file, initial_deployment + ) if init_enroll: init_enroll_command = self.compose_enroll_command( subcloud.name, subcloud.region_name, ansible_subcloud_inventory_file, subcloud.software_version, - state="init" + state="init", ) return init_enroll_command install_command = self.compose_install_command( - subcloud.name, - ansible_subcloud_inventory_file, - payload['software_version']) + subcloud.name, ansible_subcloud_inventory_file, payload["software_version"] + ) return install_command def subcloud_deploy_abort(self, context, subcloud_id, deploy_status): @@ -1262,9 +1440,10 @@ class SubcloudManager(manager.Manager): ansible = AnsiblePlaybook(subcloud.name) aborted = ansible.run_abort() if not aborted: - LOG.warning("Ansible deploy phase subprocess of %s " - "was terminated before it could be aborted" - % subcloud.name) + LOG.warning( + "Ansible deploy phase subprocess of %s " + "was terminated before it could be aborted" % subcloud.name + ) # let the main phase thread handle the state update return @@ -1272,17 +1451,21 @@ class SubcloudManager(manager.Manager): # Send shutdown signal to subcloud send_subcloud_shutdown_signal(subcloud.name) except Exception as ex: - LOG.error("Subcloud deploy abort failed for subcloud %s: %s" % - (subcloud.name, str(ex))) - utils.update_abort_status(context, subcloud.id, subcloud.deploy_status, - abort_failed=True) + LOG.error( + "Subcloud deploy abort failed for subcloud %s: %s" + % (subcloud.name, str(ex)) + ) + utils.update_abort_status( + context, subcloud.id, subcloud.deploy_status, abort_failed=True + ) # exception is logged above raise ex LOG.info("Successfully aborted deployment of %s" % subcloud.name) utils.update_abort_status(context, subcloud.id, subcloud.deploy_status) - def subcloud_deploy_resume(self, context, subcloud_id, subcloud_name, - payload: dict, deploy_states_to_run): + def subcloud_deploy_resume( + self, context, subcloud_id, subcloud_name, payload: dict, deploy_states_to_run + ): """Resume the subcloud deployment :param context: request context object @@ -1292,15 +1475,17 @@ class SubcloudManager(manager.Manager): :param deploy_states_to_run: deploy phases pending execution """ LOG.info( - "Resuming deployment of subcloud %s. Deploy phases to be executed: %s" % - (subcloud_name, ', '.join(deploy_states_to_run))) + "Resuming deployment of subcloud %s. 
Deploy phases to be executed: %s" + % (subcloud_name, ", ".join(deploy_states_to_run)) + ) - self.run_deploy_phases(context, subcloud_id, payload, - deploy_states_to_run, - initial_deployment=True) + self.run_deploy_phases( + context, subcloud_id, payload, deploy_states_to_run, initial_deployment=True + ) - def generate_subcloud_ansible_config(self, subcloud, payload, - initial_deployment=False): + def generate_subcloud_ansible_config( + self, subcloud, payload, initial_deployment=False + ): """Generate latest ansible config based on given payload. :param subcloud: subcloud object @@ -1309,10 +1494,12 @@ class SubcloudManager(manager.Manager): :return: resulting subcloud DB object """ if initial_deployment: - LOG.debug(f"Overrides file not found for {payload['name']}. " - "Generating new overrides file.") + LOG.debug( + f"Overrides file not found for {payload['name']}. " + "Generating new overrides file." + ) else: - LOG.info("Generate subcloud %s ansible config." % payload['name']) + LOG.info("Generate subcloud %s ansible config." % payload["name"]) try: # Write ansible based on rehome_data @@ -1321,41 +1508,48 @@ class SubcloudManager(manager.Manager): region_clients=None, fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips, ).keystone_client - endpoint = m_ks_client.endpoint_cache.get_endpoint('sysinv') - sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, - m_ks_client.session, - endpoint=endpoint) + endpoint = m_ks_client.endpoint_cache.get_endpoint("sysinv") + sysinv_client = SysinvClient( + dccommon_consts.DEFAULT_REGION_NAME, + m_ks_client.session, + endpoint=endpoint, + ) LOG.debug("Getting cached regionone data for %s" % subcloud.name) cached_regionone_data = self._get_cached_regionone_data( - m_ks_client, sysinv_client) + m_ks_client, sysinv_client + ) self._populate_payload_with_cached_keystone_data( - cached_regionone_data, payload, populate_passwords=True) + cached_regionone_data, payload, populate_passwords=True + ) - payload['users'] = {} + payload["users"] = {} for user in USERS_TO_REPLICATE: - payload['users'][user] = \ - str(keyring.get_password( - user, dccommon_consts.SERVICES_USER_NAME)) + payload["users"][user] = str( + keyring.get_password(user, dccommon_consts.SERVICES_USER_NAME) + ) # TODO(Yuxing) remove replicating the smapi user when end the support # of rehoming a subcloud with a software version below 22.12 if subcloud.software_version <= LAST_SW_VERSION_IN_CENTOS: - payload['users']['smapi'] = \ - str(keyring.get_password( - 'smapi', dccommon_consts.SERVICES_USER_NAME)) + payload["users"]["smapi"] = str( + keyring.get_password("smapi", dccommon_consts.SERVICES_USER_NAME) + ) - if 'region_name' not in payload: - payload['region_name'] = subcloud.region_name + if "region_name" not in payload: + payload["region_name"] = subcloud.region_name # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = utils.get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) # Create the ansible inventory for the new subcloud - utils.create_subcloud_inventory(payload, - ansible_subcloud_inventory_file, - initial_deployment=initial_deployment) + utils.create_subcloud_inventory( + payload, + ansible_subcloud_inventory_file, + initial_deployment=initial_deployment, + ) # Create subcloud intermediate certificate and pass in keys # On initial deployment, this was already created by subcloud @@ -1373,12 +1567,19 @@ class SubcloudManager(manager.Manager): return subcloud except Exception: - 
LOG.exception("Failed to generate subcloud %s config" % payload['name']) + LOG.exception("Failed to generate subcloud %s config" % payload["name"]) raise - def subcloud_deploy_create(self, context, subcloud_id, payload, - rehoming=False, initial_deployment=True, - return_as_dict=True, enroll=False): + def subcloud_deploy_create( + self, + context, + subcloud_id, + payload, + rehoming=False, + initial_deployment=True, + return_as_dict=True, + enroll=False, + ): """Create subcloud and notify orchestrators. :param context: request context object @@ -1391,14 +1592,14 @@ class SubcloudManager(manager.Manager): returning :return: resulting subcloud DB object or dictionary """ - LOG.info("Creating subcloud %s." % payload['name']) + LOG.info("Creating subcloud %s." % payload["name"]) # cache original payload data for day-2's rehome usage original_payload = copy.deepcopy(payload) # Check the secondary option from payload - secondary_str = payload.get('secondary', '') - secondary = (secondary_str.lower() == 'true') + secondary_str = payload.get("secondary", "") + secondary = secondary_str.lower() == "true" if rehoming: deploy_state = consts.DEPLOY_STATE_PRE_REHOME @@ -1408,8 +1609,8 @@ class SubcloudManager(manager.Manager): deploy_state = consts.DEPLOY_STATE_CREATING subcloud = db_api.subcloud_update( - context, subcloud_id, - deploy_status=deploy_state) + context, subcloud_id, deploy_status=deploy_state + ) rehome_data = None try: @@ -1420,70 +1621,79 @@ class SubcloudManager(manager.Manager): region_clients=None, fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips, ).keystone_client - subcloud_subnet = netaddr.IPNetwork( - utils.get_management_subnet(payload)) - endpoint = m_ks_client.endpoint_cache.get_endpoint('sysinv') - sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, - m_ks_client.session, - endpoint=endpoint) + subcloud_subnet = netaddr.IPNetwork(utils.get_management_subnet(payload)) + endpoint = m_ks_client.endpoint_cache.get_endpoint("sysinv") + sysinv_client = SysinvClient( + dccommon_consts.DEFAULT_REGION_NAME, + m_ks_client.session, + endpoint=endpoint, + ) LOG.debug("Getting cached regionone data for %s" % subcloud.name) cached_regionone_data = self._get_cached_regionone_data( - m_ks_client, sysinv_client) - for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: + m_ks_client, sysinv_client + ) + for mgmt_if_uuid in cached_regionone_data["mgmt_interface_uuids"]: sysinv_client.create_route( mgmt_if_uuid, str(subcloud_subnet.ip), subcloud_subnet.prefixlen, - payload['systemcontroller_gateway_address'], - 1) + payload["systemcontroller_gateway_address"], + 1, + ) if not enroll: - self._create_subcloud_endpoints(m_ks_client=m_ks_client, - payload=payload, - subcloud=subcloud, - context=context) + self._create_subcloud_endpoints( + m_ks_client=m_ks_client, + payload=payload, + subcloud=subcloud, + context=context, + ) # create entry into alarm summary table, will get real values later - alarm_updates = {'critical_alarms': -1, - 'major_alarms': -1, - 'minor_alarms': -1, - 'warnings': -1, - 'cloud_status': consts.ALARMS_DISABLED} - db_api.subcloud_alarms_create(context, subcloud.name, - alarm_updates) + alarm_updates = { + "critical_alarms": -1, + "major_alarms": -1, + "minor_alarms": -1, + "warnings": -1, + "cloud_status": consts.ALARMS_DISABLED, + } + db_api.subcloud_alarms_create(context, subcloud.name, alarm_updates) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) # Passwords need to be populated when rehoming 
self._populate_payload_with_cached_keystone_data( - cached_regionone_data, payload, populate_passwords=rehoming) + cached_regionone_data, payload, populate_passwords=rehoming + ) if "deploy_playbook" in payload: - self._prepare_for_deployment(payload, subcloud.name, - populate_passwords=False) + self._prepare_for_deployment( + payload, subcloud.name, populate_passwords=False + ) - payload['users'] = {} + payload["users"] = {} for user in USERS_TO_REPLICATE: - payload['users'][user] = \ - str(keyring.get_password( - user, dccommon_consts.SERVICES_USER_NAME)) + payload["users"][user] = str( + keyring.get_password(user, dccommon_consts.SERVICES_USER_NAME) + ) # TODO(Yuxing) remove replicating the smapi user when end the support # of rehoming a subcloud with a software version below 22.12 if rehoming and subcloud.software_version <= LAST_SW_VERSION_IN_CENTOS: - payload['users']['smapi'] = \ - str(keyring.get_password( - 'smapi', dccommon_consts.SERVICES_USER_NAME)) + payload["users"]["smapi"] = str( + keyring.get_password("smapi", dccommon_consts.SERVICES_USER_NAME) + ) # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = utils.get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) # Create the ansible inventory for the new subcloud - utils.create_subcloud_inventory(payload, - ansible_subcloud_inventory_file, - initial_deployment) + utils.create_subcloud_inventory( + payload, ansible_subcloud_inventory_file, initial_deployment + ) if not enroll: # create subcloud intermediate certificate and pass in keys @@ -1498,21 +1708,21 @@ class SubcloudManager(manager.Manager): # for day-2's migrate purpose. if secondary: # remove unused parameters - if 'secondary' in original_payload: - del original_payload['secondary'] - if 'ansible_ssh_pass' in original_payload: - del original_payload['ansible_ssh_pass'] - if 'sysadmin_password' in original_payload: - del original_payload['sysadmin_password'] - if 'ansible_become_pass' in original_payload: - del original_payload['ansible_become_pass'] - if 'admin_password' in original_payload: + if "secondary" in original_payload: + del original_payload["secondary"] + if "ansible_ssh_pass" in original_payload: + del original_payload["ansible_ssh_pass"] + if "sysadmin_password" in original_payload: + del original_payload["sysadmin_password"] + if "ansible_become_pass" in original_payload: + del original_payload["ansible_become_pass"] + if "admin_password" in original_payload: # Encode admin_password - original_payload['admin_password'] = base64.b64encode( - original_payload['admin_password'].encode("utf-8") - ).decode('utf-8') + original_payload["admin_password"] = base64.b64encode( + original_payload["admin_password"].encode("utf-8") + ).decode("utf-8") bootstrap_info = utils.create_subcloud_rehome_data_template() - bootstrap_info['saved_payload'] = original_payload + bootstrap_info["saved_payload"] = original_payload rehome_data = json.dumps(bootstrap_info) deploy_state = consts.DEPLOY_STATE_SECONDARY @@ -1520,7 +1730,7 @@ class SubcloudManager(manager.Manager): deploy_state = consts.DEPLOY_STATE_CREATED except Exception: - LOG.exception("Failed to create subcloud %s" % payload['name']) + LOG.exception("Failed to create subcloud %s" % payload["name"]) # If we failed to create the subcloud, update the deployment status if rehoming: @@ -1531,9 +1741,8 @@ class SubcloudManager(manager.Manager): deploy_state = consts.DEPLOY_STATE_CREATE_FAILED subcloud = db_api.subcloud_update( - 
context, subcloud.id, - deploy_status=deploy_state, - rehome_data=rehome_data) + context, subcloud.id, deploy_status=deploy_state, rehome_data=rehome_data + ) LOG.info(f"Successfully created subcloud {subcloud.name}") @@ -1544,8 +1753,9 @@ class SubcloudManager(manager.Manager): return subcloud - def subcloud_deploy_install(self, context, subcloud_id, payload: dict, - initial_deployment=False) -> bool: + def subcloud_deploy_install( + self, context, subcloud_id, payload: dict, initial_deployment=False + ) -> bool: """Install subcloud :param context: request context object @@ -1559,9 +1769,10 @@ class SubcloudManager(manager.Manager): subcloud = db_api.subcloud_update( context, subcloud_id, - software_version=payload['software_version'], + software_version=payload["software_version"], deploy_status=consts.DEPLOY_STATE_PRE_INSTALL, - data_install=json.dumps(payload['install_values'])) + data_install=json.dumps(payload["install_values"]), + ) LOG.info("Installing subcloud %s." % subcloud.name) @@ -1571,19 +1782,22 @@ class SubcloudManager(manager.Manager): + "_playbook_output.log" ) ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) install_command = self._deploy_install_prep( - subcloud, payload, ansible_subcloud_inventory_file, - initial_deployment) + subcloud, payload, ansible_subcloud_inventory_file, initial_deployment + ) install_success = self._run_subcloud_install( - context, subcloud, install_command, - log_file, payload['install_values']) + context, subcloud, install_command, log_file, payload["install_values"] + ) if install_success: db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALLED, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) return install_success except Exception: @@ -1591,24 +1805,25 @@ class SubcloudManager(manager.Manager): # If we failed to install the subcloud, # update the deployment status db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) + context, + subcloud_id, + deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED, + ) return False def subcloud_deploy_enroll(self, context, subcloud_id, payload: dict): db_api.subcloud_update( - context, - subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_INIT_ENROLL + context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_INIT_ENROLL ) subcloud = db_api.subcloud_get(context, subcloud_id) if self.subcloud_init_enroll(context, subcloud.id, payload): try: - endpoint = ("https://" + - payload.get("external_oam_floating_address") + ":6385") + endpoint = ( + "https://" + payload.get("external_oam_floating_address") + ":6385" + ) subcloud_region_name = utils.get_region_name(endpoint) # The region name in the payload was randomly generated, need to @@ -1654,7 +1869,7 @@ class SubcloudManager(manager.Manager): subcloud.name, ansible_subcloud_inventory_file, payload[consts.BOOTSTRAP_ADDRESS], - ansible_pass=payload['sysadmin_password'], + ansible_pass=payload["sysadmin_password"], ) enroll_playbook_command = self.compose_enroll_command( @@ -1662,27 +1877,32 @@ class SubcloudManager(manager.Manager): subcloud.region_name, ansible_subcloud_inventory_file, subcloud.software_version, - state="enroll" + state="enroll", + ) + self._run_subcloud_enroll( + context, + subcloud, + enroll_playbook_command, + log_file, + region_name=subcloud_region_name, ) - 
self._run_subcloud_enroll(context, - subcloud, - enroll_playbook_command, - log_file, - region_name=subcloud_region_name) except Exception: - LOG.exception(f'Failed to enroll subcloud {subcloud.name}') + LOG.exception(f"Failed to enroll subcloud {subcloud.name}") db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_ENROLL_FAILED) + context, + subcloud_id, + deploy_status=consts.DEPLOY_STATE_PRE_ENROLL_FAILED, + ) return False else: - LOG.error(f'Initial enrollment failed for subcloud {subcloud.name}') + LOG.error(f"Initial enrollment failed for subcloud {subcloud.name}") return subcloud - def subcloud_deploy_bootstrap(self, context, subcloud_id, payload, - initial_deployment=False): + def subcloud_deploy_bootstrap( + self, context, subcloud_id, payload, initial_deployment=False + ): """Bootstrap subcloud :param context: request context object @@ -1691,7 +1911,7 @@ class SubcloudManager(manager.Manager): :param initial_deployment: initial_deployment flag from subcloud inventory :return: success status """ - LOG.info("Bootstrapping subcloud %s." % payload['name']) + LOG.info("Bootstrapping subcloud %s." % payload["name"]) # Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) @@ -1702,25 +1922,33 @@ class SubcloudManager(manager.Manager): + "_playbook_output.log" ) ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) bootstrap_command = self._deploy_bootstrap_prep( - context, subcloud, payload, + context, + subcloud, + payload, ansible_subcloud_inventory_file, - initial_deployment) + initial_deployment, + ) bootstrap_success = self._run_subcloud_bootstrap( - context, subcloud, bootstrap_command, log_file) + context, subcloud, bootstrap_command, log_file + ) return bootstrap_success except Exception: - LOG.exception("Failed to bootstrap subcloud %s" % payload['name']) + LOG.exception("Failed to bootstrap subcloud %s" % payload["name"]) db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED) + context, + subcloud_id, + deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED, + ) return False - def subcloud_deploy_config(self, context, subcloud_id, payload: dict, - initial_deployment=False) -> bool: + def subcloud_deploy_config( + self, context, subcloud_id, payload: dict, initial_deployment=False + ) -> bool: """Configure subcloud :param context: request context object @@ -1732,8 +1960,8 @@ class SubcloudManager(manager.Manager): LOG.info("Configuring subcloud %s." 
% subcloud_id) subcloud = db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_CONFIG) + context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_CONFIG + ) try: log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) @@ -1741,21 +1969,25 @@ class SubcloudManager(manager.Manager): ) # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) config_command = self._deploy_config_prep( - subcloud, payload, ansible_subcloud_inventory_file, - initial_deployment) + subcloud, payload, ansible_subcloud_inventory_file, initial_deployment + ) - config_success = self._run_subcloud_config(subcloud, context, - config_command, log_file) + config_success = self._run_subcloud_config( + subcloud, context, config_command, log_file + ) return config_success except Exception: LOG.exception("Failed to configure %s" % subcloud.name) db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_CONFIG_FAILED) + context, + subcloud_id, + deploy_status=consts.DEPLOY_STATE_PRE_CONFIG_FAILED, + ) return False def subcloud_deploy_complete(self, context, subcloud_id): @@ -1768,19 +2000,23 @@ class SubcloudManager(manager.Manager): LOG.info("Completing subcloud %s deployment." % subcloud_id) # Just update the deploy status - subcloud = db_api.subcloud_update(context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_DONE) + subcloud = db_api.subcloud_update( + context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DONE + ) - LOG.info("Subcloud %s deploy status set to: %s" - % (subcloud_id, consts.DEPLOY_STATE_DONE)) + LOG.info( + "Subcloud %s deploy status set to: %s" + % (subcloud_id, consts.DEPLOY_STATE_DONE) + ) return db_api.subcloud_db_model_to_dict(subcloud) def _subcloud_operation_notice( - self, operation, restore_subclouds, failed_subclouds, - invalid_subclouds): - all_failed = ((not set(restore_subclouds) - set(failed_subclouds)) - and not invalid_subclouds) + self, operation, restore_subclouds, failed_subclouds, invalid_subclouds + ): + all_failed = ( + not set(restore_subclouds) - set(failed_subclouds) + ) and not invalid_subclouds if all_failed: LOG.error("Backup %s failed for all applied subclouds" % operation) raise exceptions.SubcloudBackupOperationFailed(operation=operation) @@ -1788,13 +2024,14 @@ class SubcloudManager(manager.Manager): if invalid_subclouds: self._warn_for_invalid_subclouds_on_backup_operation(invalid_subclouds) if failed_subclouds: - self._warn_for_failed_subclouds_on_backup_operation(operation, - failed_subclouds) + self._warn_for_failed_subclouds_on_backup_operation( + operation, failed_subclouds + ) if invalid_subclouds or failed_subclouds: - return self._build_subcloud_operation_notice(operation, - failed_subclouds, - invalid_subclouds) + return self._build_subcloud_operation_notice( + operation, failed_subclouds, invalid_subclouds + ) return def _filter_subclouds_with_ongoing_backup(self, subclouds): @@ -1802,22 +2039,27 @@ class SubcloudManager(manager.Manager): while i < len(subclouds): subcloud = subclouds[i] if subcloud.backup_status in consts.STATES_FOR_ONGOING_BACKUP: - LOG.info(_('Subcloud %s already has a backup operation in ' - 'progress' % subcloud.name)) + LOG.info( + _( + "Subcloud %s already has a backup operation in progress" + % subcloud.name + ) + ) subclouds.pop(i) else: i += 1 - def _validate_subclouds_for_backup(self, subclouds, operation, - 
bootstrap_address_dict=None): + def _validate_subclouds_for_backup( + self, subclouds, operation, bootstrap_address_dict=None + ): valid_subclouds = [] invalid_subclouds = [] for subcloud in subclouds: is_valid = False try: - if utils.is_valid_for_backup_operation(operation, - subcloud, - bootstrap_address_dict): + if utils.is_valid_for_backup_operation( + operation, subcloud, bootstrap_address_dict + ): is_valid = True except exceptions.ValidateFail: @@ -1853,7 +2095,8 @@ class SubcloudManager(manager.Manager): context, subcloud_id, deploy_status=consts.DEPLOY_STATE_INITIATING_ENROLL, - data_install=json.dumps(payload['install_values'])) + data_install=json.dumps(payload["install_values"]), + ) # TODO(glyraper): log_file to be used in the playbook execution # log_file = ( @@ -1861,19 +2104,23 @@ class SubcloudManager(manager.Manager): # + "_playbook_output.log" # ) ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) init_enroll_command = self._deploy_install_prep( - subcloud, payload, ansible_subcloud_inventory_file, - init_enroll=True) + subcloud, payload, ansible_subcloud_inventory_file, init_enroll=True + ) if enrollment.enroll_init(consts.DC_ANSIBLE_LOG_DIR, init_enroll_command): - LOG.info('Subcloud enrollment initial phase successful ' - f'for subcloud {subcloud.name}') + LOG.info( + "Subcloud enrollment initial phase successful " + f"for subcloud {subcloud.name}" + ) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_INIT_ENROLL_COMPLETE, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) return True except Exception: @@ -1881,8 +2128,10 @@ class SubcloudManager(manager.Manager): # If we failed to initiate the subcloud enroll, # update the deployment status db_api.subcloud_update( - context, subcloud_id, - deploy_status=consts.DEPLOY_STATE_PRE_INIT_ENROLL_FAILED) + context, + subcloud_id, + deploy_status=consts.DEPLOY_STATE_PRE_INIT_ENROLL_FAILED, + ) return False @staticmethod @@ -1893,12 +2142,14 @@ class SubcloudManager(manager.Manager): if invalid_ids: # Set state on subclouds that failed validation - LOG.warn('The following subclouds are not online and/or managed ' - 'and/or in a valid deploy state, and will not be backed ' - 'up: %s', ', '.join(list(invalid_names))) + LOG.warn( + "The following subclouds are not online and/or managed and/or " + "in a valid deploy state, and will not be backed up: %s", + ", ".join(list(invalid_names)), + ) SubcloudManager._update_backup_status_by_ids( - context, invalid_ids, - consts.BACKUP_STATE_VALIDATE_FAILED) + context, invalid_ids, consts.BACKUP_STATE_VALIDATE_FAILED + ) except DCManagerException as ex: LOG.exception("Subcloud backup validation failed") @@ -1907,31 +2158,33 @@ class SubcloudManager(manager.Manager): @staticmethod def _warn_for_invalid_subclouds_on_backup_operation(invalid_subclouds): invalid_names = {subcloud.name for subcloud in invalid_subclouds} - LOG.warn('The following subclouds were not online and/or in a valid ' - 'deploy/management state, and thus were not be reached ' - 'for backup operation: %s', ', '.join(list(invalid_names))) + LOG.warn( + "The following subclouds were not online and/or in a valid " + "deploy/management state, and thus were not be reached " + "for backup operation: %s", + ", ".join(list(invalid_names)), + ) @staticmethod def _warn_for_failed_subclouds_on_backup_operation(operation, failed_subclouds): failed_names = {subcloud.name 
for subcloud in failed_subclouds} - LOG.warn('Backup %s operation failed for some subclouds, ' - 'check previous logs for details. Failed subclouds: %s' % - (operation, ', '.join(list(failed_names)))) + LOG.warn( + "Backup %s operation failed for some subclouds, " + "check previous logs for details. Failed subclouds: %s" + % (operation, ", ".join(list(failed_names))) + ) @staticmethod def _update_backup_status(context, subclouds, backup_status): subcloud_ids = [subcloud.id for subcloud in subclouds] - return SubcloudManager. \ - _update_backup_status_by_ids(context, subcloud_ids, - backup_status) + return SubcloudManager._update_backup_status_by_ids( + context, subcloud_ids, backup_status + ) @staticmethod def _update_backup_status_by_ids(context, subcloud_ids, backup_status): - validate_state_form = { - Subcloud.backup_status.name: backup_status - } - db_api.subcloud_bulk_update_by_ids(context, subcloud_ids, - validate_state_form) + validate_state_form = {Subcloud.backup_status.name: backup_status} + db_api.subcloud_bulk_update_by_ids(context, subcloud_ids, validate_state_form) @staticmethod def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds): @@ -1946,9 +2199,11 @@ class SubcloudManager(manager.Manager): completion = float(processed) / float(len(subclouds)) * 100 remaining = len(subclouds) - processed - LOG.info("Processed subcloud %s for %s (operation %.0f%% " - "complete, %d subcloud(s) remaining)" % - (subcloud.name, op_type, completion, remaining)) + LOG.info( + "Processed subcloud %s for %s (operation %.0f%% " + "complete, %d subcloud(s) remaining)" + % (subcloud.name, op_type, completion, remaining) + ) return failed_subclouds @@ -1977,19 +2232,21 @@ class SubcloudManager(manager.Manager): # Prepare for backup overrides_file = self._create_overrides_for_backup_or_restore( - 'create', payload, subcloud.name + "create", payload, subcloud.name ) backup_command = self.compose_backup_command( - subcloud.name, subcloud_inventory_file) + subcloud.name, subcloud_inventory_file + ) self._clear_subcloud_backup_failure_alarm_if_exists(subcloud) except Exception: self._fail_subcloud_backup_prep(context, subcloud) return subcloud, False - local_only = payload.get('local_only') or False + local_only = payload.get("local_only") or False success = self._run_subcloud_backup_create_playbook( - subcloud, backup_command, context, local_only) + subcloud, backup_command, context, local_only + ) if success: utils.delete_subcloud_inventory(overrides_file) @@ -1997,19 +2254,23 @@ class SubcloudManager(manager.Manager): return subcloud, success def _filter_subclouds_for_backup_delete(self, context, payload, local_delete): - subcloud_id = payload.get('subcloud') - group_id = payload.get('group') + subcloud_id = payload.get("subcloud") + group_id = payload.get("group") # Retrieve either a single subcloud or all subclouds in a group - subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id \ + subclouds = ( + [db_api.subcloud_get(context, subcloud_id)] + if subcloud_id else db_api.subcloud_get_for_group(context, group_id) + ) invalid_subclouds = [] # Subcloud state validation only required for local delete if local_delete: # Use same criteria defined for subcloud backup create - subclouds_to_delete_backup, invalid_subclouds = \ - self._validate_subclouds_for_backup(subclouds, 'delete') + subclouds_to_delete_backup, invalid_subclouds = ( + self._validate_subclouds_for_backup(subclouds, "delete") + ) else: # Otherwise, validation is unnecessary, since connection is not 
required subclouds_to_delete_backup = subclouds @@ -2022,17 +2283,20 @@ class SubcloudManager(manager.Manager): payload, subcloud.name, release_version ) inventory_file = None - if payload['override_values']['local']: + if payload["override_values"]["local"]: inventory_file = self._create_subcloud_inventory_file(subcloud) delete_command = self.compose_backup_delete_command( - subcloud.name, inventory_file) + subcloud.name, inventory_file + ) except Exception: - LOG.exception("Failed to prepare subcloud %s for backup delete" - % subcloud.name) + LOG.exception( + "Failed to prepare subcloud %s for backup delete" % subcloud.name + ) return subcloud, False - success = self._run_subcloud_backup_delete_playbook(context, subcloud, - delete_command) + success = self._run_subcloud_backup_delete_playbook( + context, subcloud, delete_command + ) if success: utils.delete_subcloud_inventory(overrides_file) @@ -2040,8 +2304,10 @@ class SubcloudManager(manager.Manager): return subcloud, success def _restore_subcloud_backup(self, context, payload, subcloud): - log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + - '_playbook_output.log') + log_file = ( + os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + + "_playbook_output.log" + ) # To get the bootstrap_address for the subcloud, we considered # the following order: @@ -2049,63 +2315,75 @@ class SubcloudManager(manager.Manager): # 2) Use the value from install_values if present # 3) Use the value from the current inventory file if it exist # To reach this part of the code, one of the above conditions is True - bootstrap_address_dict = \ - payload.get('restore_values', {}).get('bootstrap_address', {}) + bootstrap_address_dict = payload.get("restore_values", {}).get( + "bootstrap_address", {} + ) if bootstrap_address_dict.get(subcloud.name): - LOG.debug('Using bootstrap_address from restore_values for subcloud %s' - % subcloud.name) + LOG.debug( + "Using bootstrap_address from restore_values for subcloud %s" + % subcloud.name + ) bootstrap_address = bootstrap_address_dict.get(subcloud.name) elif subcloud.data_install: - LOG.debug('Using bootstrap_address from install_values for subcloud %s' - % subcloud.name) + LOG.debug( + "Using bootstrap_address from install_values for subcloud %s" + % subcloud.name + ) data_install = json.loads(subcloud.data_install) - bootstrap_address = data_install.get('bootstrap_address') + bootstrap_address = data_install.get("bootstrap_address") else: - LOG.debug('Using bootstrap_address from previous inventory file ' - 'for subcloud %s' % subcloud.name) - bootstrap_address = \ - utils.get_ansible_host_ip_from_inventory(subcloud.name) + LOG.debug( + "Using bootstrap_address from previous inventory file for subcloud %s" + % subcloud.name + ) + bootstrap_address = utils.get_ansible_host_ip_from_inventory(subcloud.name) try: db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_PRE_RESTORE + context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_RESTORE ) subcloud_inventory_file = self._create_subcloud_inventory_file( - subcloud, bootstrap_address=bootstrap_address) + subcloud, bootstrap_address=bootstrap_address + ) # Prepare for restore overrides_file = self._create_overrides_for_backup_or_restore( - 'restore', payload, subcloud.name + "restore", payload, subcloud.name ) restore_command = self.compose_backup_restore_command( - subcloud.name, subcloud_inventory_file) + subcloud.name, subcloud_inventory_file + ) except Exception: db_api.subcloud_update( - context, subcloud.id, 
- deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED + context, + subcloud.id, + deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED, + ) + LOG.exception( + "Failed to prepare subcloud %s for backup restore" % subcloud.name ) - LOG.exception("Failed to prepare subcloud %s for backup restore" - % subcloud.name) return subcloud, False - if payload.get('with_install'): + if payload.get("with_install"): data_install = json.loads(subcloud.data_install) - software_version = payload.get('software_version') + software_version = payload.get("software_version") install_command = self.compose_install_command( - subcloud.name, subcloud_inventory_file, software_version) + subcloud.name, subcloud_inventory_file, software_version + ) # Update data_install with missing data matching_iso, _ = utils.get_vault_load_files(software_version) - data_install['software_version'] = software_version - data_install['image'] = matching_iso - data_install['ansible_ssh_pass'] = payload['sysadmin_password'] - data_install['ansible_become_pass'] = payload['sysadmin_password'] + data_install["software_version"] = software_version + data_install["image"] = matching_iso + data_install["ansible_ssh_pass"] = payload["sysadmin_password"] + data_install["ansible_become_pass"] = payload["sysadmin_password"] install_success = self._run_subcloud_install( - context, subcloud, install_command, log_file, data_install) + context, subcloud, install_command, log_file, data_install + ) if not install_success: return subcloud, False success = self._run_subcloud_backup_restore_playbook( - subcloud, restore_command, context, log_file) + subcloud, restore_command, context, log_file + ) if success: utils.delete_subcloud_inventory(overrides_file) @@ -2114,25 +2392,25 @@ class SubcloudManager(manager.Manager): @staticmethod def _build_subcloud_operation_notice( - operation, failed_subclouds, invalid_subclouds): + operation, failed_subclouds, invalid_subclouds + ): invalid_subcloud_names = [subcloud.name for subcloud in invalid_subclouds] failed_subcloud_names = [subcloud.name for subcloud in failed_subclouds] - notice = ( - "Subcloud backup %s operation completed with warnings:\n" - % operation) + notice = "Subcloud backup %s operation completed with warnings:\n" % operation if invalid_subclouds: - notice += ("The following subclouds were skipped for local backup " - "%s operation: %s." - % (operation, ' ,'.join(invalid_subcloud_names))) + notice += ( + "The following subclouds were skipped for local backup " + "%s operation: %s." % (operation, " ,".join(invalid_subcloud_names)) + ) if failed_subclouds: - notice += ("The following subclouds failed during backup " - "%s operation: %s." - % (operation, ' ,'.join(failed_subcloud_names))) + notice += ( + "The following subclouds failed during backup " + "%s operation: %s." 
% (operation, " ,".join(failed_subcloud_names)) + ) return notice - def _create_subcloud_endpoints(self, m_ks_client, payload, subcloud, - context): + def _create_subcloud_endpoints(self, m_ks_client, payload, subcloud, context): # Create endpoints to this subcloud on the # management-start-ip of the subcloud which will be allocated @@ -2147,50 +2425,54 @@ class SubcloudManager(manager.Manager): endpoint_config = [] endpoint_ip = utils.get_management_start_address(payload) if netaddr.IPAddress(endpoint_ip).version == 6: - endpoint_ip = '[' + endpoint_ip + ']' + endpoint_ip = "[" + endpoint_ip + "]" for service in m_ks_client.services_list: admin_endpoint_url = ENDPOINT_URLS.get(service.type, None) if admin_endpoint_url: admin_endpoint_url = admin_endpoint_url.format(endpoint_ip) endpoint_config.append( - {"id": service.id, - "admin_endpoint_url": admin_endpoint_url}) + {"id": service.id, "admin_endpoint_url": admin_endpoint_url} + ) if len(endpoint_config) < len(ENDPOINT_URLS): raise exceptions.BadRequest( - resource='subcloud', - msg='Missing service in SystemController') + resource="subcloud", msg="Missing service in SystemController" + ) for endpoint in endpoint_config: try: m_ks_client.keystone_client.endpoints.create( endpoint["id"], - endpoint['admin_endpoint_url'], + endpoint["admin_endpoint_url"], interface=dccommon_consts.KS_ENDPOINT_ADMIN, - region=subcloud.region_name) + region=subcloud.region_name, + ) except Exception as e: # Keystone service must be temporarily busy, retry LOG.error(str(e)) m_ks_client.keystone_client.endpoints.create( endpoint["id"], - endpoint['admin_endpoint_url'], + endpoint["admin_endpoint_url"], interface=dccommon_consts.KS_ENDPOINT_ADMIN, - region=subcloud.region_name) + region=subcloud.region_name, + ) # Inform orchestrator that subcloud has been added self.dcorch_rpc_client.add_subcloud( context, subcloud.region_name, subcloud.software_version, - subcloud.management_start_ip + subcloud.management_start_ip, ) - def _create_subcloud_inventory_file(self, subcloud, bootstrap_address=None, - initial_deployment=False): + def _create_subcloud_inventory_file( + self, subcloud, bootstrap_address=None, initial_deployment=False + ): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + subcloud.name, INVENTORY_FILE_POSTFIX + ) if not bootstrap_address: # Use subcloud floating IP for host reachability @@ -2199,104 +2481,116 @@ class SubcloudManager(manager.Manager): region_clients=None, fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips, ).keystone_client - bootstrap_address = utils.get_oam_addresses(subcloud, keystone_client) \ - .oam_floating_ip + bootstrap_address = utils.get_oam_addresses( + subcloud, keystone_client + ).oam_floating_ip # Add parameters used to generate inventory - subcloud_params = {'name': subcloud.name, - 'bootstrap-address': bootstrap_address} + subcloud_params = { + "name": subcloud.name, + "bootstrap-address": bootstrap_address, + } - utils.create_subcloud_inventory(subcloud_params, - ansible_subcloud_inventory_file, - initial_deployment) + utils.create_subcloud_inventory( + subcloud_params, ansible_subcloud_inventory_file, initial_deployment + ) return ansible_subcloud_inventory_file def _create_overrides_for_backup_or_restore(self, op, payload, subcloud_name): # Set override names as expected by the playbook - if not payload.get('override_values'): - payload['override_values'] = {} + if not payload.get("override_values"): + 
payload["override_values"] = {} - payload['override_values']['local'] = \ - payload['local_only'] or False + payload["override_values"]["local"] = payload["local_only"] or False - if op == 'create': - payload['override_values']['backup_registry_images'] = \ - payload['registry_images'] or False - suffix = 'backup_create_values' + if op == "create": + payload["override_values"]["backup_registry_images"] = ( + payload["registry_images"] or False + ) + suffix = "backup_create_values" else: - payload['override_values']['restore_registry_images'] = \ - payload['registry_images'] or False - suffix = 'backup_restore_values' + payload["override_values"]["restore_registry_images"] = ( + payload["registry_images"] or False + ) + suffix = "backup_restore_values" - if not payload['local_only']: - payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR + if not payload["local_only"]: + payload["override_values"]["central_backup_dir"] = CENTRAL_BACKUP_DIR - payload['override_values']['ansible_ssh_pass'] = \ - payload['sysadmin_password'] - payload['override_values']['ansible_become_pass'] = \ - payload['sysadmin_password'] - payload['override_values']['admin_password'] = \ - str(keyring.get_password('CGCS', 'admin')) + payload["override_values"]["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["override_values"]["ansible_become_pass"] = payload["sysadmin_password"] + payload["override_values"]["admin_password"] = str( + keyring.get_password("CGCS", "admin") + ) - if payload.get('backup_values'): - LOG.info('Backup create: Received backup_values for subcloud %s' - % subcloud_name) - for key, value in payload.get('backup_values').items(): - payload['override_values'][key] = value - elif payload.get('restore_values'): - LOG.info('Backup restore: Received restore_values for subcloud %s' - % subcloud_name) - for key, value in payload.get('restore_values').items(): - payload['override_values'][key] = value + if payload.get("backup_values"): + LOG.info( + "Backup create: Received backup_values for subcloud %s" % subcloud_name + ) + for key, value in payload.get("backup_values").items(): + payload["override_values"][key] = value + elif payload.get("restore_values"): + LOG.info( + "Backup restore: Received restore_values for subcloud %s" + % subcloud_name + ) + for key, value in payload.get("restore_values").items(): + payload["override_values"][key] = value return self._create_backup_overrides_file(payload, subcloud_name, suffix) - def _create_overrides_for_backup_delete(self, payload, subcloud_name, - release_version): + def _create_overrides_for_backup_delete( + self, payload, subcloud_name, release_version + ): # Set override names as expected by the playbook - if not payload.get('override_values'): - payload['override_values'] = {} + if not payload.get("override_values"): + payload["override_values"] = {} - payload['override_values']['software_version'] = release_version + payload["override_values"]["software_version"] = release_version - payload['override_values']['local'] = \ - payload['local_only'] or False + payload["override_values"]["local"] = payload["local_only"] or False - if not payload['local_only']: - payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR + if not payload["local_only"]: + payload["override_values"]["central_backup_dir"] = CENTRAL_BACKUP_DIR else: - payload['override_values']['ansible_ssh_pass'] = \ - payload['sysadmin_password'] - payload['override_values']['ansible_become_pass'] = \ - payload['sysadmin_password'] + 
payload["override_values"]["ansible_ssh_pass"] = payload[ + "sysadmin_password" + ] + payload["override_values"]["ansible_become_pass"] = payload[ + "sysadmin_password" + ] return self._create_backup_overrides_file( - payload, subcloud_name, 'backup_delete_values' + payload, subcloud_name, "backup_delete_values" ) def _create_backup_overrides_file(self, payload, subcloud_name, filename_suffix): backup_overrides_file = os.path.join( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_' + - filename_suffix + '.yml') + dccommon_consts.ANSIBLE_OVERRIDES_PATH, + subcloud_name + "_" + filename_suffix + ".yml", + ) - with open(backup_overrides_file, 'w') as f_out: - f_out.write( - '---\n' - ) - for k, v in payload['override_values'].items(): + with open(backup_overrides_file, "w") as f_out: + f_out.write("---\n") + for k, v in payload["override_values"].items(): f_out.write("%s: %s\n" % (k, v)) return backup_overrides_file - def _run_subcloud_backup_create_playbook(self, subcloud, backup_command, - context, local_only): - log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ - '_playbook_output.log' + def _run_subcloud_backup_create_playbook( + self, subcloud, backup_command, context, local_only + ): + log_file = ( + os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + + "_playbook_output.log" + ) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, backup_status=consts.BACKUP_STATE_IN_PROGRESS, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) # Run the subcloud backup playbook try: @@ -2310,9 +2604,11 @@ class SubcloudManager(manager.Manager): backup_status = consts.BACKUP_STATE_COMPLETE_CENTRAL db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, backup_status=backup_status, - backup_datetime=datetime.datetime.utcnow()) + backup_datetime=datetime.datetime.utcnow(), + ) LOG.info("Successfully backed up subcloud %s" % subcloud.name) return True @@ -2322,8 +2618,10 @@ class SubcloudManager(manager.Manager): @staticmethod def _run_subcloud_backup_delete_playbook(context, subcloud, delete_command): - log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ - '_playbook_output.log' + log_file = ( + os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + + "_playbook_output.log" + ) try: # Run the subcloud backup delete playbook @@ -2333,53 +2631,66 @@ class SubcloudManager(manager.Manager): # Set backup status to unknown after delete, since most recent backup may # have been deleted db_api.subcloud_bulk_update_by_ids( - context, [subcloud.id], - {Subcloud.backup_status.name: consts.BACKUP_STATE_UNKNOWN, - Subcloud.backup_datetime.name: None}) + context, + [subcloud.id], + { + Subcloud.backup_status.name: consts.BACKUP_STATE_UNKNOWN, + Subcloud.backup_datetime.name: None, + }, + ) LOG.info("Successfully deleted backup for subcloud %s" % subcloud.name) return True except PlaybookExecutionFailed: - LOG.error("Failed to delete backup for subcloud %s, check individual " - "log at %s for detailed output." % (subcloud.name, log_file)) + LOG.error( + "Failed to delete backup for subcloud %s, check individual " + "log at %s for detailed output." 
% (subcloud.name, log_file) + ) msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.BACKUP_STATE_FAILED) + subcloud.name, log_file, consts.BACKUP_STATE_FAILED + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + context, + subcloud.id, + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) return False def _run_subcloud_backup_restore_playbook( - self, subcloud, restore_command, context, log_file): + self, subcloud, restore_command, context, log_file + ): db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORING, - error_description=consts.ERROR_DESC_EMPTY + error_description=consts.ERROR_DESC_EMPTY, ) # Run the subcloud backup restore playbook try: ansible = AnsiblePlaybook(subcloud.name) - ansible.run_playbook(log_file, restore_command, - timeout=CONF.playbook_timeout) + ansible.run_playbook( + log_file, restore_command, timeout=CONF.playbook_timeout + ) LOG.info("Successfully restore subcloud %s" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_DONE + context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE ) return True except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_RESTORING) + subcloud.name, log_file, consts.DEPLOY_STATE_RESTORING + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORE_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH] + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], ) return False @@ -2388,18 +2699,21 @@ class SubcloudManager(manager.Manager): LOG.exception("Failed to prepare subcloud %s for backup" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, - backup_status=consts.BACKUP_STATE_PREP_FAILED) + context, subcloud.id, backup_status=consts.BACKUP_STATE_PREP_FAILED + ) def _fail_subcloud_backup_operation(self, context, log_file, subcloud): msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.BACKUP_STATE_IN_PROGRESS) + subcloud.name, log_file, consts.BACKUP_STATE_IN_PROGRESS + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, backup_status=consts.BACKUP_STATE_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) self._set_subcloud_backup_failure_alarm(subcloud) @@ -2408,12 +2722,13 @@ class SubcloudManager(manager.Manager): try: fault = self.fm_api.get_fault( - fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, - entity_instance_id) + fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, entity_instance_id + ) if fault: self.fm_api.clear_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, # noqa - entity_instance_id) + entity_instance_id, + ) except Exception as e: LOG.exception(e) @@ -2427,20 +2742,27 @@ class SubcloudManager(manager.Manager): entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MINOR, - reason_text=("Subcloud Backup Failure (subcloud=%s)" - % subcloud.name), + reason_text=("Subcloud Backup Failure (subcloud=%s)" % subcloud.name), alarm_type=fm_const.FM_ALARM_TYPE_3, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN, - proposed_repair_action="Retry subcloud backup after checking input " - "file. 
If problem persists, please contact " - "next level of support.", - service_affecting=False) + proposed_repair_action=( + "Retry subcloud backup after checking input file. " + "If problem persists, please contact next level of support." + ), + service_affecting=False, + ) self.fm_api.set_fault(fault) except Exception as e: LOG.exception(e) - def run_deploy_phases(self, context, subcloud_id, payload, - deploy_phases_to_run, initial_deployment=False): + def run_deploy_phases( + self, + context, + subcloud_id, + payload, + deploy_phases_to_run, + initial_deployment=False, + ): """Run one or more deployment phases, ensuring correct order :param context: request context object @@ -2453,17 +2775,19 @@ class SubcloudManager(manager.Manager): succeeded = True if consts.DEPLOY_PHASE_INSTALL in deploy_phases_to_run: succeeded = self.subcloud_deploy_install( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) if succeeded and consts.DEPLOY_PHASE_ENROLL in deploy_phases_to_run: - succeeded = self.subcloud_deploy_enroll( - context, subcloud_id, payload) + succeeded = self.subcloud_deploy_enroll(context, subcloud_id, payload) raise NotImplementedError if succeeded and consts.DEPLOY_PHASE_BOOTSTRAP in deploy_phases_to_run: succeeded = self.subcloud_deploy_bootstrap( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) if succeeded and consts.DEPLOY_PHASE_CONFIG in deploy_phases_to_run: succeeded = self.subcloud_deploy_config( - context, subcloud_id, payload, initial_deployment) + context, subcloud_id, payload, initial_deployment + ) if succeeded and consts.DEPLOY_PHASE_COMPLETE in deploy_phases_to_run: self.subcloud_deploy_complete(context, subcloud_id) return succeeded @@ -2472,57 +2796,70 @@ class SubcloudManager(manager.Manager): LOG.exception("run_deploy_phases failed") raise ex - def _run_subcloud_config(self, subcloud, context, - config_command, log_file): + def _run_subcloud_config(self, subcloud, context, config_command, log_file): # Run the custom deploy playbook LOG.info("Starting deploy of %s" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_CONFIGURING, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) try: ansible = AnsiblePlaybook(subcloud.name) - aborted = ansible.run_playbook( - log_file, config_command) + aborted = ansible.run_playbook(log_file, config_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_CONFIGURING) + subcloud.name, log_file, consts.DEPLOY_STATE_CONFIGURING + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_CONFIG_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) return False if aborted: return False LOG.info("Successfully deployed %s" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) return True @staticmethod - def _run_subcloud_install(context, subcloud, install_command, - log_file, payload): - software_version = str(payload['software_version']) - LOG.info("Preparing remote install of %s, version: %s", - subcloud.name, software_version) - if 
(subcloud.deploy_status != consts.DEPLOY_STATE_PRE_INSTALL or - subcloud.software_version != software_version): + def _run_subcloud_install(context, subcloud, install_command, log_file, payload): + software_version = str(payload["software_version"]) + LOG.info( + "Preparing remote install of %s, version: %s", + subcloud.name, + software_version, + ) + if ( + subcloud.deploy_status != consts.DEPLOY_STATE_PRE_INSTALL + or subcloud.software_version != software_version + ): db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL, - software_version=software_version) + software_version=software_version, + ) try: install = SubcloudInstall(subcloud.name) install.prep(dccommon_consts.ANSIBLE_OVERRIDES_PATH, payload) except Exception as e: LOG.exception(e) db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) + context, + subcloud.id, + deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED, + ) if install: install.cleanup(software_version) return False @@ -2530,21 +2867,25 @@ class SubcloudManager(manager.Manager): # Run the remote install playbook LOG.info("Starting remote install of %s" % subcloud.name) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALLING, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) try: - aborted = install.install( - consts.DC_ANSIBLE_LOG_DIR, install_command) + aborted = install.install(consts.DC_ANSIBLE_LOG_DIR, install_command) except Exception as e: msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_INSTALLING) + subcloud.name, log_file, consts.DEPLOY_STATE_INSTALLING + ) LOG.error(str(e)) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) install.cleanup(software_version) return False install.cleanup(software_version) @@ -2553,48 +2894,57 @@ class SubcloudManager(manager.Manager): LOG.info("Successfully installed %s" % subcloud.name) return True - def _run_subcloud_enroll(self, context, subcloud, enroll_command, - log_file, region_name): + def _run_subcloud_enroll( + self, context, subcloud, enroll_command, log_file, region_name + ): # Update the subcloud deploy_status to enrolling db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_ENROLLING, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) - LOG.info(f'Starting enroll of subcloud {subcloud.name}') + LOG.info(f"Starting enroll of subcloud {subcloud.name}") try: ansible = AnsiblePlaybook(subcloud.name) ansible.run_playbook(log_file, enroll_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_ENROLLING) - LOG.error(f'Enroll failed for subcloud {subcloud.name}: {msg}') + subcloud.name, log_file, consts.DEPLOY_STATE_ENROLLING + ) + LOG.error(f"Enroll failed for subcloud {subcloud.name}: {msg}") db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_ENROLL_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) return False # Ensure rehomed=False after bootstrapped 
from central cloud, it # applies on both initial deployment and re-deployment. db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_ENROLLED, error_description=consts.ERROR_DESC_EMPTY, region_name=region_name, - rehomed=False) + rehomed=False, + ) - LOG.info(f'Successfully enrolled {subcloud.name}') + LOG.info(f"Successfully enrolled {subcloud.name}") return True - def _run_subcloud_bootstrap(self, context, subcloud, - bootstrap_command, log_file): + def _run_subcloud_bootstrap(self, context, subcloud, bootstrap_command, log_file): # Update the subcloud deploy_status to bootstrapping db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING, - error_description=consts.ERROR_DESC_EMPTY) + error_description=consts.ERROR_DESC_EMPTY, + ) # Run the ansible subcloud bootstrap playbook LOG.info("Starting bootstrap of %s" % subcloud.name) @@ -2603,12 +2953,15 @@ class SubcloudManager(manager.Manager): aborted = ansible.run_playbook(log_file, bootstrap_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( - subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING) + subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], + ) return False if aborted: @@ -2617,10 +2970,12 @@ class SubcloudManager(manager.Manager): # Ensure rehomed=False after bootstrapped from central cloud, it # applies on both initial deployment and re-deployment. db_api.subcloud_update( - context, subcloud.id, + context, + subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPED, error_description=consts.ERROR_DESC_EMPTY, - rehomed=False) + rehomed=False, + ) LOG.info("Successfully bootstrapped %s" % subcloud.name) return True @@ -2629,19 +2984,18 @@ class SubcloudManager(manager.Manager): """Generate the addn_hosts_dc file for hostname/ip translation""" addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC) - addn_hosts_dc_temp = addn_hosts_dc + '.temp' + addn_hosts_dc_temp = addn_hosts_dc + ".temp" subclouds = db_api.subcloud_get_all(context) - with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp: + with open(addn_hosts_dc_temp, "w") as f_out_addn_dc_temp: for subcloud in subclouds: - addn_dc_line = subcloud.management_start_ip + ' ' + \ - subcloud.name + '\n' + addn_dc_line = subcloud.management_start_ip + " " + subcloud.name + "\n" f_out_addn_dc_temp.write(addn_dc_line) # if no more subclouds, create empty file so dnsmasq does not # emit an error log. 
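# Aside (illustrative sketch, not part of this patch): the _create_addn_hosts_dc
# hunk in this change regenerates the dnsmasq additional-hosts file by writing a
# ".temp" file, comparing it with the live file, and renaming only when the
# contents differ, so dnsmasq is not reloaded for no-op updates. The path below
# and the `subclouds` list of dicts are assumptions made for this example only.
import filecmp
import os


def write_addn_hosts(subclouds, addn_hosts_path="/tmp/addn_hosts_dc"):
    temp_path = addn_hosts_path + ".temp"
    with open(temp_path, "w") as temp_file:
        for subcloud in subclouds:
            temp_file.write(f"{subcloud['management_start_ip']} {subcloud['name']}\n")
        if not subclouds:
            # keep the file non-empty so dnsmasq does not log an error
            temp_file.write(" ")
    # only swap the file in when something actually changed; os.rename is
    # atomic when source and destination are on the same filesystem
    if not os.path.exists(addn_hosts_path) or not filecmp.cmp(temp_path, addn_hosts_path):
        os.rename(temp_path, addn_hosts_path)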
if not subclouds: - f_out_addn_dc_temp.write(' ') + f_out_addn_dc_temp.write(" ") if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc): os.rename(addn_hosts_dc_temp, addn_hosts_dc) @@ -2651,27 +3005,32 @@ class SubcloudManager(manager.Manager): def _write_subcloud_ansible_config(self, cached_regionone_data, payload): """Create the override file for usage with the specified subcloud""" - overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, - payload['name'] + '.yml') + overrides_file = os.path.join( + dccommon_consts.ANSIBLE_OVERRIDES_PATH, payload["name"] + ".yml" + ) - mgmt_pool = cached_regionone_data['mgmt_pool'] + mgmt_pool = cached_regionone_data["mgmt_pool"] mgmt_floating_ip = mgmt_pool.floating_address mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix) - oam_addresses = cached_regionone_data['oam_addresses'] + oam_addresses = cached_regionone_data["oam_addresses"] oam_floating_ip = oam_addresses.oam_floating_ip oam_subnet = oam_addresses.oam_subnet - with open(overrides_file, 'w') as f_out_overrides_file: + with open(overrides_file, "w") as f_out_overrides_file: f_out_overrides_file.write( - '---' - '\nregion_config: yes' - '\ndistributed_cloud_role: subcloud' - '\nsystem_controller_subnet: ' + mgmt_subnet + - '\nsystem_controller_floating_address: ' + mgmt_floating_ip + - '\nsystem_controller_oam_subnet: ' + oam_subnet + - '\nsystem_controller_oam_floating_address: ' + oam_floating_ip - + '\n' + "---" + "\nregion_config: yes" + "\ndistributed_cloud_role: subcloud" + "\nsystem_controller_subnet: " + + mgmt_subnet + + "\nsystem_controller_floating_address: " + + mgmt_floating_ip + + "\nsystem_controller_oam_subnet: " + + oam_subnet + + "\nsystem_controller_oam_floating_address: " + + oam_floating_ip + + "\n" ) for k, v in payload.items(): @@ -2682,31 +3041,32 @@ class SubcloudManager(manager.Manager): """Create the deploy value files for the subcloud""" deploy_values_file = os.path.join( - dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name + '_deploy_values.yml' + dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + "_deploy_values.yml" ) - with open(deploy_values_file, 'w') as f_out_deploy_values_file: - json.dump(payload['deploy_values'], f_out_deploy_values_file) + with open(deploy_values_file, "w") as f_out_deploy_values_file: + json.dump(payload["deploy_values"], f_out_deploy_values_file) - def _prepare_for_deployment(self, payload, subcloud_name, - populate_passwords=True): - payload['deploy_values'] = dict() + def _prepare_for_deployment(self, payload, subcloud_name, populate_passwords=True): + payload["deploy_values"] = dict() if populate_passwords: - payload['deploy_values']['ansible_become_pass'] = \ - payload['sysadmin_password'] - payload['deploy_values']['ansible_ssh_pass'] = \ - payload['sysadmin_password'] - payload['deploy_values']['admin_password'] = \ - str(keyring.get_password('CGCS', 'admin')) - payload['deploy_values']['deployment_config'] = \ - payload[consts.DEPLOY_CONFIG] - payload['deploy_values']['deployment_manager_chart'] = \ - payload[consts.DEPLOY_CHART] - payload['deploy_values']['deployment_manager_overrides'] = \ - payload[consts.DEPLOY_OVERRIDES] - payload['deploy_values']['user_uploaded_artifacts'] = \ - payload["user_uploaded_artifacts"] + payload["deploy_values"]["ansible_become_pass"] = payload[ + "sysadmin_password" + ] + payload["deploy_values"]["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["deploy_values"]["admin_password"] = str( + keyring.get_password("CGCS", "admin") + ) + 
payload["deploy_values"]["deployment_config"] = payload[consts.DEPLOY_CONFIG] + payload["deploy_values"]["deployment_manager_chart"] = payload[ + consts.DEPLOY_CHART + ] + payload["deploy_values"]["deployment_manager_overrides"] = payload[ + consts.DEPLOY_OVERRIDES + ] + payload["deploy_values"]["user_uploaded_artifacts"] = payload[ + "user_uploaded_artifacts" + ] self._write_deploy_files(payload, subcloud_name) def _delete_subcloud_routes(self, keystone_client, subcloud): @@ -2715,28 +3075,28 @@ class SubcloudManager(manager.Manager): # Delete the route to this subcloud on the management interface on # both controllers. management_subnet = netaddr.IPNetwork(subcloud.management_subnet) - endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') + endpoint = keystone_client.endpoint_cache.get_endpoint("sysinv") sysinv_client = SysinvClient( dccommon_consts.DEFAULT_REGION_NAME, keystone_client.session, - endpoint=endpoint + endpoint=endpoint, ) cached_regionone_data = self._get_cached_regionone_data( - keystone_client, sysinv_client) - for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: + keystone_client, sysinv_client + ) + for mgmt_if_uuid in cached_regionone_data["mgmt_interface_uuids"]: sysinv_client.delete_route( mgmt_if_uuid, str(management_subnet.ip), management_subnet.prefixlen, str(netaddr.IPAddress(subcloud.systemcontroller_gateway_ip)), - 1 + 1, ) @staticmethod def _delete_subcloud_cert(subcloud_region): cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_region) - secret_name = SubcloudManager._get_subcloud_cert_secret_name( - subcloud_region) + secret_name = SubcloudManager._get_subcloud_cert_secret_name(subcloud_region) kube = kubeoperator.KubeOperator() kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name) @@ -2744,9 +3104,9 @@ class SubcloudManager(manager.Manager): kube.kube_delete_secret(secret_name, CERT_NAMESPACE) LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name)) - def _remove_subcloud_details(self, context, - subcloud, - ansible_subcloud_inventory_file): + def _remove_subcloud_details( + self, context, subcloud, ansible_subcloud_inventory_file + ): """Remove subcloud details from database and inform orchestrators""" # Inform orchestrators that subcloud has been deleted try: @@ -2805,15 +3165,19 @@ class SubcloudManager(manager.Manager): self._cleanup_ansible_files(subcloud.name) def _cleanup_ansible_files(self, subcloud_name): - LOG.info(f"Cleaning up subcloud {subcloud_name} files " - f"from {dccommon_consts.ANSIBLE_OVERRIDES_PATH}") + LOG.info( + f"Cleaning up subcloud {subcloud_name} files " + f"from {dccommon_consts.ANSIBLE_OVERRIDES_PATH}" + ) try: self._delete_subcloud_overrides_file(subcloud_name) self._delete_subcloud_config_files(subcloud_name) self._delete_subcloud_install_files(subcloud_name) except Exception: - LOG.exception("Unable to cleanup subcloud ansible files" - f" for subcloud: {subcloud_name}") + LOG.exception( + "Unable to cleanup subcloud ansible files " + f"for subcloud: {subcloud_name}" + ) @staticmethod def _delete_subcloud_overrides_file(subcloud_name): @@ -2830,8 +3194,9 @@ class SubcloudManager(manager.Manager): @staticmethod def _delete_subcloud_install_files(subcloud_name): - install_path = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name) + install_path = os.path.join( + dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + ) if os.path.exists(install_path): shutil.rmtree(install_path) @@ -2844,8 +3209,8 @@ class SubcloudManager(manager.Manager): 
ansible_file_list = os.listdir(ansible_path) log_file_list = os.listdir(log_path) - ansible_file_list = [ansible_path + '/' + x for x in ansible_file_list] - log_file_list = [log_path + '/' + x for x in log_file_list] + ansible_file_list = [ansible_path + "/" + x for x in ansible_file_list] + log_file_list = [log_path + "/" + x for x in log_file_list] for cur_file in ansible_file_list + log_file_list: new_file = cur_file.replace(cur_sc_name, new_sc_name) @@ -2854,26 +3219,31 @@ class SubcloudManager(manager.Manager): os.rename(cur_file, new_file) # Gets new ansible inventory file - ansible_inv_file = self._get_ansible_filename(new_sc_name, - INVENTORY_FILE_POSTFIX) + ansible_inv_file = self._get_ansible_filename( + new_sc_name, INVENTORY_FILE_POSTFIX + ) if os.path.exists(ansible_inv_file): # Updates inventory host param with the new subcloud name - with open(ansible_inv_file, 'r') as f: + with open(ansible_inv_file, "r") as f: data = yaml.safe_load(f) mkey = list(data.keys())[0] - if mkey in data and 'hosts' in data[mkey] and \ - cur_sc_name in data[mkey]['hosts']: - data[mkey]['hosts'][new_sc_name] = \ - data[mkey]['hosts'].pop(cur_sc_name) + if ( + mkey in data + and "hosts" in data[mkey] + and cur_sc_name in data[mkey]["hosts"] + ): + data[mkey]["hosts"][new_sc_name] = data[mkey]["hosts"].pop(cur_sc_name) - with open(ansible_inv_file, 'w') as f: + with open(ansible_inv_file, "w") as f: yaml.dump(data, f, sort_keys=False) else: - msg = ("Could not rename inventory file %s because it does not " - "exist." % ansible_inv_file) + msg = ( + "Could not rename inventory file %s because it does not exist." + % ansible_inv_file + ) LOG.warn(msg) @staticmethod @@ -2897,23 +3267,26 @@ class SubcloudManager(manager.Manager): # - subloud resource out of sync # - Subcloud Backup Failure for alarm_id, entity_instance_id in ( - (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, - "subcloud=%s" % subcloud.name), - (fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, - "subcloud=%s.resource=%s" % - (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT)), - (fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, - "subcloud=%s" % subcloud.name)): + (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, "subcloud=%s" % subcloud.name), + ( + fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, + "subcloud=%s.resource=%s" + % (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT), + ), + ( + fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, + "subcloud=%s" % subcloud.name, + ), + ): try: - fault = self.fm_api.get_fault(alarm_id, - entity_instance_id) + fault = self.fm_api.get_fault(alarm_id, entity_instance_id) if fault: - self.fm_api.clear_fault(alarm_id, - entity_instance_id) + self.fm_api.clear_fault(alarm_id, entity_instance_id) except Exception as e: LOG.info( - "Problem clearing fault for subcloud %s, alarm_id=%s" % - (subcloud.name, alarm_id)) + "Problem clearing fault for subcloud %s, alarm_id=%s" + % (subcloud.name, alarm_id) + ) LOG.exception(e) def delete_subcloud(self, context, subcloud_id): @@ -2931,25 +3304,23 @@ class SubcloudManager(manager.Manager): if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED: raise exceptions.SubcloudNotUnmanaged() - if subcloud.availability_status == \ - dccommon_consts.AVAILABILITY_ONLINE: + if subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE: raise exceptions.SubcloudNotOffline() # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( - subcloud.name, INVENTORY_FILE_POSTFIX) + 
subcloud.name, INVENTORY_FILE_POSTFIX + ) - self._remove_subcloud_details(context, - subcloud, - ansible_subcloud_inventory_file) + self._remove_subcloud_details( + context, subcloud, ansible_subcloud_inventory_file + ) self._clear_subcloud_alarms(subcloud) - def rename_subcloud(self, - context, - subcloud_id, - curr_subcloud_name, - new_subcloud_name=None): + def rename_subcloud( + self, context, subcloud_id, curr_subcloud_name, new_subcloud_name=None + ): """Rename subcloud. :param context: request context object. @@ -2958,9 +3329,9 @@ class SubcloudManager(manager.Manager): :param new_subcloud_name: new subcloud name """ try: - subcloud = db_api. \ - subcloud_get_by_name_or_region_name(context, - new_subcloud_name) + subcloud = db_api.subcloud_get_by_name_or_region_name( + context, new_subcloud_name + ) except exceptions.SubcloudNameOrRegionNameNotFound: pass else: @@ -2970,14 +3341,13 @@ class SubcloudManager(manager.Manager): # Therefore it is not allowed to change the name. if subcloud_id != subcloud.id: raise exceptions.SubcloudOrRegionNameAlreadyExists( - name=new_subcloud_name) + name=new_subcloud_name + ) # updates subcloud name - subcloud = db_api.subcloud_update(context, subcloud_id, - name=new_subcloud_name) + subcloud = db_api.subcloud_update(context, subcloud_id, name=new_subcloud_name) # updates subcloud names on alarms - db_api.subcloud_rename_alarms(context, curr_subcloud_name, - new_subcloud_name) + db_api.subcloud_rename_alarms(context, curr_subcloud_name, new_subcloud_name) # Deletes subcloud alarms entity_instance_id = "subcloud=%s" % curr_subcloud_name self.fm_api.clear_all(entity_instance_id) @@ -2986,23 +3356,21 @@ class SubcloudManager(manager.Manager): self._create_addn_hosts_dc(context) # Rename related subcloud files - self._rename_subcloud_ansible_files(curr_subcloud_name, - new_subcloud_name) + self._rename_subcloud_ansible_files(curr_subcloud_name, new_subcloud_name) # Update the subcloud rehome_data with the new name if subcloud.rehome_data: rehome_data_dict = json.loads(subcloud.rehome_data) - if 'saved_payload' in rehome_data_dict: - rehome_data_dict['saved_payload']['name'] = new_subcloud_name + if "saved_payload" in rehome_data_dict: + rehome_data_dict["saved_payload"]["name"] = new_subcloud_name rehome_data = json.dumps(rehome_data_dict) - subcloud = db_api.subcloud_update(context, subcloud_id, - rehome_data=rehome_data) + subcloud = db_api.subcloud_update( + context, subcloud_id, rehome_data=rehome_data + ) return subcloud - def get_subcloud_name_by_region_name(self, - context, - subcloud_region): + def get_subcloud_name_by_region_name(self, context, subcloud_region): subcloud_name = None if subcloud_region is not None: sc = db_api.subcloud_get_by_region_name(context, subcloud_region) @@ -3010,9 +3378,13 @@ class SubcloudManager(manager.Manager): return subcloud_name - def _validate_management_state_update(self, new_management_state: str, - new_deploy_status: str, - subcloud: Subcloud, force: bool): + def _validate_management_state_update( + self, + new_management_state: str, + new_deploy_status: str, + subcloud: Subcloud, + force: bool, + ): if new_management_state == dccommon_consts.MANAGEMENT_UNMANAGED: if subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED: msg = f"Subcloud {subcloud.name} already unmanaged" @@ -3034,17 +3406,21 @@ class SubcloudManager(manager.Manager): and new_deploy_status == consts.DEPLOY_STATE_DONE ) - if (subcloud.deploy_status != consts.DEPLOY_STATE_DONE and - not allowed_deploy_transition): - msg = (f"Unable 
to manage {subcloud.name}: its deploy_status " - f"must be either '{consts.DEPLOY_STATE_DONE}' or " - f"'{consts.DEPLOY_STATE_REHOME_PENDING}'") + if ( + subcloud.deploy_status != consts.DEPLOY_STATE_DONE + and not allowed_deploy_transition + ): + msg = ( + f"Unable to manage {subcloud.name}: its deploy_status " + f"must be either '{consts.DEPLOY_STATE_DONE}' or " + f"'{consts.DEPLOY_STATE_REHOME_PENDING}'" + ) LOG.warning(msg) raise exceptions.BadRequest(resource="subcloud", msg=msg) - if (subcloud.availability_status != - dccommon_consts.AVAILABILITY_ONLINE) and ( - subcloud.deploy_status != consts.DEPLOY_STATE_REHOME_PENDING): + if ( + subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE + ) and (subcloud.deploy_status != consts.DEPLOY_STATE_REHOME_PENDING): LOG.warning(f"Subcloud {subcloud.name} is not online") raise exceptions.SubcloudNotOnline() @@ -3054,8 +3430,9 @@ class SubcloudManager(manager.Manager): LOG.error(f"Invalid management_state {new_management_state}") raise exceptions.InvalidInputError() - def _prepare_rehome_data(self, subcloud: Subcloud, - bootstrap_values, bootstrap_address): + def _prepare_rehome_data( + self, subcloud: Subcloud, bootstrap_values, bootstrap_address + ): rehome_data_dict = None # load the existing data if it exists if subcloud.rehome_data: @@ -3069,65 +3446,68 @@ class SubcloudManager(manager.Manager): # Since bootstrap-address is not original data in bootstrap-values # it's necessary to save it first, then put it back after # after bootstrap_values is updated. - if 'bootstrap-address' in rehome_data_dict['saved_payload']: - _bootstrap_address = \ - rehome_data_dict['saved_payload']['bootstrap-address'] - bootstrap_values_dict = yaml.load( - bootstrap_values, Loader=yaml.SafeLoader - ) + if "bootstrap-address" in rehome_data_dict["saved_payload"]: + _bootstrap_address = rehome_data_dict["saved_payload"][ + "bootstrap-address" + ] + bootstrap_values_dict = yaml.load(bootstrap_values, Loader=yaml.SafeLoader) # remove sysadmin_password,ansible_ssh_pass,ansible_become_pass # encode admin_password - if 'sysadmin_password' in bootstrap_values_dict: - del bootstrap_values_dict['sysadmin_password'] - if 'ansible_ssh_pass' in bootstrap_values_dict: - del bootstrap_values_dict['ansible_ssh_pass'] - if 'ansible_become_pass' in bootstrap_values_dict: - del bootstrap_values_dict['ansible_become_pass'] - if 'admin_password' in bootstrap_values_dict: - bootstrap_values_dict['admin_password'] = base64.b64encode( - bootstrap_values_dict['admin_password'].encode("utf-8") - ).decode('utf-8') - rehome_data_dict['saved_payload'] = bootstrap_values_dict + if "sysadmin_password" in bootstrap_values_dict: + del bootstrap_values_dict["sysadmin_password"] + if "ansible_ssh_pass" in bootstrap_values_dict: + del bootstrap_values_dict["ansible_ssh_pass"] + if "ansible_become_pass" in bootstrap_values_dict: + del bootstrap_values_dict["ansible_become_pass"] + if "admin_password" in bootstrap_values_dict: + bootstrap_values_dict["admin_password"] = base64.b64encode( + bootstrap_values_dict["admin_password"].encode("utf-8") + ).decode("utf-8") + rehome_data_dict["saved_payload"] = bootstrap_values_dict # put bootstrap_address back into rehome_data_dict if _bootstrap_address: - rehome_data_dict['saved_payload'][ - 'bootstrap-address'] = _bootstrap_address + rehome_data_dict["saved_payload"][ + "bootstrap-address" + ] = _bootstrap_address # update bootstrap_address if bootstrap_address: if rehome_data_dict is None: raise exceptions.BadRequest( - resource='subcloud', 
- msg='Cannot update bootstrap_address into rehome data, ' - 'need to import bootstrap_values first') - rehome_data_dict['saved_payload'][ - 'bootstrap-address'] = bootstrap_address + resource="subcloud", + msg=( + "Cannot update bootstrap_address into rehome data, " + "need to import bootstrap_values first" + ), + ) + rehome_data_dict["saved_payload"]["bootstrap-address"] = bootstrap_address rehome_data = None systemcontroller_gateway_address = None if rehome_data_dict: rehome_data = json.dumps(rehome_data_dict) - systemcontroller_gateway_address = \ - rehome_data_dict['saved_payload'].get( - "systemcontroller_gateway_address" - ) + systemcontroller_gateway_address = rehome_data_dict["saved_payload"].get( + "systemcontroller_gateway_address" + ) return rehome_data, systemcontroller_gateway_address - def update_subcloud(self, - context, - subcloud_id, - management_state=None, - description=None, - location=None, - group_id=None, - data_install=None, - force=None, - deploy_status=None, - peer_group_id=None, - bootstrap_values=None, - bootstrap_address=None): + def update_subcloud( + self, + context, + subcloud_id, + management_state=None, + description=None, + location=None, + group_id=None, + data_install=None, + force=None, + deploy_status=None, + peer_group_id=None, + bootstrap_values=None, + bootstrap_address=None, + ): """Update subcloud and notify orchestrators. :param context: request context object @@ -3153,14 +3533,17 @@ class SubcloudManager(manager.Manager): # When trying to manage a 'rehome-pending' subcloud, revert its deploy # status back to 'complete' if its not specified - if (management_state == dccommon_consts.MANAGEMENT_MANAGED and - subcloud.deploy_status == consts.DEPLOY_STATE_REHOME_PENDING and - not deploy_status): + if ( + management_state == dccommon_consts.MANAGEMENT_MANAGED + and subcloud.deploy_status == consts.DEPLOY_STATE_REHOME_PENDING + and not deploy_status + ): deploy_status = consts.DEPLOY_STATE_DONE # management_state semantic checking - self._validate_management_state_update(management_state, deploy_status, - subcloud, force) + self._validate_management_state_update( + management_state, deploy_status, subcloud, force + ) # Update bootstrap values into rehome_data rehome_data, systemcontroller_gateway_ip = self._prepare_rehome_data( @@ -3171,23 +3554,30 @@ class SubcloudManager(manager.Manager): msg = None # Only update deploy_status if subcloud is or will be unmanaged if dccommon_consts.MANAGEMENT_UNMANAGED not in ( - management_state, subcloud.management_state): - msg = ("Unable to update deploy_status of subcloud " - f"{subcloud.name} to {deploy_status}: subcloud " - "must also be unmanaged") + management_state, + subcloud.management_state, + ): + msg = ( + f"Unable to update deploy_status of subcloud {subcloud.name} " + f"to {deploy_status}: subcloud must also be unmanaged" + ) # Only allow managing if the deploy status is also set to 'complete' - if (management_state == dccommon_consts.MANAGEMENT_MANAGED and - deploy_status != consts.DEPLOY_STATE_DONE): - msg = (f"Unable to manage {subcloud.name} while also updating " - f"its deploy_status to {deploy_status}: not allowed") + if ( + management_state == dccommon_consts.MANAGEMENT_MANAGED + and deploy_status != consts.DEPLOY_STATE_DONE + ): + msg = ( + f"Unable to manage {subcloud.name} while also updating " + f"its deploy_status to {deploy_status}: not allowed" + ) if msg: LOG.warning(msg) - raise exceptions.BadRequest(resource='subcloud', msg=msg) + raise exceptions.BadRequest(resource="subcloud", 
msg=msg) # Update route if the systemcontroller_gateway_ip has been updated if ( - systemcontroller_gateway_ip is not None and - systemcontroller_gateway_ip != subcloud.systemcontroller_gateway_ip + systemcontroller_gateway_ip is not None + and systemcontroller_gateway_ip != subcloud.systemcontroller_gateway_ip ): m_ks_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, @@ -3195,8 +3585,9 @@ class SubcloudManager(manager.Manager): fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips, ).keystone_client self._create_subcloud_route( - {'management_subnet': subcloud.management_subnet}, - m_ks_client, systemcontroller_gateway_ip + {"management_subnet": subcloud.management_subnet}, + m_ks_client, + systemcontroller_gateway_ip, ) # Deletes old routes (subcloud obj holds old gateway ip) self._delete_subcloud_routes(m_ks_client, subcloud) @@ -3212,7 +3603,7 @@ class SubcloudManager(manager.Manager): deploy_status=deploy_status, peer_group_id=peer_group_id, rehome_data=rehome_data, - systemcontroller_gateway_ip=systemcontroller_gateway_ip + systemcontroller_gateway_ip=systemcontroller_gateway_ip, ) # Inform orchestrators that subcloud has been updated @@ -3223,29 +3614,34 @@ class SubcloudManager(manager.Manager): context, subcloud.region_name, management_state, - subcloud.availability_status) + subcloud.availability_status, + ) - LOG.info('Notifying dcorch, subcloud:%s management: %s, ' - 'availability:%s' % (subcloud.name, - management_state, - subcloud.availability_status)) + LOG.info( + "Notifying dcorch, subcloud:%s management: %s, availability:%s" + % (subcloud.name, management_state, subcloud.availability_status) + ) except Exception as e: LOG.exception(e) - LOG.warn('Problem informing dcorch of subcloud ' - 'state change, resume to original state, subcloud: %s' - % subcloud.name) + LOG.warn( + "Problem informing dcorch of subcloud " + "state change, resume to original state, subcloud: %s" + % subcloud.name + ) management_state = original_management_state # Also revert the deploy_status otherwise we could have a # managed subcloud with the 'secondary' or other invalid deploy # status/management state combination. 
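# Aside (illustrative sketch, not part of this patch): the _prepare_rehome_data
# hunk in this change strips the plaintext sysadmin/ansible passwords out of the
# saved bootstrap values and keeps only a base64-encoded admin password in
# rehome_data. A minimal standalone version of that sanitization is sketched
# below; the helper name and the sample dictionary are assumptions.
import base64


def sanitize_bootstrap_values(bootstrap_values: dict) -> dict:
    values = dict(bootstrap_values)
    for secret in ("sysadmin_password", "ansible_ssh_pass", "ansible_become_pass"):
        values.pop(secret, None)
    if "admin_password" in values:
        values["admin_password"] = base64.b64encode(
            values["admin_password"].encode("utf-8")
        ).decode("utf-8")
    return values


# e.g. sanitize_bootstrap_values({"admin_password": "secret", "sysadmin_password": "x"})
# returns {"admin_password": "c2VjcmV0"}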
deploy_status = original_deploy_status - subcloud = \ - db_api.subcloud_update(context, subcloud_id, - management_state=management_state, - description=description, - location=location, - deploy_status=deploy_status) + subcloud = db_api.subcloud_update( + context, + subcloud_id, + management_state=management_state, + description=description, + location=location, + deploy_status=deploy_status, + ) if management_state == dccommon_consts.MANAGEMENT_UNMANAGED: # set all endpoint statuses to unknown, except the dc-cert @@ -3256,8 +3652,8 @@ class SubcloudManager(manager.Manager): # Do not ignore the dc-cert endpoint for secondary or rehome # pending subclouds as cert-mon does not audit them if subcloud.deploy_status in ( - consts.DEPLOY_STATE_SECONDARY, - consts.DEPLOY_STATE_REHOME_PENDING + consts.DEPLOY_STATE_SECONDARY, + consts.DEPLOY_STATE_REHOME_PENDING, ): ignore_endpoints = None @@ -3267,11 +3663,12 @@ class SubcloudManager(manager.Manager): subcloud_region=subcloud.region_name, endpoint_type=None, sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN, - ignore_endpoints=ignore_endpoints) + ignore_endpoints=ignore_endpoints, + ) elif management_state == dccommon_consts.MANAGEMENT_MANAGED: # Subcloud is managed # Tell cert-mon to audit endpoint certificate - LOG.info('Request certmon audit for %s' % subcloud.name) + LOG.info("Request certmon audit for %s" % subcloud.name) dc_notification = dcmanager_rpc_client.DCManagerNotifications() dc_notification.subcloud_managed(context, subcloud.region_name) @@ -3283,7 +3680,8 @@ class SubcloudManager(manager.Manager): context, subcloud.name, subcloud.region_name, - dccommon_consts.AVAILABILITY_OFFLINE) + dccommon_consts.AVAILABILITY_OFFLINE, + ) # Clear existing fault alarm of secondary subcloud if subcloud.deploy_status == consts.DEPLOY_STATE_SECONDARY: @@ -3294,40 +3692,53 @@ class SubcloudManager(manager.Manager): def update_subcloud_with_network_reconfig(self, context, subcloud_id, payload): subcloud = db_api.subcloud_get(context, subcloud_id) subcloud = db_api.subcloud_update( - context, subcloud.id, - deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK + context, + subcloud.id, + deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK, ) - subcloud_name = payload['name'] + subcloud_name = payload["name"] try: self._create_intermediate_ca_cert(payload) subcloud_inventory_file = self._get_ansible_filename( - subcloud_name, INVENTORY_FILE_POSTFIX) - subcloud_params = {'name': subcloud_name, - 'bootstrap-address': payload.get('bootstrap_address')} + subcloud_name, INVENTORY_FILE_POSTFIX + ) + subcloud_params = { + "name": subcloud_name, + "bootstrap-address": payload.get("bootstrap_address"), + } utils.create_subcloud_inventory(subcloud_params, subcloud_inventory_file) overrides_file = self._create_subcloud_update_overrides_file( - payload, subcloud_name, 'update_values') + payload, subcloud_name, "update_values" + ) update_command = self.compose_update_command( - subcloud_name, subcloud_inventory_file, subcloud.software_version) + subcloud_name, subcloud_inventory_file, subcloud.software_version + ) except Exception: - LOG.exception( - "Failed to prepare subcloud %s for update." % subcloud_name) + LOG.exception("Failed to prepare subcloud %s for update." 
% subcloud_name) return try: apply_thread = threading.Thread( target=self._run_network_reconfiguration, - args=(subcloud_name, update_command, overrides_file, - payload, context, subcloud)) + args=( + subcloud_name, + update_command, + overrides_file, + payload, + context, + subcloud, + ), + ) apply_thread.start() except Exception: LOG.exception("Failed to update subcloud %s" % subcloud_name) def _run_network_reconfiguration( - self, subcloud_name, update_command, overrides_file, - payload, context, subcloud + self, subcloud_name, update_command, overrides_file, payload, context, subcloud ): - log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud_name) + - '_playbook_output.log') + log_file = ( + os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud_name) + + "_playbook_output.log" + ) subcloud_id = subcloud.id try: ansible = AnsiblePlaybook(subcloud_name) @@ -3335,12 +3746,14 @@ class SubcloudManager(manager.Manager): utils.delete_subcloud_inventory(overrides_file) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( - subcloud_name, log_file, consts.DEPLOY_STATE_RECONFIGURING_NETWORK) + subcloud_name, log_file, consts.DEPLOY_STATE_RECONFIGURING_NETWORK + ) LOG.error(msg) db_api.subcloud_update( - context, subcloud_id, + context, + subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, - error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH] + error_description=msg[0 : consts.ERROR_DESCRIPTION_LENGTH], ) return @@ -3353,21 +3766,22 @@ class SubcloudManager(manager.Manager): subcloud = db_api.subcloud_update( context, subcloud_id, - description=payload.get('description', subcloud.description), - management_subnet=payload.get('management_subnet'), - management_gateway_ip=payload.get('management_gateway_ip'), - management_start_ip=payload.get('management_start_ip'), - management_end_ip=payload.get('management_end_ip'), - location=payload.get('location', subcloud.location), - group_id=payload.get('group_id', subcloud.group_id), - data_install=payload.get('data_install', subcloud.data_install) + description=payload.get("description", subcloud.description), + management_subnet=payload.get("management_subnet"), + management_gateway_ip=payload.get("management_gateway_ip"), + management_start_ip=payload.get("management_start_ip"), + management_end_ip=payload.get("management_end_ip"), + location=payload.get("location", subcloud.location), + group_id=payload.get("group_id", subcloud.group_id), + data_install=payload.get("data_install", subcloud.data_install), ) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) - def _configure_system_controller_network(self, context, payload, subcloud, - update_db=True): + def _configure_system_controller_network( + self, context, payload, subcloud, update_db=True + ): """Configure system controller network :param context: request context object @@ -3378,8 +3792,9 @@ class SubcloudManager(manager.Manager): subcloud_name = subcloud.name subcloud_region = subcloud.region_name subcloud_id = subcloud.id - sys_controller_gw_ip = payload.get("systemcontroller_gateway_address", - subcloud.systemcontroller_gateway_ip) + sys_controller_gw_ip = payload.get( + "systemcontroller_gateway_address", subcloud.systemcontroller_gateway_ip + ) try: m_ks_client = OpenStackDriver( @@ -3387,28 +3802,29 @@ class SubcloudManager(manager.Manager): region_clients=None, fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips, ).keystone_client - self._create_subcloud_route(payload, m_ks_client, - sys_controller_gw_ip) + 
self._create_subcloud_route(payload, m_ks_client, sys_controller_gw_ip) except Exception: - LOG.exception( - "Failed to create route to subcloud %s." % subcloud_name) + LOG.exception("Failed to create route to subcloud %s." % subcloud_name) if update_db: db_api.subcloud_update( - context, subcloud_id, + context, + subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, - error_description=consts.ERROR_DESC_EMPTY + error_description=consts.ERROR_DESC_EMPTY, ) return try: self._update_services_endpoint( - context, payload, subcloud_region, m_ks_client) + context, payload, subcloud_region, m_ks_client + ) except Exception: LOG.exception("Failed to update subcloud %s endpoints" % subcloud_name) if update_db: db_api.subcloud_update( - context, subcloud_id, + context, + subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, - error_description=consts.ERROR_DESC_EMPTY + error_description=consts.ERROR_DESC_EMPTY, ) return @@ -3416,24 +3832,29 @@ class SubcloudManager(manager.Manager): if utils.get_management_subnet(payload) != subcloud.management_subnet: self._delete_subcloud_routes(m_ks_client, subcloud) - def _create_subcloud_route(self, payload, keystone_client, - systemcontroller_gateway_ip): + def _create_subcloud_route( + self, payload, keystone_client, systemcontroller_gateway_ip + ): subcloud_subnet = netaddr.IPNetwork(utils.get_management_subnet(payload)) - endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') - sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, - keystone_client.session, - endpoint=endpoint) + endpoint = keystone_client.endpoint_cache.get_endpoint("sysinv") + sysinv_client = SysinvClient( + dccommon_consts.DEFAULT_REGION_NAME, + keystone_client.session, + endpoint=endpoint, + ) cached_regionone_data = self._get_cached_regionone_data( - keystone_client, sysinv_client) - for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: - sysinv_client.create_route(mgmt_if_uuid, - str(subcloud_subnet.ip), - subcloud_subnet.prefixlen, - systemcontroller_gateway_ip, - 1) + keystone_client, sysinv_client + ) + for mgmt_if_uuid in cached_regionone_data["mgmt_interface_uuids"]: + sysinv_client.create_route( + mgmt_if_uuid, + str(subcloud_subnet.ip), + subcloud_subnet.prefixlen, + systemcontroller_gateway_ip, + 1, + ) - def _update_services_endpoint( - self, context, payload, subcloud_region, m_ks_client): + def _update_services_endpoint(self, context, payload, subcloud_region, m_ks_client): ip = utils.get_management_start_address(payload) formatted_ip = f"[{ip}]" if netaddr.IPAddress(ip).version == 6 else ip @@ -3447,38 +3868,46 @@ class SubcloudManager(manager.Manager): } for endpoint in m_ks_client.keystone_client.endpoints.list( - region=subcloud_region): + region=subcloud_region + ): service_type = m_ks_client.keystone_client.services.get( - endpoint.service_id).type + endpoint.service_id + ).type if service_type == dccommon_consts.ENDPOINT_TYPE_PLATFORM: - admin_endpoint_url = services_endpoints.get('sysinv') + admin_endpoint_url = services_endpoints.get("sysinv") elif service_type == dccommon_consts.ENDPOINT_TYPE_IDENTITY: - admin_endpoint_url = services_endpoints.get('keystone') + admin_endpoint_url = services_endpoints.get("keystone") elif service_type == dccommon_consts.ENDPOINT_TYPE_PATCHING: - admin_endpoint_url = services_endpoints.get('patching') + admin_endpoint_url = services_endpoints.get("patching") elif service_type == dccommon_consts.ENDPOINT_TYPE_FM: - admin_endpoint_url = 
services_endpoints.get('fm') + admin_endpoint_url = services_endpoints.get("fm") elif service_type == dccommon_consts.ENDPOINT_TYPE_NFV: - admin_endpoint_url = services_endpoints.get('vim') + admin_endpoint_url = services_endpoints.get("vim") elif service_type == dccommon_consts.ENDPOINT_TYPE_SOFTWARE: - admin_endpoint_url = services_endpoints.get('usm') + admin_endpoint_url = services_endpoints.get("usm") else: LOG.exception("Endpoint Type Error: %s" % service_type) m_ks_client.keystone_client.endpoints.update( - endpoint, url=admin_endpoint_url) + endpoint, url=admin_endpoint_url + ) - LOG.info("Update services endpoint to %s in subcloud region %s" % ( - formatted_ip, subcloud_region)) + LOG.info( + "Update services endpoint to %s in subcloud region %s" + % (formatted_ip, subcloud_region) + ) # Update service URLs in subcloud endpoint cache self.audit_rpc_client.trigger_subcloud_endpoints_update( - context, subcloud_region, services_endpoints) + context, subcloud_region, services_endpoints + ) # Update the management ip inside dcorch database (triggers endpoint update) self.dcorch_rpc_client.update_subcloud_management_ip( - context, subcloud_region, ip) + context, subcloud_region, ip + ) # Update sysinv URL in cert-mon cache dc_notification = dcmanager_rpc_client.DCManagerNotifications() dc_notification.subcloud_sysinv_endpoint_update( - context, subcloud_region, services_endpoints.get("sysinv")) + context, subcloud_region, services_endpoints.get("sysinv") + ) # Update dcmanager endpoint cache EndpointCache.update_master_service_endpoint_region( @@ -3486,18 +3915,19 @@ class SubcloudManager(manager.Manager): ) def _create_subcloud_update_overrides_file( - self, payload, subcloud_name, filename_suffix): + self, payload, subcloud_name, filename_suffix + ): update_overrides_file = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, - subcloud_name + '_' + filename_suffix + '.yml' + subcloud_name + "_" + filename_suffix + ".yml", ) self._update_override_values(payload) - with open(update_overrides_file, 'w', encoding='UTF-8') as f_out: - f_out.write('---\n') - for key, value in payload['override_values'].items(): - if key in ['ansible_ssh_pass', 'ansible_become_pass']: + with open(update_overrides_file, "w", encoding="UTF-8") as f_out: + f_out.write("---\n") + for key, value in payload["override_values"].items(): + if key in ["ansible_ssh_pass", "ansible_become_pass"]: f_out.write(f"{key}: {value}\n") else: f_out.write(f"{key}: {json.dumps(value)}\n") @@ -3505,70 +3935,74 @@ class SubcloudManager(manager.Manager): return update_overrides_file def _update_override_values(self, payload): - if not payload.get('override_values'): - payload['override_values'] = {} + if not payload.get("override_values"): + payload["override_values"] = {} - payload['override_values']['ansible_ssh_pass'] = ( - payload['sysadmin_password']) - payload['override_values']['ansible_become_pass'] = ( - payload['sysadmin_password']) + payload["override_values"]["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["override_values"]["ansible_become_pass"] = payload["sysadmin_password"] - payload['override_values']['sc_gateway_address'] = ( - payload['management_gateway_ip']) - payload['override_values']['sc_floating_address'] = ( - payload['management_start_ip']) - payload['override_values']['system_controller_network'] = ( - payload['system_controller_network']) - payload['override_values']['system_controller_network_prefix'] = ( - payload['system_controller_network_prefix']) - 
payload['override_values']['sc_subnet'] = payload['management_subnet'] + payload["override_values"]["sc_gateway_address"] = payload[ + "management_gateway_ip" + ] + payload["override_values"]["sc_floating_address"] = payload[ + "management_start_ip" + ] + payload["override_values"]["system_controller_network"] = payload[ + "system_controller_network" + ] + payload["override_values"]["system_controller_network_prefix"] = payload[ + "system_controller_network_prefix" + ] + payload["override_values"]["sc_subnet"] = payload["management_subnet"] - payload['override_values']['dc_root_ca_cert'] = payload['dc_root_ca_cert'] - payload['override_values']['sc_ca_cert'] = payload['sc_ca_cert'] - payload['override_values']['sc_ca_key'] = payload['sc_ca_key'] + payload["override_values"]["dc_root_ca_cert"] = payload["dc_root_ca_cert"] + payload["override_values"]["sc_ca_cert"] = payload["sc_ca_cert"] + payload["override_values"]["sc_ca_key"] = payload["sc_ca_key"] - def update_subcloud_sync_endpoint_type(self, context, - subcloud_region, - endpoint_type_list, - openstack_installed): - operation = 'add' if openstack_installed else 'remove' + def update_subcloud_sync_endpoint_type( + self, context, subcloud_region, endpoint_type_list, openstack_installed + ): + operation = "add" if openstack_installed else "remove" func_switcher = { - 'add': ( + "add": ( self.dcorch_rpc_client.add_subcloud_sync_endpoint_type, - db_api.subcloud_status_create + db_api.subcloud_status_create, ), - 'remove': ( + "remove": ( self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type, - db_api.subcloud_status_delete - ) + db_api.subcloud_status_delete, + ), } try: subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region) except Exception: - LOG.exception( - "Failed to get subcloud by region name: %s" % subcloud_region - ) + LOG.exception("Failed to get subcloud by region name: %s" % subcloud_region) raise try: # Notify dcorch to add/remove sync endpoint type list - func_switcher[operation][0](self.context, subcloud_region, - endpoint_type_list) - LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' % - (subcloud.name, endpoint_type_list)) + func_switcher[operation][0]( + self.context, subcloud_region, endpoint_type_list + ) + LOG.info( + "Notifying dcorch, subcloud: %s new sync endpoint: %s" + % (subcloud.name, endpoint_type_list) + ) # Update subcloud status table by adding/removing openstack sync # endpoint types for endpoint_type in endpoint_type_list: - func_switcher[operation][1](self.context, subcloud.id, - endpoint_type) + func_switcher[operation][1](self.context, subcloud.id, endpoint_type) # Update openstack_installed of subcloud table - db_api.subcloud_update(self.context, subcloud.id, - openstack_installed=openstack_installed) + db_api.subcloud_update( + self.context, subcloud.id, openstack_installed=openstack_installed + ) except Exception: - LOG.exception('Problem informing dcorch of subcloud sync endpoint' - ' type change, subcloud region: %s' % subcloud_region) + LOG.exception( + "Problem informing dcorch of subcloud sync endpoint " + "type change, subcloud region: %s" % subcloud_region + ) def handle_subcloud_operations_in_progress(self): """Identify subclouds in transitory stages and update subcloud @@ -3576,7 +4010,7 @@ class SubcloudManager(manager.Manager): state to failure. 
""" - LOG.info('Identifying subclouds in transitory stages.') + LOG.info("Identifying subclouds in transitory stages.") subclouds = db_api.subcloud_get_all(self.context) @@ -3585,32 +4019,35 @@ class SubcloudManager(manager.Manager): new_deploy_status = TRANSITORY_STATES.get(subcloud.deploy_status) new_backup_status = TRANSITORY_BACKUP_STATES.get(subcloud.backup_status) new_prestage_status = TRANSITORY_PRESTAGE_STATES.get( - subcloud.prestage_status) + subcloud.prestage_status + ) # update deploy, backup and prestage states to # the corresponding failure states if new_deploy_status or new_backup_status or new_prestage_status: if new_deploy_status: - LOG.info("Changing subcloud %s deploy status from %s to %s." - % (subcloud.name, subcloud.deploy_status, - new_deploy_status)) + LOG.info( + "Changing subcloud %s deploy status from %s to %s." + % (subcloud.name, subcloud.deploy_status, new_deploy_status) + ) if new_backup_status: - LOG.info("Changing subcloud %s backup status from %s to %s." - % (subcloud.name, subcloud.backup_status, - new_backup_status)) + LOG.info( + "Changing subcloud %s backup status from %s to %s." + % (subcloud.name, subcloud.backup_status, new_backup_status) + ) if new_prestage_status: - LOG.info("Changing subcloud %s prestage status from %s" - " to %s." - % (subcloud.name, subcloud.prestage_status, - new_prestage_status)) + LOG.info( + "Changing subcloud %s prestage status from %s to %s." + % (subcloud.name, subcloud.prestage_status, new_prestage_status) + ) db_api.subcloud_update( self.context, subcloud.id, deploy_status=new_deploy_status or subcloud.deploy_status, backup_status=new_backup_status or subcloud.backup_status, - prestage_status=new_prestage_status or subcloud.prestage_status + prestage_status=new_prestage_status or subcloud.prestage_status, ) @staticmethod @@ -3620,86 +4057,98 @@ class SubcloudManager(manager.Manager): @utils.synchronized("regionone-data-cache", external=False) def _get_cached_regionone_data( - self, regionone_keystone_client, regionone_sysinv_client=None): - if (not SubcloudManager.regionone_data or - SubcloudManager.regionone_data['expiry'] <= - datetime.datetime.utcnow()): + self, regionone_keystone_client, regionone_sysinv_client=None + ): + if ( + not SubcloudManager.regionone_data + or SubcloudManager.regionone_data["expiry"] <= datetime.datetime.utcnow() + ): user_list = regionone_keystone_client.get_enabled_users(id_only=False) for user in user_list: if user.name == dccommon_consts.ADMIN_USER_NAME: - SubcloudManager.regionone_data['admin_user_id'] = user.id + SubcloudManager.regionone_data["admin_user_id"] = user.id elif user.name == dccommon_consts.SYSINV_USER_NAME: - SubcloudManager.regionone_data['sysinv_user_id'] = user.id + SubcloudManager.regionone_data["sysinv_user_id"] = user.id elif user.name == dccommon_consts.DCMANAGER_USER_NAME: - SubcloudManager.regionone_data['dcmanager_user_id'] = user.id + SubcloudManager.regionone_data["dcmanager_user_id"] = user.id - project_list = regionone_keystone_client.get_enabled_projects( - id_only=False) + project_list = regionone_keystone_client.get_enabled_projects(id_only=False) for project in project_list: if project.name == dccommon_consts.ADMIN_PROJECT_NAME: - SubcloudManager.regionone_data['admin_project_id'] = project.id + SubcloudManager.regionone_data["admin_project_id"] = project.id elif project.name == dccommon_consts.SERVICES_USER_NAME: - SubcloudManager.regionone_data['services_project_id'] = \ - project.id + SubcloudManager.regionone_data["services_project_id"] = 
project.id if regionone_sysinv_client is None: endpoint = regionone_keystone_client.endpoint_cache.get_endpoint( - 'sysinv') + "sysinv" + ) regionone_sysinv_client = SysinvClient( dccommon_consts.DEFAULT_REGION_NAME, regionone_keystone_client.session, - endpoint=endpoint) + endpoint=endpoint, + ) controllers = regionone_sysinv_client.get_controller_hosts() mgmt_interface_uuids = [] for controller in controllers: mgmt_interface = regionone_sysinv_client.get_management_interface( - controller.hostname) + controller.hostname + ) if mgmt_interface is not None: mgmt_interface_uuids.append(mgmt_interface.uuid) - SubcloudManager.regionone_data['mgmt_interface_uuids'] = \ + SubcloudManager.regionone_data["mgmt_interface_uuids"] = ( mgmt_interface_uuids - SubcloudManager.regionone_data['mgmt_pool'] = \ + ) + SubcloudManager.regionone_data["mgmt_pool"] = ( regionone_sysinv_client.get_management_address_pool() - SubcloudManager.regionone_data['oam_addresses'] = \ + ) + SubcloudManager.regionone_data["oam_addresses"] = ( regionone_sysinv_client.get_oam_addresses() + ) - SubcloudManager.regionone_data['expiry'] = \ + SubcloudManager.regionone_data["expiry"] = ( datetime.datetime.utcnow() + datetime.timedelta(hours=1) + ) LOG.info( - "RegionOne cached data updated %s" % SubcloudManager.regionone_data) + "RegionOne cached data updated %s" % SubcloudManager.regionone_data + ) cached_regionone_data = SubcloudManager.regionone_data return cached_regionone_data - def _populate_payload_with_cached_keystone_data(self, cached_data, payload, - populate_passwords=True): - payload['system_controller_keystone_admin_user_id'] = \ - cached_data['admin_user_id'] - payload['system_controller_keystone_admin_project_id'] = \ - cached_data['admin_project_id'] - payload['system_controller_keystone_services_project_id'] = \ - cached_data['services_project_id'] - payload['system_controller_keystone_sysinv_user_id'] = \ - cached_data['sysinv_user_id'] - payload['system_controller_keystone_dcmanager_user_id'] = \ - cached_data['dcmanager_user_id'] + def _populate_payload_with_cached_keystone_data( + self, cached_data, payload, populate_passwords=True + ): + payload["system_controller_keystone_admin_user_id"] = cached_data[ + "admin_user_id" + ] + payload["system_controller_keystone_admin_project_id"] = cached_data[ + "admin_project_id" + ] + payload["system_controller_keystone_services_project_id"] = cached_data[ + "services_project_id" + ] + payload["system_controller_keystone_sysinv_user_id"] = cached_data[ + "sysinv_user_id" + ] + payload["system_controller_keystone_dcmanager_user_id"] = cached_data[ + "dcmanager_user_id" + ] if populate_passwords: # While at it, add the admin and service user passwords to the # payload so they get copied to the overrides file - payload['ansible_become_pass'] = payload['sysadmin_password'] - payload['ansible_ssh_pass'] = payload['sysadmin_password'] - payload['admin_password'] = str(keyring.get_password('CGCS', - 'admin')) + payload["ansible_become_pass"] = payload["sysadmin_password"] + payload["ansible_ssh_pass"] = payload["sysadmin_password"] + payload["admin_password"] = str(keyring.get_password("CGCS", "admin")) def _populate_payload_with_dc_intermediate_ca_cert(self, payload): subcloud_region = payload["region_name"] - secret_name = SubcloudManager._get_subcloud_cert_secret_name( - subcloud_region) + secret_name = SubcloudManager._get_subcloud_cert_secret_name(subcloud_region) kube = kubeoperator.KubeOperator() secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE) data = 
secret.data - payload['dc_root_ca_cert'] = data['ca.crt'] - payload['sc_ca_cert'] = data['tls.crt'] - payload['sc_ca_key'] = data['tls.key'] + payload["dc_root_ca_cert"] = data["ca.crt"] + payload["sc_ca_cert"] = data["tls.crt"] + payload["sc_ca_key"] = data["tls.key"] diff --git a/distributedcloud/dcmanager/manager/system_peer_manager.py b/distributedcloud/dcmanager/manager/system_peer_manager.py index fbfc21698..0270d52de 100644 --- a/distributedcloud/dcmanager/manager/system_peer_manager.py +++ b/distributedcloud/dcmanager/manager/system_peer_manager.py @@ -30,12 +30,12 @@ from dcmanager.db.sqlalchemy import models LOG = logging.getLogger(__name__) -TEMP_BOOTSTRAP_PREFIX = 'peer_subcloud_bootstrap_yaml' -TEMP_INSTALL_PREFIX = 'peer_subcloud_install_yaml' +TEMP_BOOTSTRAP_PREFIX = "peer_subcloud_bootstrap_yaml" +TEMP_INSTALL_PREFIX = "peer_subcloud_install_yaml" MAX_PARALLEL_SUBCLOUD_SYNC = 10 MAX_PARALLEL_SUBCLOUD_DELETE = 10 -VERIFY_SUBCLOUD_SYNC_VALID = 'valid' -VERIFY_SUBCLOUD_SYNC_IGNORE = 'ignore' +VERIFY_SUBCLOUD_SYNC_VALID = "valid" +VERIFY_SUBCLOUD_SYNC_IGNORE = "ignore" TRANSITORY_STATES = { consts.ASSOCIATION_SYNC_STATUS_SYNCING: consts.ASSOCIATION_SYNC_STATUS_FAILED @@ -46,28 +46,37 @@ class SystemPeerManager(manager.Manager): """Manages tasks related to system peers.""" def __init__(self, peer_monitor_manager, *args, **kwargs): - LOG.debug(_('SystemPeerManager initialization...')) + LOG.debug(_("SystemPeerManager initialization...")) self.context = dcmanager_context.get_admin_context() self.peer_monitor_manager = peer_monitor_manager super(SystemPeerManager, self).__init__( - service_name="system_peer_manager", *args, **kwargs) + service_name="system_peer_manager", *args, **kwargs + ) @staticmethod def get_local_associations(ctx, peer, local_pg=None): if local_pg is None: # Get associations by system peer id - return db_api.peer_group_association_get_by_system_peer_id(ctx, - peer.id) + return db_api.peer_group_association_get_by_system_peer_id(ctx, peer.id) else: # Get association by system peer id and peer group id - association = db_api.\ - peer_group_association_get_by_peer_group_and_system_peer_id( - ctx, local_pg.id, peer.id) + association = ( + db_api.peer_group_association_get_by_peer_group_and_system_peer_id( + ctx, local_pg.id, peer.id + ) + ) return [association] if association else [] @staticmethod - def update_sync_status(ctx, peer, sync_status, local_pg=None, - remote_pg=None, message="None", association=None): + def update_sync_status( + ctx, + peer, + sync_status, + local_pg=None, + remote_pg=None, + message="None", + association=None, + ): """Update sync status of association. 
This function updates the sync status of the association on the peer @@ -83,18 +92,23 @@ class SystemPeerManager(manager.Manager): :param association: peer group association object """ - def _update_association_on_peer_site(peer, sync_status, - local_pg, remote_pg, message): + def _update_association_on_peer_site( + peer, sync_status, local_pg, remote_pg, message + ): try: # Get peer site dcmanager client dc_client = SystemPeerManager.get_peer_dc_client(peer) # Get peer site peer group if not exist - remote_pg = remote_pg if remote_pg is not None else dc_client.\ - get_subcloud_peer_group(local_pg.peer_group_name) + remote_pg = ( + remote_pg + if remote_pg is not None + else dc_client.get_subcloud_peer_group(local_pg.peer_group_name) + ) # Get peer site system peer dc_peer_system_peer = dc_client.get_system_peer( - utils.get_local_system().uuid) + utils.get_local_system().uuid + ) # Get peer site group association dc_peer_association = ( dc_client.get_peer_group_association_with_peer_id_and_pg_id( @@ -104,52 +118,63 @@ class SystemPeerManager(manager.Manager): # Update peer site association sync_status only if the # sync_status is different from the current sync_status - if dc_peer_association.get('sync_status') != sync_status: + if dc_peer_association.get("sync_status") != sync_status: # Update peer site association sync_status - dc_peer_association_id = dc_peer_association.get('id') + dc_peer_association_id = dc_peer_association.get("id") dc_client.update_peer_group_association_sync_status( - dc_peer_association_id, sync_status) - LOG.info(f"Updated Peer site {dc_peer_system_peer.get('id')} " - f"Peer Group Association {dc_peer_association_id} " - f"sync_status to {sync_status}.") + dc_peer_association_id, sync_status + ) + LOG.info( + f"Updated Peer site {dc_peer_system_peer.get('id')} " + f"Peer Group Association {dc_peer_association_id} " + f"sync_status to {sync_status}." + ) except Exception as e: - message = f"Failed to Update Peer Site ({peer.peer_uuid}) " \ - f"Association sync_status to {sync_status}." + message = ( + f"Failed to Update Peer Site ({peer.peer_uuid}) " + f"Association sync_status to {sync_status}." + ) LOG.exception(f"{message} Error: {e}") sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED return sync_status, message if association is None: - associations = SystemPeerManager.get_local_associations( - ctx, peer, local_pg) + associations = SystemPeerManager.get_local_associations(ctx, peer, local_pg) else: associations = [association] for association in associations: - if association.association_type == \ - consts.ASSOCIATION_TYPE_NON_PRIMARY: - LOG.debug(f"Skip update Peer Site association " - f"sync_status to {sync_status} as current " - f"site Association is not primary.") + if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY: + LOG.debug( + f"Skip update Peer Site association sync_status to {sync_status} " + "as current site Association is not primary." 
+ ) continue - local_pg = local_pg if local_pg is not None else db_api.\ - subcloud_peer_group_get(ctx, association.peer_group_id) + local_pg = ( + local_pg + if local_pg is not None + else db_api.subcloud_peer_group_get(ctx, association.peer_group_id) + ) sync_status, message = _update_association_on_peer_site( - peer, sync_status, local_pg, remote_pg, message) + peer, sync_status, local_pg, remote_pg, message + ) - if association.sync_status == sync_status and sync_status != \ - consts.ASSOCIATION_SYNC_STATUS_FAILED: - LOG.debug(f"Skip update current site association " - f"sync_status to {sync_status} as current " - f"site Association is already in the same status.") + if ( + association.sync_status == sync_status + and sync_status != consts.ASSOCIATION_SYNC_STATUS_FAILED + ): + LOG.debug( + "Skip update current site association sync_status to " + f"{sync_status} as current site Association is already " + "in the same status." + ) continue # Update primary site association sync_status db_api.peer_group_association_update( - ctx, association.id, - sync_status=sync_status, - sync_message=message) + ctx, association.id, sync_status=sync_status, sync_message=message + ) @staticmethod def get_peer_ks_client(peer): @@ -158,38 +183,46 @@ class SystemPeerManager(manager.Manager): os_client = PeerSiteDriver( auth_url=peer.manager_endpoint, username=peer.manager_username, - password=base64.b64decode( - peer.manager_password.encode("utf-8")).decode("utf-8"), - site_uuid=peer.peer_uuid) + password=base64.b64decode(peer.manager_password.encode("utf-8")).decode( + "utf-8" + ), + site_uuid=peer.peer_uuid, + ) return os_client.keystone_client except Exception: - LOG.warn('Failure initializing KeystoneClient ' - f'for system peer {peer.peer_name}') + LOG.warn( + f"Failure initializing KeystoneClient for system peer {peer.peer_name}" + ) raise @staticmethod def get_peer_sysinv_client(peer): p_ks_client = SystemPeerManager.get_peer_ks_client(peer) sysinv_endpoint = p_ks_client.session.get_endpoint( - service_type='platform', + service_type="platform", region_name=dccommon_consts.DEFAULT_REGION_NAME, - interface=dccommon_consts.KS_ENDPOINT_PUBLIC) - return SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, - p_ks_client.session, - endpoint_type=dccommon_consts. 
- KS_ENDPOINT_PUBLIC, - endpoint=sysinv_endpoint) + interface=dccommon_consts.KS_ENDPOINT_PUBLIC, + ) + return SysinvClient( + dccommon_consts.DEFAULT_REGION_NAME, + p_ks_client.session, + endpoint_type=dccommon_consts.KS_ENDPOINT_PUBLIC, + endpoint=sysinv_endpoint, + ) @staticmethod def get_peer_dc_client(peer): p_ks_client = SystemPeerManager.get_peer_ks_client(peer) dc_endpoint = p_ks_client.session.get_endpoint( - service_type='dcmanager', + service_type="dcmanager", region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME, - interface=dccommon_consts.KS_ENDPOINT_PUBLIC) - return DcmanagerClient(dccommon_consts.SYSTEM_CONTROLLER_NAME, - p_ks_client.session, - endpoint=dc_endpoint) + interface=dccommon_consts.KS_ENDPOINT_PUBLIC, + ) + return DcmanagerClient( + dccommon_consts.SYSTEM_CONTROLLER_NAME, + p_ks_client.session, + endpoint=dc_endpoint, + ) @staticmethod def get_peer_subcloud(dc_client, subcloud_name): @@ -206,8 +239,9 @@ class SystemPeerManager(manager.Manager): @staticmethod def get_subcloud_deploy_status(subcloud): - deploy_status = 'deploy-status' if 'deploy-status' in subcloud else \ - 'deploy_status' + deploy_status = ( + "deploy-status" if "deploy-status" in subcloud else "deploy_status" + ) return subcloud.get(deploy_status) @staticmethod @@ -218,7 +252,7 @@ class SystemPeerManager(manager.Manager): """ if SystemPeerManager.get_subcloud_deploy_status(subcloud) not in ( consts.DEPLOY_STATE_SECONDARY_FAILED, - consts.DEPLOY_STATE_SECONDARY + consts.DEPLOY_STATE_SECONDARY, ): return False return True @@ -230,29 +264,30 @@ class SystemPeerManager(manager.Manager): :param dc_client: the dcmanager client object :param subcloud_ref: subcloud name to delete """ - peer_subcloud = SystemPeerManager.get_peer_subcloud(dc_client, - subcloud_ref) + peer_subcloud = SystemPeerManager.get_peer_subcloud(dc_client, subcloud_ref) if not peer_subcloud: - LOG.info(f"Skip delete Peer Site Subcloud {subcloud_ref} cause " - f"it doesn't exist.") + LOG.info( + f"Skip delete Peer Site Subcloud {subcloud_ref} cause it doesn't exist." + ) return if SystemPeerManager.get_subcloud_deploy_status(peer_subcloud) not in ( - consts.DEPLOY_STATE_SECONDARY_FAILED, - consts.DEPLOY_STATE_SECONDARY, - consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED + consts.DEPLOY_STATE_SECONDARY_FAILED, + consts.DEPLOY_STATE_SECONDARY, + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, ): - LOG.info(f"Ignoring delete Peer Site Subcloud {subcloud_ref} " - f"as is not in secondary or rehome failed state.") + LOG.info( + f"Ignoring delete Peer Site Subcloud {subcloud_ref} " + "as is not in secondary or rehome failed state." 
+ ) return dc_client.delete_subcloud(subcloud_ref) LOG.info(f"Deleted Subcloud {subcloud_ref} on peer site.") @staticmethod - def _run_parallel_group_operation(op_type, op_function, thread_pool, - subclouds): + def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds): """Run parallel group operation on subclouds.""" failed_subclouds = [] processed = 0 @@ -263,46 +298,55 @@ class SystemPeerManager(manager.Manager): if not success: failed_subclouds.append(subcloud) - if hasattr(subcloud, 'msg'): + if hasattr(subcloud, "msg"): error_msg[subcloud.name] = subcloud.msg completion = float(processed) / float(len(subclouds)) * 100 remaining = len(subclouds) - processed - LOG.info("Processed subcloud %s for %s (operation %.0f%% " - "complete, %d subcloud(s) remaining)" % - (subcloud.name, op_type, completion, remaining)) + LOG.info( + "Processed subcloud %s for %s (operation %.0f%% " + "complete, %d subcloud(s) remaining)" + % (subcloud.name, op_type, completion, remaining) + ) return failed_subclouds, error_msg - def _add_or_update_subcloud(self, dc_client: DcmanagerClient, - peer_controller_gateway_ip: str, - dc_peer_pg_id: int, - subcloud: models.Subcloud): + def _add_or_update_subcloud( + self, + dc_client: DcmanagerClient, + peer_controller_gateway_ip: str, + dc_peer_pg_id: int, + subcloud: models.Subcloud, + ): """Add or update subcloud on peer site in parallel.""" with tempfile.NamedTemporaryFile( prefix=TEMP_BOOTSTRAP_PREFIX, suffix=".yaml", mode="w" - ) as temp_bootstrap_file, tempfile.NamedTemporaryFile( - prefix=TEMP_INSTALL_PREFIX, suffix=".yaml", mode="w" - ) if subcloud.data_install else nullcontext() as temp_install_file: - subcloud_name = subcloud.get('name') - region_name = subcloud.get('region_name') + ) as temp_bootstrap_file, ( + tempfile.NamedTemporaryFile( + prefix=TEMP_INSTALL_PREFIX, suffix=".yaml", mode="w" + ) + if subcloud.data_install + else nullcontext() + ) as temp_install_file: + subcloud_name = subcloud.get("name") + region_name = subcloud.get("region_name") rehome_data = json.loads(subcloud.rehome_data) - subcloud_payload = rehome_data['saved_payload'] + subcloud_payload = rehome_data["saved_payload"] # Update bootstrap_values with the peer site # systemcontroller_gateway_address - subcloud_payload['systemcontroller_gateway_address'] = \ + subcloud_payload["systemcontroller_gateway_address"] = ( peer_controller_gateway_ip + ) yaml.dump(subcloud_payload, temp_bootstrap_file) files = {consts.BOOTSTRAP_VALUES: temp_bootstrap_file.name} data = { - consts.BOOTSTRAP_ADDRESS: subcloud_payload[ - consts.BOOTSTRAP_ADDRESS], + consts.BOOTSTRAP_ADDRESS: subcloud_payload[consts.BOOTSTRAP_ADDRESS], "region_name": subcloud.region_name, "location": subcloud.location, - "description": subcloud.description + "description": subcloud.description, } if temp_install_file: @@ -317,105 +361,126 @@ class SystemPeerManager(manager.Manager): # The subcloud update API expects 'bootstrap_address' # instead of 'bootstrap-address' data["bootstrap_address"] = data.pop(consts.BOOTSTRAP_ADDRESS) - dc_peer_subcloud = dc_client.update_subcloud(region_name, - files, data, - is_region_name=True) - LOG.info(f"Updated Subcloud {dc_peer_subcloud.get('name')} " - "(region_name: " - f"{dc_peer_subcloud.get('region-name')}) on peer " - "site.") + dc_peer_subcloud = dc_client.update_subcloud( + region_name, files, data, is_region_name=True + ) + LOG.info( + f"Updated Subcloud {dc_peer_subcloud.get('name')} " + f"(region_name: {dc_peer_subcloud.get('region-name')}) " + "on peer site." 
+ ) else: # Create subcloud on peer site if not exist - dc_peer_subcloud = dc_client. \ - add_subcloud_with_secondary_status(files, data) - LOG.info(f"Created Subcloud {dc_peer_subcloud.get('name')} " - "(region_name: " - f"{dc_peer_subcloud.get('region-name')}) on peer " - "site.") - LOG.debug(f"Updating subcloud {subcloud_name} (region_name: " - f"{region_name}) with subcloud peer group id " - f"{dc_peer_pg_id} on peer site.") + dc_peer_subcloud = dc_client.add_subcloud_with_secondary_status( + files, data + ) + LOG.info( + f"Created Subcloud {dc_peer_subcloud.get('name')} " + f"(region_name: {dc_peer_subcloud.get('region-name')}) " + "on peer site." + ) + LOG.debug( + f"Updating subcloud {subcloud_name} (region_name: {region_name}) " + f"with subcloud peer group id {dc_peer_pg_id} on peer site." + ) # Update subcloud associated peer group on peer site. # The peer_group update will check the header and should # use the region_name as subcloud_ref. peer_subcloud = dc_client.update_subcloud( - dc_peer_subcloud.get('region-name'), files=None, + dc_peer_subcloud.get("region-name"), + files=None, data={"peer_group": str(dc_peer_pg_id)}, - is_region_name=True) + is_region_name=True, + ) # Need to check the subcloud only in secondary, otherwise it # should be recorded as a failure. peer_subcloud_deploy_status = self.get_subcloud_deploy_status( - peer_subcloud) - if peer_subcloud_deploy_status not in \ - (consts.DEPLOY_STATE_SECONDARY, - consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED): - subcloud.msg = "Subcloud's deploy status not correct: %s" \ + peer_subcloud + ) + if peer_subcloud_deploy_status not in ( + consts.DEPLOY_STATE_SECONDARY, + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ): + subcloud.msg = ( + "Subcloud's deploy status not correct: %s" % peer_subcloud_deploy_status + ) return subcloud, False return subcloud, True except Exception as e: subcloud.msg = str(e) # Store error message for subcloud - LOG.error(f"Failed to add/update Subcloud {subcloud_name} " - f"(region_name: {region_name}) " - f"on peer site: {str(e)}") + LOG.error( + f"Failed to add/update Subcloud {subcloud_name} " + f"(region_name: {region_name}) on peer site: {str(e)}" + ) return subcloud, False def _delete_subcloud(self, dc_client, subcloud): """Delete subcloud on peer site in parallel.""" try: - subcloud_name = subcloud.get('name') + subcloud_name = subcloud.get("name") self.delete_peer_secondary_subcloud(dc_client, subcloud_name) return subcloud, True except Exception as e: subcloud.msg = str(e) - LOG.exception(f"Failed to delete Subcloud {subcloud_name} on peer " - f"site: {str(e)}") + LOG.exception( + f"Failed to delete Subcloud {subcloud_name} on peer site: {str(e)}" + ) return subcloud, False def _is_valid_for_subcloud_sync(self, subcloud): """Verify subcloud data for sync.""" - subcloud_name = subcloud.get('name') - region_name = subcloud.get('region_name') + subcloud_name = subcloud.get("name") + region_name = subcloud.get("region_name") # Ignore the secondary subclouds to sync with peer site if self.is_subcloud_secondary(subcloud): - LOG.info(f"Ignoring the Subcloud {subcloud_name} (region_name: " - f"{region_name}) in secondary status to sync with " - "peer site.") + LOG.info( + f"Ignoring the Subcloud {subcloud_name} (region_name: " + f"{region_name}) in secondary status to sync with peer site." 
+ ) return VERIFY_SUBCLOUD_SYNC_IGNORE # Verify subcloud payload data rehome_json = subcloud.rehome_data if not rehome_json: - msg = f"Subcloud {subcloud_name} (region_name: " + \ - f"{region_name}) does not have rehome_data." + msg = ( + f"Subcloud {subcloud_name} (region_name: {region_name}) does not " + "have rehome_data." + ) return msg rehome_data = json.loads(rehome_json) - if 'saved_payload' not in rehome_data: - msg = f"Subcloud {subcloud_name} (region_name: " + \ - f"{region_name}) does not have saved_payload." + if "saved_payload" not in rehome_data: + msg = ( + f"Subcloud {subcloud_name} (region_name: {region_name}) does not " + "have saved_payload." + ) return msg - subcloud_payload = rehome_data['saved_payload'] + subcloud_payload = rehome_data["saved_payload"] if not subcloud_payload: - msg = f"Subcloud {subcloud_name} (region_name: " + \ - f"{region_name}) saved_payload is empty." + msg = ( + f"Subcloud {subcloud_name} (region_name: {region_name}) saved_payload " + "is empty." + ) return msg - if 'bootstrap-address' not in subcloud_payload: - msg = f"Subcloud {subcloud_name} (region_name: " + \ - f"{region_name}) does not have bootstrap-address in " + \ - "saved_payload." + if "bootstrap-address" not in subcloud_payload: + msg = ( + f"Subcloud {subcloud_name} (region_name: {region_name}) does not " + "have bootstrap-address in saved_payload." + ) return msg - if 'systemcontroller_gateway_address' not in subcloud_payload: - msg = f"Subcloud {subcloud_name} (region_name: " + \ - f"{region_name}) does not have systemcontroller_" + \ - "gateway_address in saved_payload." + if "systemcontroller_gateway_address" not in subcloud_payload: + msg = ( + f"Subcloud {subcloud_name} (region_name: {region_name}) does not " + "have systemcontroller_gateway_address in saved_payload." + ) return msg return VERIFY_SUBCLOUD_SYNC_VALID @@ -426,12 +491,14 @@ class SystemPeerManager(manager.Manager): error_msg = {} # Dictinary to store error message for each subcloud for subcloud in subclouds: - subcloud_name = subcloud.get('name') - region_name = subcloud.get('region_name') + subcloud_name = subcloud.get("name") + region_name = subcloud.get("region_name") validation = self._is_valid_for_subcloud_sync(subcloud) - if validation != VERIFY_SUBCLOUD_SYNC_IGNORE and \ - validation != VERIFY_SUBCLOUD_SYNC_VALID: + if ( + validation != VERIFY_SUBCLOUD_SYNC_IGNORE + and validation != VERIFY_SUBCLOUD_SYNC_VALID + ): LOG.error(validation) error_msg[subcloud_name] = validation continue @@ -442,17 +509,22 @@ class SystemPeerManager(manager.Manager): # the "get_subcloud_list_by_peer_group" method peer_subcloud = self.get_peer_subcloud(dc_client, subcloud_name) if not peer_subcloud: - LOG.info(f"Subcloud {subcloud_name} (region_name: " - f"{region_name}) does not exist on peer site.") + LOG.info( + f"Subcloud {subcloud_name} (region_name: {region_name}) does " + "not exist on peer site." + ) valid_subclouds.append(subcloud) continue - if not self.is_subcloud_secondary(peer_subcloud) and \ - self.get_subcloud_deploy_status(peer_subcloud) not in \ - (consts.DEPLOY_STATE_REHOME_FAILED, - consts.DEPLOY_STATE_REHOME_PREP_FAILED): - msg = (f"Subcloud {subcloud_name} is not in the right state " - f"for sync.") + if not self.is_subcloud_secondary( + peer_subcloud + ) and self.get_subcloud_deploy_status(peer_subcloud) not in ( + consts.DEPLOY_STATE_REHOME_FAILED, + consts.DEPLOY_STATE_REHOME_PREP_FAILED, + ): + msg = ( + f"Subcloud {subcloud_name} is not in the right state for sync." 
+ ) LOG.info(msg) error_msg[subcloud_name] = msg continue @@ -461,8 +533,10 @@ class SystemPeerManager(manager.Manager): except Exception as e: subcloud.msg = str(e) # Store error message for subcloud - LOG.error(f"Failed to validate Subcloud {subcloud_name} " - f"(region_name: {region_name}): {str(e)}") + LOG.error( + f"Failed to validate Subcloud {subcloud_name} " + f"(region_name: {region_name}): {str(e)}" + ) error_msg[subcloud_name] = str(e) return valid_subclouds, error_msg @@ -479,19 +553,23 @@ class SystemPeerManager(manager.Manager): subclouds = db_api.subcloud_get_for_peer_group(context, dc_local_pg_id) subclouds_to_sync, error_msg = self._validate_subclouds_for_sync( - subclouds, dc_client) + subclouds, dc_client + ) # Use thread pool to limit number of operations in parallel sync_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_SYNC) # Spawn threads to sync each applicable subcloud - sync_function = functools.partial(self._add_or_update_subcloud, - dc_client, - peer.peer_controller_gateway_ip, - dc_peer_pg_id) + sync_function = functools.partial( + self._add_or_update_subcloud, + dc_client, + peer.peer_controller_gateway_ip, + dc_peer_pg_id, + ) failed_subclouds, sync_error_msg = self._run_parallel_group_operation( - 'peer sync', sync_function, sync_pool, subclouds_to_sync) + "peer sync", sync_function, sync_pool, subclouds_to_sync + ) error_msg.update(sync_error_msg) LOG.info("Subcloud peer sync operation finished") @@ -501,21 +579,20 @@ class SystemPeerManager(manager.Manager): # Ignore the secondary subclouds to sync with peer site if not self.is_subcloud_secondary(subcloud): # Count all subcloud need to be sync - dc_local_region_names.add(subcloud.get('name')) + dc_local_region_names.add(subcloud.get("name")) dc_peer_subclouds = dc_client.get_subcloud_list_by_peer_group( - str(dc_peer_pg_id)) - dc_peer_region_names = set(subcloud.get('name') for subcloud in - dc_peer_subclouds) + str(dc_peer_pg_id) + ) + dc_peer_region_names = set( + subcloud.get("name") for subcloud in dc_peer_subclouds + ) - dc_peer_subcloud_diff_names = dc_peer_region_names - \ - dc_local_region_names + dc_peer_subcloud_diff_names = dc_peer_region_names - dc_local_region_names for subcloud_to_delete in dc_peer_subcloud_diff_names: try: - LOG.debug(f"Deleting Subcloud name {subcloud_to_delete} " - "on peer site.") - self.delete_peer_secondary_subcloud(dc_client, - subcloud_to_delete) + LOG.debug(f"Deleting Subcloud name {subcloud_to_delete} on peer site.") + self.delete_peer_secondary_subcloud(dc_client, subcloud_to_delete) except Exception as e: msg = f"Subcloud delete failed: {str(e)}" LOG.error(msg) @@ -523,36 +600,52 @@ class SystemPeerManager(manager.Manager): return error_msg - def _update_sync_status(self, context, association_id, sync_status, - sync_message, dc_peer_association_id=None, - dc_client=None, **kwargs): + def _update_sync_status( + self, + context, + association_id, + sync_status, + sync_message, + dc_peer_association_id=None, + dc_client=None, + **kwargs, + ): """Update sync status of association.""" if dc_peer_association_id is not None: if dc_client is None: - association = db_api.peer_group_association_get(context, - association_id) - peer = db_api.system_peer_get(context, - association.system_peer_id) + association = db_api.peer_group_association_get(context, association_id) + peer = db_api.system_peer_get(context, association.system_peer_id) dc_client = self.get_peer_dc_client(peer) dc_client.update_peer_group_association_sync_status( - dc_peer_association_id, sync_status) - 
LOG.info(f"Updated non-primary Peer Group Association " - f"{dc_peer_association_id} sync_status to {sync_status}.") + dc_peer_association_id, sync_status + ) + LOG.info( + "Updated non-primary Peer Group Association " + f"{dc_peer_association_id} sync_status to {sync_status}." + ) return db_api.peer_group_association_update( - context, association_id, sync_status=sync_status, - sync_message=sync_message, **kwargs) + context, + association_id, + sync_status=sync_status, + sync_message=sync_message, + **kwargs, + ) - def _update_sync_status_to_failed(self, context, association_id, - failed_message, - dc_peer_association_id=None): + def _update_sync_status_to_failed( + self, context, association_id, failed_message, dc_peer_association_id=None + ): """Update sync status to failed.""" - return self._update_sync_status(context, association_id, - consts.ASSOCIATION_SYNC_STATUS_FAILED, - failed_message, - dc_peer_association_id) + return self._update_sync_status( + context, + association_id, + consts.ASSOCIATION_SYNC_STATUS_FAILED, + failed_message, + dc_peer_association_id, + ) - def update_association_sync_status(self, context, peer_group_id, - sync_status, sync_message=None): + def update_association_sync_status( + self, context, peer_group_id, sync_status, sync_message=None + ): """Update PGA sync status on primary and peer site(s). The update of PGA sync status is always triggered on the primary site, @@ -568,7 +661,8 @@ class SystemPeerManager(manager.Manager): local_peer_gp = db_api.subcloud_peer_group_get(self.context, peer_group_id) # Get associations by peer group id associations = db_api.peer_group_association_get_by_peer_group_id( - context, peer_group_id) + context, peer_group_id + ) if not associations: LOG.debug("No association found for peer group %s" % peer_group_id) else: @@ -579,12 +673,14 @@ class SystemPeerManager(manager.Manager): pre_sync_status = association.sync_status new_sync_status = sync_status new_sync_message = sync_message - if sync_status in (consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, - consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC): + if sync_status in ( + consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, + consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, + ): # We don't need sync_message for in-sync and out-of-sync # status, so clear the previous remained message for these # two status. - new_sync_message = 'None' + new_sync_message = "None" # If the local sync_status already set to unknown, indicating # that the peer site is unreachable, we'll change the sync_status @@ -595,17 +691,26 @@ class SystemPeerManager(manager.Manager): if pre_sync_status == consts.ASSOCIATION_SYNC_STATUS_UNKNOWN: if sync_status != consts.ASSOCIATION_SYNC_STATUS_IN_SYNC: new_sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED - new_sync_message = ("Failed to update sync_status, " - "because the peer site is unreachable.") + new_sync_message = ( + "Failed to update sync_status, " + "because the peer site is unreachable." 
+ ) # Update peer site peer association sync_status else: # Get system peer by peer id from the association system_peer = db_api.system_peer_get( - context, association.system_peer_id) + context, association.system_peer_id + ) if pre_sync_status != sync_status: SystemPeerManager.update_sync_status( - context, system_peer, sync_status, local_peer_gp, - None, new_sync_message, association) + context, + system_peer, + sync_status, + local_peer_gp, + None, + new_sync_message, + association, + ) # Already update sync_status on both peer and local sites continue @@ -615,33 +720,45 @@ class SystemPeerManager(manager.Manager): context, association.id, sync_status=new_sync_status, - sync_message=new_sync_message) + sync_message=new_sync_message, + ) LOG.debug( f"Updated Local Peer Group Association {association.id} " - f"sync_status to {new_sync_status}.") + f"sync_status to {new_sync_status}." + ) return out_of_sync_associations_ids - def update_subcloud_peer_group(self, context, peer_group_id, - group_state, max_subcloud_rehoming, - group_name, new_group_name=None): + def update_subcloud_peer_group( + self, + context, + peer_group_id, + group_state, + max_subcloud_rehoming, + group_name, + new_group_name=None, + ): # Collect the success and failed peer ids. success_peer_ids = set() failed_peer_ids = set() # Get associations by peer group id associations = db_api.peer_group_association_get_by_peer_group_id( - context, peer_group_id) + context, peer_group_id + ) if not associations: LOG.info("No association found for peer group %s" % peer_group_id) else: for association in associations: # Get system peer by peer id from the association system_peer = db_api.system_peer_get( - context, association.system_peer_id) + context, association.system_peer_id + ) # Get 'available' system peer - if system_peer.availability_state != \ - consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE: + if ( + system_peer.availability_state + != consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE + ): LOG.warning("Peer system %s offline" % system_peer.id) failed_peer_ids.add(system_peer.id) else: @@ -649,33 +766,39 @@ class SystemPeerManager(manager.Manager): # Get a client for sending request to this system peer dc_client = self.get_peer_dc_client(system_peer) peer_group_kwargs = { - 'peer-group-name': new_group_name, - 'group-state': group_state, - 'max-subcloud-rehoming': max_subcloud_rehoming + "peer-group-name": new_group_name, + "group-state": group_state, + "max-subcloud-rehoming": max_subcloud_rehoming, } # Make an API call to update peer group on peer site dc_client.update_subcloud_peer_group( - group_name, **peer_group_kwargs) + group_name, **peer_group_kwargs + ) success_peer_ids.add(system_peer.id) except Exception: - LOG.error(f"Failed to update Subcloud Peer Group " - f"{group_name} on peer site {system_peer.id}" - f" with the values: {peer_group_kwargs}") + LOG.error( + f"Failed to update Subcloud Peer Group {group_name} on " + f"peer site {system_peer.id} with the values: " + f"{peer_group_kwargs}" + ) failed_peer_ids.add(system_peer.id) return success_peer_ids, failed_peer_ids - def _get_non_primary_association(self, dc_client, dc_peer_system_peer_id, - dc_peer_pg_id): + def _get_non_primary_association( + self, dc_client, dc_peer_system_peer_id, dc_peer_pg_id + ): """Get non-primary Association from peer site.""" try: return dc_client.get_peer_group_association_with_peer_id_and_pg_id( dc_peer_system_peer_id, dc_peer_pg_id ) except dccommon_exceptions.PeerGroupAssociationNotFound: - LOG.error(f"Peer Group 
association does not exist on peer site." - f"Peer Group ID: {dc_peer_pg_id}, Peer System Peer ID: " - f"{dc_peer_system_peer_id}") + LOG.error( + "Peer Group association does not exist on peer site. " + f"Peer Group ID: {dc_peer_pg_id}, Peer System Peer ID: " + f"{dc_peer_system_peer_id}" + ) return None def _get_peer_site_pg_by_name(self, dc_client, peer_group_name): @@ -683,22 +806,21 @@ class SystemPeerManager(manager.Manager): try: return dc_client.get_subcloud_peer_group(peer_group_name) except dccommon_exceptions.SubcloudPeerGroupNotFound: - LOG.error(f"Peer Group {peer_group_name} does not exist on peer " - f"site.") + LOG.error(f"Peer Group {peer_group_name} does not exist on peer site.") return None def _get_peer_site_system_peer(self, dc_client, peer_uuid=None): """Get System Peer from peer site.""" try: - peer_uuid = peer_uuid if peer_uuid is not None else \ - utils.get_local_system().uuid + peer_uuid = ( + peer_uuid if peer_uuid is not None else utils.get_local_system().uuid + ) return dc_client.get_system_peer(peer_uuid) except dccommon_exceptions.SystemPeerNotFound: LOG.error(f"Peer Site System Peer {peer_uuid} does not exist.") return None - def sync_subcloud_peer_group(self, context, association_id, - sync_subclouds=True): + def sync_subcloud_peer_group(self, context, association_id, sync_subclouds=True): """Sync subcloud peer group to peer site. This function synchronizes subcloud peer groups from current site @@ -716,15 +838,15 @@ class SystemPeerManager(manager.Manager): :param association_id: id of association to sync :param sync_subclouds: Enabled to sync subclouds to peer site """ - LOG.info(f"Synchronize the association {association_id} of the " - "Subcloud Peer Group with the System Peer pointing to the " - "peer site.") + LOG.info( + f"Synchronize the association {association_id} of the " + "Subcloud Peer Group with the System Peer pointing to the " + "peer site." + ) - association = db_api.peer_group_association_get(context, - association_id) + association = db_api.peer_group_association_get(context, association_id) peer = db_api.system_peer_get(context, association.system_peer_id) - dc_local_pg = db_api.subcloud_peer_group_get(context, - association.peer_group_id) + dc_local_pg = db_api.subcloud_peer_group_get(context, association.peer_group_id) peer_group_name = dc_local_pg.peer_group_name dc_peer_association_id = None @@ -733,10 +855,11 @@ class SystemPeerManager(manager.Manager): # peer_uuid system = self.get_peer_sysinv_client(peer).get_system() if system.uuid != peer.peer_uuid: - LOG.error(f"Peer site system uuid {system.uuid} does not match " - f"with the peer_uuid {peer.peer_uuid}") - raise exceptions.PeerGroupAssociationTargetNotMatch( - uuid=system.uuid) + LOG.error( + f"Peer site system uuid {system.uuid} does not match " + f"with the peer_uuid {peer.peer_uuid}" + ) + raise exceptions.PeerGroupAssociationTargetNotMatch(uuid=system.uuid) dc_client = self.get_peer_dc_client(peer) @@ -745,103 +868,122 @@ class SystemPeerManager(manager.Manager): # Get peer site system peer dc_peer_system_peer = self._get_peer_site_system_peer( - dc_client, local_system_uuid) + dc_client, local_system_uuid + ) if dc_peer_system_peer is None: - failed_message = f"System Peer {local_system_uuid} does not" + \ - " exist on peer site." + failed_message = ( + f"System Peer {local_system_uuid} does not exist on peer site." 
+                )
                 return db_api.peer_group_association_db_model_to_dict(
-                    self._update_sync_status_to_failed(context, association_id,
-                                                       failed_message))
-            dc_peer_system_peer_id = dc_peer_system_peer.get('id')
+                    self._update_sync_status_to_failed(
+                        context, association_id, failed_message
+                    )
+                )
+            dc_peer_system_peer_id = dc_peer_system_peer.get("id")

             # Get peer site peer group, create if not exist
-            dc_peer_pg = self._get_peer_site_pg_by_name(dc_client,
-                                                        peer_group_name)
+            dc_peer_pg = self._get_peer_site_pg_by_name(dc_client, peer_group_name)
             if dc_peer_pg is None:
                 peer_group_kwargs = {
-                    'group-priority': association.peer_group_priority,
-                    'group-state': dc_local_pg.group_state,
-                    'system-leader-id': dc_local_pg.system_leader_id,
-                    'system-leader-name': dc_local_pg.system_leader_name,
-                    'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming
+                    "group-priority": association.peer_group_priority,
+                    "group-state": dc_local_pg.group_state,
+                    "system-leader-id": dc_local_pg.system_leader_id,
+                    "system-leader-name": dc_local_pg.system_leader_name,
+                    "max-subcloud-rehoming": dc_local_pg.max_subcloud_rehoming,
                 }
-                peer_group_kwargs['peer-group-name'] = peer_group_name
-                dc_peer_pg = dc_client.add_subcloud_peer_group(
-                    **peer_group_kwargs)
-                LOG.info(f"Created Subcloud Peer Group {peer_group_name} on "
-                         f"peer site. ID is {dc_peer_pg.get('id')}.")
-            dc_peer_pg_id = dc_peer_pg.get('id')
-            dc_peer_pg_priority = dc_peer_pg.get('group_priority')
+                peer_group_kwargs["peer-group-name"] = peer_group_name
+                dc_peer_pg = dc_client.add_subcloud_peer_group(**peer_group_kwargs)
+                LOG.info(
+                    f"Created Subcloud Peer Group {peer_group_name} on "
+                    f"peer site. ID is {dc_peer_pg.get('id')}."
+                )
+            dc_peer_pg_id = dc_peer_pg.get("id")
+            dc_peer_pg_priority = dc_peer_pg.get("group_priority")

             # Check if the peer group priority is 0, if so, raise exception
             if dc_peer_pg_priority == 0:
-                LOG.error(f"Skip update. Peer Site {peer_group_name} "
-                          f"has priority 0.")
+                LOG.error(f"Skip update. Peer Site {peer_group_name} has priority 0.")
                 raise exceptions.SubcloudPeerGroupHasWrongPriority(
-                    priority=dc_peer_pg_priority)
+                    priority=dc_peer_pg_priority
+                )

             # Get peer site non-primary association, create if not exist
             dc_peer_association = self._get_non_primary_association(
-                dc_client, dc_peer_system_peer_id, dc_peer_pg_id)
+                dc_client, dc_peer_system_peer_id, dc_peer_pg_id
+            )
             if dc_peer_association is None:
                 non_primary_association_kwargs = {
-                    'peer_group_id': dc_peer_pg_id,
-                    'system_peer_id': dc_peer_system_peer_id
+                    "peer_group_id": dc_peer_pg_id,
+                    "system_peer_id": dc_peer_system_peer_id,
                 }
                 dc_peer_association = dc_client.add_peer_group_association(
-                    **non_primary_association_kwargs)
-                LOG.info(f"Created \"non-primary\" Peer Group Association "
-                         f"{dc_peer_association.get('id')} on peer site.")
+                    **non_primary_association_kwargs
+                )
+                LOG.info(
+                    "Created 'non-primary' Peer Group Association "
+                    f"{dc_peer_association.get('id')} on peer site."
+ ) dc_peer_association_id = dc_peer_association.get("id") # Update peer group association sync status to syncing dc_client.update_peer_group_association_sync_status( - dc_peer_association_id, consts.ASSOCIATION_SYNC_STATUS_SYNCING) + dc_peer_association_id, consts.ASSOCIATION_SYNC_STATUS_SYNCING + ) # Update peer group on peer site peer_group_kwargs = { - 'group-priority': association.peer_group_priority, - 'group-state': dc_local_pg.group_state, - 'system-leader-id': dc_local_pg.system_leader_id, - 'system-leader-name': dc_local_pg.system_leader_name, - 'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming + "group-priority": association.peer_group_priority, + "group-state": dc_local_pg.group_state, + "system-leader-id": dc_local_pg.system_leader_id, + "system-leader-name": dc_local_pg.system_leader_name, + "max-subcloud-rehoming": dc_local_pg.max_subcloud_rehoming, } dc_peer_pg = dc_client.update_subcloud_peer_group( - peer_group_name, **peer_group_kwargs) - LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on " - f"peer site, ID is {dc_peer_pg.get('id')}.") + peer_group_name, **peer_group_kwargs + ) + LOG.info( + f"Updated Subcloud Peer Group {peer_group_name} on " + f"peer site, ID is {dc_peer_pg.get('id')}." + ) association_update = { - 'sync_status': consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, - 'sync_message': 'None', - 'dc_peer_association_id': dc_peer_association_id, - 'dc_client': dc_client + "sync_status": consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, + "sync_message": "None", + "dc_peer_association_id": dc_peer_association_id, + "dc_client": dc_client, } if sync_subclouds: - error_msg = self._sync_subclouds(context, peer, dc_local_pg.id, - dc_peer_pg_id) + error_msg = self._sync_subclouds( + context, peer, dc_local_pg.id, dc_peer_pg_id + ) if len(error_msg) > 0: - LOG.error(f"Failed to sync subcloud(s) in the Subcloud " - f"Peer Group {peer_group_name}: " - f"{json.dumps(error_msg)}") - association_update['sync_status'] = \ + LOG.error( + f"Failed to sync subcloud(s) in the Subcloud " + f"Peer Group {peer_group_name}: {json.dumps(error_msg)}" + ) + association_update["sync_status"] = ( consts.ASSOCIATION_SYNC_STATUS_FAILED - association_update['sync_message'] = \ - (f"Failed to sync {list(error_msg.keys())} in the " - f"Subcloud Peer Group {peer_group_name}.") + ) + association_update["sync_message"] = ( + f"Failed to sync {list(error_msg.keys())} in the " + f"Subcloud Peer Group {peer_group_name}." 
+ ) association = self._update_sync_status( - context, association_id, **association_update) + context, association_id, **association_update + ) self.peer_monitor_manager.peer_monitor_notify(context) return db_api.peer_group_association_db_model_to_dict(association) except Exception as exception: - LOG.exception(f"Failed to sync peer group {peer_group_name} to " - f"peer site {peer.peer_name}") - self._update_sync_status_to_failed(context, association_id, - str(exception), - dc_peer_association_id) + LOG.exception( + f"Failed to sync peer group {peer_group_name} to " + f"peer site {peer.peer_name}" + ) + self._update_sync_status_to_failed( + context, association_id, str(exception), dc_peer_association_id + ) raise exception def _delete_primary_association(self, context, association_id): @@ -859,11 +1001,9 @@ class SystemPeerManager(manager.Manager): LOG.info(f"Deleting association peer group {association_id}.") # Retrieve the peer group association details from the database - association = db_api.peer_group_association_get(context, - association_id) + association = db_api.peer_group_association_get(context, association_id) peer = db_api.system_peer_get(context, association.system_peer_id) - dc_local_pg = db_api.subcloud_peer_group_get(context, - association.peer_group_id) + dc_local_pg = db_api.subcloud_peer_group_get(context, association.peer_group_id) peer_group_name = dc_local_pg.peer_group_name try: @@ -871,8 +1011,10 @@ class SystemPeerManager(manager.Manager): # peer_uuid system = self.get_peer_sysinv_client(peer).get_system() if system.uuid != peer.peer_uuid: - LOG.warning(f"Peer site system uuid {system.uuid} does not " - f"match with the peer_uuid {peer.peer_uuid}") + LOG.warning( + f"Peer site system uuid {system.uuid} does not " + f"match with the peer_uuid {peer.peer_uuid}" + ) return self._delete_primary_association(context, association_id) dc_client = self.get_peer_dc_client(peer) @@ -882,94 +1024,105 @@ class SystemPeerManager(manager.Manager): # Get peer site system peer dc_peer_system_peer = self._get_peer_site_system_peer( - dc_client, local_system_uuid) + dc_client, local_system_uuid + ) # Get peer site peer group - dc_peer_pg = self._get_peer_site_pg_by_name(dc_client, - peer_group_name) + dc_peer_pg = self._get_peer_site_pg_by_name(dc_client, peer_group_name) if dc_peer_pg is None: # peer group does not exist on peer site, the association should # be deleted - LOG.warning(f"Subcloud Peer Group {peer_group_name} does " - f"not exist on peer site.") + LOG.warning( + f"Subcloud Peer Group {peer_group_name} does " + "not exist on peer site." + ) return self._delete_primary_association(context, association_id) - dc_peer_pg_id = dc_peer_pg.get('id') - dc_peer_pg_priority = dc_peer_pg.get('group_priority') + dc_peer_pg_id = dc_peer_pg.get("id") + dc_peer_pg_priority = dc_peer_pg.get("group_priority") # Check if the peer group priority is 0, if so, raise exception if dc_peer_pg_priority == 0: - LOG.error(f"Failed to delete peer_group_association. Peer Group" - f" {peer_group_name} has priority 0 on peer site.") + LOG.error( + f"Failed to delete peer_group_association. Peer Group: " + f"{peer_group_name} has priority 0 on peer site." 
+ ) raise exceptions.SubcloudPeerGroupHasWrongPriority( - priority=dc_peer_pg_priority) + priority=dc_peer_pg_priority + ) # Use thread pool to limit number of operations in parallel delete_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_DELETE) - subclouds = db_api.subcloud_get_for_peer_group(context, - dc_local_pg.id) + subclouds = db_api.subcloud_get_for_peer_group(context, dc_local_pg.id) # Spawn threads to delete each subcloud clean_function = functools.partial(self._delete_subcloud, dc_client) _, delete_error_msg = self._run_parallel_group_operation( - 'peer subcloud clean', clean_function, delete_pool, subclouds) + "peer subcloud clean", clean_function, delete_pool, subclouds + ) if delete_error_msg: - LOG.error(f"Failed to delete subcloud(s) from " - f"the Subcloud Peer Group {peer_group_name} " - f"on peer site: {json.dumps(delete_error_msg)}") - sync_message = (f"Deletion of {list(delete_error_msg.keys())} " - "from the Subcloud Peer Group " - f"{peer_group_name} on the peer site failed.") - self._update_sync_status_to_failed(context, association_id, - sync_message) + LOG.error( + "Failed to delete subcloud(s) from the Subcloud Peer Group " + f"{peer_group_name} on peer site: {json.dumps(delete_error_msg)}" + ) + sync_message = ( + f"Deletion of {list(delete_error_msg.keys())} from the " + f"Subcloud Peer Group: {peer_group_name} on the peer site failed." + ) + self._update_sync_status_to_failed( + context, association_id, sync_message + ) return # System Peer does not exist on peer site, delete peer group if dc_peer_system_peer is None: try: dc_client.delete_subcloud_peer_group(peer_group_name) - LOG.info(f"Deleted Subcloud Peer Group {peer_group_name} " - f"on peer site.") - except dccommon_exceptions.\ - SubcloudPeerGroupDeleteFailedAssociated: - LOG.error(f"Subcloud Peer Group {peer_group_name} " - "delete failed as it is associated with System " - "Peer on peer site.") + LOG.info( + f"Deleted Subcloud Peer Group {peer_group_name} on peer site." + ) + except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated: + LOG.error( + f"Subcloud Peer Group {peer_group_name} delete failed " + "as it is associated with System Peer on peer site." + ) return self._delete_primary_association(context, association_id) - dc_peer_system_peer_id = dc_peer_system_peer.get('id') + dc_peer_system_peer_id = dc_peer_system_peer.get("id") # Get peer site non-primary association dc_peer_association = self._get_non_primary_association( - dc_client, dc_peer_system_peer_id, dc_peer_pg_id) + dc_client, dc_peer_system_peer_id, dc_peer_pg_id + ) # Delete peer group association on peer site if exist if dc_peer_association is not None: dc_peer_association_id = dc_peer_association.get("id") - dc_client.delete_peer_group_association( - dc_peer_association_id) + dc_client.delete_peer_group_association(dc_peer_association_id) elif dc_peer_association is None: - LOG.warning(f"PeerGroupAssociation does not exist on peer site." - f"Peer Group ID: {dc_peer_pg_id}, peer site System " - f"Peer ID: {dc_peer_system_peer_id}") + LOG.warning( + f"PeerGroupAssociation does not exist on peer site. 
" + f"Peer Group ID: {dc_peer_pg_id}, peer site System " + f"Peer ID: {dc_peer_system_peer_id}" + ) try: dc_client.delete_subcloud_peer_group(peer_group_name) - LOG.info("Deleted Subcloud Peer Group " - f"{peer_group_name} on peer site.") + LOG.info(f"Deleted Subcloud Peer Group {peer_group_name} on peer site.") except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated: - failed_message = f"Subcloud Peer Group {peer_group_name} " \ - + "delete failed as it is associated with system peer " \ - + "on peer site." - self._update_sync_status_to_failed(context, association_id, - failed_message) + failed_message = ( + f"Subcloud Peer Group {peer_group_name} delete failed as it " + "is associated with system peer on peer site." + ) + self._update_sync_status_to_failed( + context, association_id, failed_message + ) LOG.error(failed_message) raise return self._delete_primary_association(context, association_id) except Exception as exception: - LOG.exception("Failed to delete peer_group_association " - f"{association.id}") + LOG.exception(f"Failed to delete peer_group_association {association.id}") raise exception def handle_association_operations_in_progress(self): @@ -978,7 +1131,7 @@ class SystemPeerManager(manager.Manager): state to failure. """ - LOG.info('Identifying associations in transitory stages.') + LOG.info("Identifying associations in transitory stages.") associations = db_api.peer_group_association_get_all(self.context) @@ -988,11 +1141,14 @@ class SystemPeerManager(manager.Manager): # update syncing states to the corresponding failure states if new_sync_status: - LOG.info(f"Changing association {association.id} sync status " - f"from {association.sync_status} to {new_sync_status}") + LOG.info( + f"Changing association {association.id} sync status " + f"from {association.sync_status} to {new_sync_status}" + ) db_api.peer_group_association_update( self.context, association.id, sync_status=new_sync_status or association.sync_status, - sync_message="Service restart during syncing") + sync_message="Service restart during syncing", + ) diff --git a/distributedcloud/dcmanager/objects/base.py b/distributedcloud/dcmanager/objects/base.py index 34d8d535b..f878d1d48 100644 --- a/distributedcloud/dcmanager/objects/base.py +++ b/distributedcloud/dcmanager/objects/base.py @@ -1,5 +1,5 @@ # Copyright (c) 2015 Ericsson AB. -# Copyright (c) 2017, 2019, 2021 Wind River Systems, Inc. +# Copyright (c) 2017, 2019, 2021, 2024 Wind River Systems, Inc. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -34,16 +34,16 @@ class DCManagerObject(base.VersionedObject): "save" object method. 
""" - OBJ_PROJECT_NAMESPACE = 'dcmanager' - VERSION = '1.0' + OBJ_PROJECT_NAMESPACE = "dcmanager" + VERSION = "1.0" @staticmethod def _from_db_object(context, obj, db_obj): if db_obj is None: return None for field in obj.fields: - if field == 'metadata': - obj['metadata'] = db_obj['meta_data'] + if field == "metadata": + obj["metadata"] = db_obj["meta_data"] else: obj[field] = db_obj[field] @@ -66,6 +66,7 @@ class DCManagerObjectRegistry(base.VersionedObjectRegistry): setattr(objects, cls.obj_name(), cls) else: curr_version = versionutils.convert_version_to_tuple( - getattr(objects, cls.obj_name()).VERSION) + getattr(objects, cls.obj_name()).VERSION + ) if version >= curr_version: setattr(objects, cls.obj_name(), cls) diff --git a/distributedcloud/dcmanager/rpc/client.py b/distributedcloud/dcmanager/rpc/client.py index 8ac1dd849..b28dfd720 100644 --- a/distributedcloud/dcmanager/rpc/client.py +++ b/distributedcloud/dcmanager/rpc/client.py @@ -34,8 +34,9 @@ class RPCClient(object): """ def __init__(self, timeout, topic, version): - self._client = messaging.get_rpc_client(timeout=timeout, topic=topic, - version=version) + self._client = messaging.get_rpc_client( + timeout=timeout, topic=topic, version=version + ) @staticmethod def make_msg(method, **kwargs): @@ -61,71 +62,98 @@ class RPCClient(object): class SubcloudStateClient(RPCClient): """Client to update subcloud availability.""" - BASE_RPC_API_VERSION = '1.0' + BASE_RPC_API_VERSION = "1.0" def __init__(self, timeout=None): super(SubcloudStateClient, self).__init__( - timeout, - consts.TOPIC_DC_MANAGER_STATE, - self.BASE_RPC_API_VERSION) - - def bulk_update_subcloud_availability_and_endpoint_status( - self, ctxt, subcloud_name, subcloud_region, availability_data, - endpoint_data - ): - # Note: This is an asynchronous operation. - return self.cast(ctxt, self.make_msg( - 'bulk_update_subcloud_availability_and_endpoint_status', - subcloud_name=subcloud_name, - subcloud_region=subcloud_region, - availability_data=availability_data, - endpoint_data=endpoint_data) + timeout, consts.TOPIC_DC_MANAGER_STATE, self.BASE_RPC_API_VERSION ) - def update_subcloud_availability(self, ctxt, - subcloud_name, - subcloud_region, - availability_status, - update_state_only=False, - audit_fail_count=None): + def bulk_update_subcloud_availability_and_endpoint_status( + self, ctxt, subcloud_name, subcloud_region, availability_data, endpoint_data + ): + # Note: This is an asynchronous operation. 
+ return self.cast( + ctxt, + self.make_msg( + "bulk_update_subcloud_availability_and_endpoint_status", + subcloud_name=subcloud_name, + subcloud_region=subcloud_region, + availability_data=availability_data, + endpoint_data=endpoint_data, + ), + ) + + def update_subcloud_availability( + self, + ctxt, + subcloud_name, + subcloud_region, + availability_status, + update_state_only=False, + audit_fail_count=None, + ): # Note: synchronous return self.call( ctxt, - self.make_msg('update_subcloud_availability', - subcloud_name=subcloud_name, - subcloud_region=subcloud_region, - availability_status=availability_status, - update_state_only=update_state_only, - audit_fail_count=audit_fail_count)) + self.make_msg( + "update_subcloud_availability", + subcloud_name=subcloud_name, + subcloud_region=subcloud_region, + availability_status=availability_status, + update_state_only=update_state_only, + audit_fail_count=audit_fail_count, + ), + ) def update_subcloud_endpoint_status( - self, ctxt, subcloud_name=None, subcloud_region=None, endpoint_type=None, - sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, ignore_endpoints=None, - alarmable=True + self, + ctxt, + subcloud_name=None, + subcloud_region=None, + endpoint_type=None, + sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, + ignore_endpoints=None, + alarmable=True, ): # Note: This is an asynchronous operation. # See below for synchronous method call - return self.cast(ctxt, self.make_msg('update_subcloud_endpoint_status', - subcloud_name=subcloud_name, - subcloud_region=subcloud_region, - endpoint_type=endpoint_type, - sync_status=sync_status, - ignore_endpoints=ignore_endpoints, - alarmable=alarmable)) + return self.cast( + ctxt, + self.make_msg( + "update_subcloud_endpoint_status", + subcloud_name=subcloud_name, + subcloud_region=subcloud_region, + endpoint_type=endpoint_type, + sync_status=sync_status, + ignore_endpoints=ignore_endpoints, + alarmable=alarmable, + ), + ) def update_subcloud_endpoint_status_sync( - self, ctxt, subcloud_name=None, subcloud_region=None, endpoint_type=None, - sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, ignore_endpoints=None, - alarmable=True + self, + ctxt, + subcloud_name=None, + subcloud_region=None, + endpoint_type=None, + sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, + ignore_endpoints=None, + alarmable=True, ): # Note: synchronous - return self.call(ctxt, self.make_msg('update_subcloud_endpoint_status', - subcloud_name=subcloud_name, - subcloud_region=subcloud_region, - endpoint_type=endpoint_type, - sync_status=sync_status, - ignore_endpoints=ignore_endpoints, - alarmable=alarmable)) + return self.call( + ctxt, + self.make_msg( + "update_subcloud_endpoint_status", + subcloud_name=subcloud_name, + subcloud_region=subcloud_region, + endpoint_type=endpoint_type, + sync_status=sync_status, + ignore_endpoints=ignore_endpoints, + alarmable=alarmable, + ), + ) class ManagerClient(RPCClient): @@ -135,188 +163,290 @@ class ManagerClient(RPCClient): 1.0 - Initial version (Mitaka 1.0 release) """ - BASE_RPC_API_VERSION = '1.0' + BASE_RPC_API_VERSION = "1.0" def __init__(self, timeout=None): super(ManagerClient, self).__init__( - timeout, - consts.TOPIC_DC_MANAGER, - self.BASE_RPC_API_VERSION) + timeout, consts.TOPIC_DC_MANAGER, self.BASE_RPC_API_VERSION + ) def add_subcloud(self, ctxt, subcloud_id, payload): - return self.cast(ctxt, self.make_msg('add_subcloud', - subcloud_id=subcloud_id, - payload=payload)) + return self.cast( + ctxt, + self.make_msg("add_subcloud", subcloud_id=subcloud_id, 
payload=payload), + ) def add_secondary_subcloud(self, ctxt, subcloud_id, payload): - return self.call(ctxt, self.make_msg('add_secondary_subcloud', - subcloud_id=subcloud_id, - payload=payload)) + return self.call( + ctxt, + self.make_msg( + "add_secondary_subcloud", subcloud_id=subcloud_id, payload=payload + ), + ) def delete_subcloud(self, ctxt, subcloud_id): - return self.call(ctxt, self.make_msg('delete_subcloud', - subcloud_id=subcloud_id)) + return self.call( + ctxt, self.make_msg("delete_subcloud", subcloud_id=subcloud_id) + ) def rename_subcloud( self, ctxt, subcloud_id, curr_subcloud_name, new_subcloud_name=None ): - return self.call(ctxt, self.make_msg('rename_subcloud', - subcloud_id=subcloud_id, - curr_subcloud_name=curr_subcloud_name, - new_subcloud_name=new_subcloud_name)) + return self.call( + ctxt, + self.make_msg( + "rename_subcloud", + subcloud_id=subcloud_id, + curr_subcloud_name=curr_subcloud_name, + new_subcloud_name=new_subcloud_name, + ), + ) def update_subcloud( - self, ctxt, subcloud_id, management_state=None, description=None, - location=None, group_id=None, data_install=None, force=None, - deploy_status=None, peer_group_id=None, bootstrap_values=None, - bootstrap_address=None + self, + ctxt, + subcloud_id, + management_state=None, + description=None, + location=None, + group_id=None, + data_install=None, + force=None, + deploy_status=None, + peer_group_id=None, + bootstrap_values=None, + bootstrap_address=None, ): - return self.call(ctxt, self.make_msg('update_subcloud', - subcloud_id=subcloud_id, - management_state=management_state, - description=description, - location=location, - group_id=group_id, - data_install=data_install, - force=force, - deploy_status=deploy_status, - peer_group_id=peer_group_id, - bootstrap_values=bootstrap_values, - bootstrap_address=bootstrap_address)) + return self.call( + ctxt, + self.make_msg( + "update_subcloud", + subcloud_id=subcloud_id, + management_state=management_state, + description=description, + location=location, + group_id=group_id, + data_install=data_install, + force=force, + deploy_status=deploy_status, + peer_group_id=peer_group_id, + bootstrap_values=bootstrap_values, + bootstrap_address=bootstrap_address, + ), + ) def update_subcloud_with_network_reconfig(self, ctxt, subcloud_id, payload): - return self.cast(ctxt, self.make_msg('update_subcloud_with_network_reconfig', - subcloud_id=subcloud_id, - payload=payload)) - - def redeploy_subcloud(self, ctxt, subcloud_id, payload): - return self.cast(ctxt, self.make_msg('redeploy_subcloud', - subcloud_id=subcloud_id, - payload=payload)) - - def backup_subclouds(self, ctxt, payload): - return self.cast(ctxt, self.make_msg('backup_subclouds', - payload=payload)) - - def delete_subcloud_backups(self, ctxt, release_version, payload): - return self.call(ctxt, self.make_msg('delete_subcloud_backups', - release_version=release_version, - payload=payload)) - - def restore_subcloud_backups(self, ctxt, payload): - return self.cast(ctxt, self.make_msg('restore_subcloud_backups', - payload=payload)) - - def update_subcloud_sync_endpoint_type(self, ctxt, - subcloud_region, - endpoint_type_list, - openstack_installed): return self.cast( ctxt, - self.make_msg('update_subcloud_sync_endpoint_type', - subcloud_region=subcloud_region, - endpoint_type_list=endpoint_type_list, - openstack_installed=openstack_installed)) + self.make_msg( + "update_subcloud_with_network_reconfig", + subcloud_id=subcloud_id, + payload=payload, + ), + ) + + def redeploy_subcloud(self, ctxt, subcloud_id, 
payload): + return self.cast( + ctxt, + self.make_msg( + "redeploy_subcloud", subcloud_id=subcloud_id, payload=payload + ), + ) + + def backup_subclouds(self, ctxt, payload): + return self.cast(ctxt, self.make_msg("backup_subclouds", payload=payload)) + + def delete_subcloud_backups(self, ctxt, release_version, payload): + return self.call( + ctxt, + self.make_msg( + "delete_subcloud_backups", + release_version=release_version, + payload=payload, + ), + ) + + def restore_subcloud_backups(self, ctxt, payload): + return self.cast( + ctxt, self.make_msg("restore_subcloud_backups", payload=payload) + ) + + def update_subcloud_sync_endpoint_type( + self, ctxt, subcloud_region, endpoint_type_list, openstack_installed + ): + return self.cast( + ctxt, + self.make_msg( + "update_subcloud_sync_endpoint_type", + subcloud_region=subcloud_region, + endpoint_type_list=endpoint_type_list, + openstack_installed=openstack_installed, + ), + ) def prestage_subcloud(self, ctxt, payload): - return self.call(ctxt, self.make_msg('prestage_subcloud', - payload=payload)) + return self.call(ctxt, self.make_msg("prestage_subcloud", payload=payload)) def subcloud_deploy_create(self, ctxt, subcloud_id, payload): - return self.call(ctxt, self.make_msg('subcloud_deploy_create', - subcloud_id=subcloud_id, - payload=payload)) + return self.call( + ctxt, + self.make_msg( + "subcloud_deploy_create", subcloud_id=subcloud_id, payload=payload + ), + ) - def subcloud_deploy_install(self, ctxt, subcloud_id, payload, - initial_deployment): - return self.cast(ctxt, self.make_msg('subcloud_deploy_install', - subcloud_id=subcloud_id, - payload=payload, - initial_deployment=initial_deployment)) + def subcloud_deploy_install(self, ctxt, subcloud_id, payload, initial_deployment): + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_install", + subcloud_id=subcloud_id, + payload=payload, + initial_deployment=initial_deployment, + ), + ) def subcloud_deploy_enroll(self, ctxt, subcloud_id, payload): - return self.cast(ctxt, self.make_msg('subcloud_deploy_enroll', - subcloud_id=subcloud_id, - payload=payload)) + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_enroll", subcloud_id=subcloud_id, payload=payload + ), + ) - def subcloud_deploy_bootstrap(self, ctxt, subcloud_id, payload, - initial_deployment): - return self.cast(ctxt, self.make_msg('subcloud_deploy_bootstrap', - subcloud_id=subcloud_id, - payload=payload, - initial_deployment=initial_deployment)) + def subcloud_deploy_bootstrap(self, ctxt, subcloud_id, payload, initial_deployment): + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_bootstrap", + subcloud_id=subcloud_id, + payload=payload, + initial_deployment=initial_deployment, + ), + ) - def subcloud_deploy_config(self, ctxt, subcloud_id, payload, - initial_deployment): - return self.cast(ctxt, self.make_msg('subcloud_deploy_config', - subcloud_id=subcloud_id, - payload=payload, - initial_deployment=initial_deployment)) + def subcloud_deploy_config(self, ctxt, subcloud_id, payload, initial_deployment): + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_config", + subcloud_id=subcloud_id, + payload=payload, + initial_deployment=initial_deployment, + ), + ) def subcloud_deploy_complete(self, ctxt, subcloud_id): - return self.call(ctxt, self.make_msg('subcloud_deploy_complete', - subcloud_id=subcloud_id)) + return self.call( + ctxt, self.make_msg("subcloud_deploy_complete", subcloud_id=subcloud_id) + ) def subcloud_deploy_abort(self, ctxt, subcloud_id, deploy_status): - return 
self.cast(ctxt, self.make_msg('subcloud_deploy_abort', - subcloud_id=subcloud_id, - deploy_status=deploy_status)) + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_abort", + subcloud_id=subcloud_id, + deploy_status=deploy_status, + ), + ) - def subcloud_deploy_resume(self, ctxt, subcloud_id, subcloud_name, - payload, deploy_states_to_run): - return self.cast(ctxt, self.make_msg( - 'subcloud_deploy_resume', - subcloud_id=subcloud_id, - subcloud_name=subcloud_name, - payload=payload, - deploy_states_to_run=deploy_states_to_run)) + def subcloud_deploy_resume( + self, ctxt, subcloud_id, subcloud_name, payload, deploy_states_to_run + ): + return self.cast( + ctxt, + self.make_msg( + "subcloud_deploy_resume", + subcloud_id=subcloud_id, + subcloud_name=subcloud_name, + payload=payload, + deploy_states_to_run=deploy_states_to_run, + ), + ) def get_subcloud_name_by_region_name(self, ctxt, subcloud_region): - return self.call(ctxt, self.make_msg('get_subcloud_name_by_region_name', - subcloud_region=subcloud_region)) + return self.call( + ctxt, + self.make_msg( + "get_subcloud_name_by_region_name", subcloud_region=subcloud_region + ), + ) def batch_migrate_subcloud(self, ctxt, payload): - return self.cast(ctxt, self.make_msg('batch_migrate_subcloud', - payload=payload)) + return self.cast(ctxt, self.make_msg("batch_migrate_subcloud", payload=payload)) def sync_subcloud_peer_group(self, ctxt, association_id): - return self.cast(ctxt, self.make_msg( - 'sync_subcloud_peer_group', association_id=association_id)) + return self.cast( + ctxt, + self.make_msg("sync_subcloud_peer_group", association_id=association_id), + ) def sync_subcloud_peer_group_only(self, ctxt, association_id): # Without synchronizing subclouds - return self.call(ctxt, self.make_msg( - 'sync_subcloud_peer_group', association_id=association_id, - sync_subclouds=False)) + return self.call( + ctxt, + self.make_msg( + "sync_subcloud_peer_group", + association_id=association_id, + sync_subclouds=False, + ), + ) - def update_subcloud_peer_group(self, ctxt, peer_group_id, - group_state, max_subcloud_rehoming, - group_name, new_group_name=None): - return self.call(ctxt, self.make_msg( - 'update_subcloud_peer_group', - peer_group_id=peer_group_id, - group_state=group_state, - max_subcloud_rehoming=max_subcloud_rehoming, - group_name=group_name, new_group_name=new_group_name)) + def update_subcloud_peer_group( + self, + ctxt, + peer_group_id, + group_state, + max_subcloud_rehoming, + group_name, + new_group_name=None, + ): + return self.call( + ctxt, + self.make_msg( + "update_subcloud_peer_group", + peer_group_id=peer_group_id, + group_state=group_state, + max_subcloud_rehoming=max_subcloud_rehoming, + group_name=group_name, + new_group_name=new_group_name, + ), + ) def delete_peer_group_association(self, ctxt, association_id): - return self.call(ctxt, self.make_msg('delete_peer_group_association', - association_id=association_id)) + return self.call( + ctxt, + self.make_msg( + "delete_peer_group_association", association_id=association_id + ), + ) - def update_association_sync_status(self, ctxt, peer_group_id, - sync_status, sync_message=None): - return self.call(ctxt, self.make_msg('update_association_sync_status', - peer_group_id=peer_group_id, - sync_status=sync_status, - sync_message=sync_message)) + def update_association_sync_status( + self, ctxt, peer_group_id, sync_status, sync_message=None + ): + return self.call( + ctxt, + self.make_msg( + "update_association_sync_status", + peer_group_id=peer_group_id, + 
sync_status=sync_status, + sync_message=sync_message, + ), + ) def peer_monitor_notify(self, ctxt): - return self.call(ctxt, self.make_msg('peer_monitor_notify')) + return self.call(ctxt, self.make_msg("peer_monitor_notify")) def peer_group_audit_notify(self, ctxt, peer_group_name, payload): - return self.call(ctxt, self.make_msg('peer_group_audit_notify', - peer_group_name=peer_group_name, - payload=payload)) + return self.call( + ctxt, + self.make_msg( + "peer_group_audit_notify", + peer_group_name=peer_group_name, + payload=payload, + ), + ) class DCManagerNotifications(RPCClient): @@ -325,24 +455,33 @@ class DCManagerNotifications(RPCClient): Version History: 1.0 - Initial version """ - DCMANAGER_RPC_API_VERSION = '1.0' - TOPIC_DC_NOTIFICIATION = 'DCMANAGER-NOTIFICATION' + + DCMANAGER_RPC_API_VERSION = "1.0" + TOPIC_DC_NOTIFICIATION = "DCMANAGER-NOTIFICATION" def __init__(self, timeout=None): super(DCManagerNotifications, self).__init__( - timeout, - self.TOPIC_DC_NOTIFICIATION, - self.DCMANAGER_RPC_API_VERSION) + timeout, self.TOPIC_DC_NOTIFICIATION, self.DCMANAGER_RPC_API_VERSION + ) def subcloud_online(self, ctxt, subcloud_name): - return self.cast(ctxt, self.make_msg('subcloud_online', - subcloud_name=subcloud_name)) + return self.cast( + ctxt, self.make_msg("subcloud_online", subcloud_name=subcloud_name) + ) def subcloud_managed(self, ctxt, subcloud_name): - return self.cast(ctxt, self.make_msg('subcloud_managed', - subcloud_name=subcloud_name)) + return self.cast( + ctxt, self.make_msg("subcloud_managed", subcloud_name=subcloud_name) + ) def subcloud_sysinv_endpoint_update(self, ctxt, subcloud_name, endpoint): - return self.cast(ctxt, self.make_msg( - 'subcloud_sysinv_endpoint_update', subcloud_name=subcloud_name, - endpoint=endpoint), fanout=True, version=self.DCMANAGER_RPC_API_VERSION) + return self.cast( + ctxt, + self.make_msg( + "subcloud_sysinv_endpoint_update", + subcloud_name=subcloud_name, + endpoint=endpoint, + ), + fanout=True, + version=self.DCMANAGER_RPC_API_VERSION, + ) diff --git a/distributedcloud/dcmanager/state/service.py b/distributedcloud/dcmanager/state/service.py index 49771daf9..5156fa094 100644 --- a/distributedcloud/dcmanager/state/service.py +++ b/distributedcloud/dcmanager/state/service.py @@ -192,16 +192,14 @@ class DCManagerStateService(service.Service): ) def bulk_update_subcloud_availability_and_endpoint_status( - self, context, subcloud_name, subcloud_region, availability_data, - endpoint_data + self, context, subcloud_name, subcloud_region, availability_data, endpoint_data ): LOG.info( "Handling bulk_update_subcloud_availability_and_endpoint_status request " f"for subcloud: {subcloud_name}" ) - self.subcloud_state_manager.\ - bulk_update_subcloud_availability_and_endpoint_status( - context, subcloud_name, subcloud_region, availability_data, - endpoint_data - ) + manager = self.subcloud_state_manager + manager.bulk_update_subcloud_availability_and_endpoint_status( + context, subcloud_name, subcloud_region, availability_data, endpoint_data + ) diff --git a/distributedcloud/dcmanager/state/subcloud_state_manager.py b/distributedcloud/dcmanager/state/subcloud_state_manager.py index 462ce6a20..d1bec8b44 100644 --- a/distributedcloud/dcmanager/state/subcloud_state_manager.py +++ b/distributedcloud/dcmanager/state/subcloud_state_manager.py @@ -59,18 +59,25 @@ class SubcloudStateManager(manager.Manager): """Manages tasks related to subclouds.""" def __init__(self, *args, **kwargs): - LOG.debug('SubcloudStateManager initialization...') + 
LOG.debug("SubcloudStateManager initialization...") - super(SubcloudStateManager, - self).__init__(service_name="subcloud_manager", *args, **kwargs) + super(SubcloudStateManager, self).__init__( + service_name="subcloud_manager", *args, **kwargs + ) self.context = context.get_admin_context() self.dcorch_rpc_client = dcorch_rpc_client.EngineWorkerClient() self.fm_api = fm_api.FaultAPIs() self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient() - def _do_update_subcloud_endpoint_status(self, context, subcloud_id, - endpoint_type, sync_status, - alarmable, ignore_endpoints=None): + def _do_update_subcloud_endpoint_status( + self, + context, + subcloud_id, + endpoint_type, + sync_status, + alarmable, + ignore_endpoints=None, + ): """Update online/managed subcloud endpoint status :param context: request context object @@ -91,14 +98,19 @@ class SubcloudStateManager(manager.Manager): # retrieve the info from the db for this subcloud. # subcloud_id should not be None try: - for subcloud, subcloud_status in db_api. \ - subcloud_get_with_status(context, subcloud_id): + for subcloud, subcloud_status in db_api.subcloud_get_with_status( + context, subcloud_id + ): if subcloud_status: subcloud_status_list.append( db_api.subcloud_endpoint_status_db_model_to_dict( - subcloud_status)) - if subcloud_status.endpoint_type == \ - dccommon_consts.ENDPOINT_TYPE_IDENTITY: + subcloud_status + ) + ) + if ( + subcloud_status.endpoint_type + == dccommon_consts.ENDPOINT_TYPE_IDENTITY + ): original_identity_status = subcloud_status.sync_status except Exception as e: LOG.exception(e) @@ -108,28 +120,30 @@ class SubcloudStateManager(manager.Manager): if endpoint_type: # updating a single endpoint on a single subcloud for subcloud_status in subcloud_status_list: - if subcloud_status['endpoint_type'] == endpoint_type: - if subcloud_status['sync_status'] == sync_status: + if subcloud_status["endpoint_type"] == endpoint_type: + if subcloud_status["sync_status"] == sync_status: # No change in the sync_status - LOG.debug("Sync status (%s) for subcloud %s did " - "not change - ignore update" % - (sync_status, subcloud.name)) + LOG.debug( + "Sync status (%s) for subcloud %s did not change " + "- ignore update" % (sync_status, subcloud.name) + ) return # We found the endpoint break else: # We did not find the endpoint raise exceptions.BadRequest( - resource='subcloud', - msg='Endpoint %s not found for subcloud' % - endpoint_type) + resource="subcloud", + msg="Endpoint %s not found for subcloud" % endpoint_type, + ) - LOG.info("Updating subcloud:%s endpoint:%s sync:%s" % - (subcloud.name, endpoint_type, sync_status)) - db_api.subcloud_status_update(context, - subcloud_id, - endpoint_type, - sync_status) + LOG.info( + "Updating subcloud:%s endpoint:%s sync:%s" + % (subcloud.name, endpoint_type, sync_status) + ) + db_api.subcloud_status_update( + context, subcloud_id, endpoint_type, sync_status + ) # Trigger subcloud audits for the subcloud after # its identity endpoint turns to other status from unknown @@ -137,33 +151,38 @@ class SubcloudStateManager(manager.Manager): is_identity_unknown = ( original_identity_status == dccommon_consts.SYNC_STATUS_UNKNOWN ) - if endpoint_type == dccommon_consts.ENDPOINT_TYPE_IDENTITY \ - and is_sync_unknown and is_identity_unknown: + if ( + endpoint_type == dccommon_consts.ENDPOINT_TYPE_IDENTITY + and is_sync_unknown + and is_identity_unknown + ): if not subcloud.first_identity_sync_complete: - db_api.subcloud_update(context, subcloud_id, - first_identity_sync_complete=True) - 
LOG.debug('Request for audits for %s after updating ' - 'identity out of unknown' % subcloud.name) - self.audit_rpc_client.trigger_subcloud_audits( - context, subcloud_id) + db_api.subcloud_update( + context, subcloud_id, first_identity_sync_complete=True + ) + LOG.debug( + "Request for audits for %s after updating " + "identity out of unknown" % subcloud.name + ) + self.audit_rpc_client.trigger_subcloud_audits(context, subcloud_id) - entity_instance_id = "subcloud=%s.resource=%s" % \ - (subcloud.name, endpoint_type) - fault = self.fm_api.get_fault( - ALARM_OUT_OF_SYNC, - entity_instance_id) + entity_instance_id = "subcloud=%s.resource=%s" % ( + subcloud.name, + endpoint_type, + ) + fault = self.fm_api.get_fault(ALARM_OUT_OF_SYNC, entity_instance_id) - if (sync_status != dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) \ - and fault: + if (sync_status != dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) and fault: try: - self.fm_api.clear_fault( - ALARM_OUT_OF_SYNC, - entity_instance_id) + self.fm_api.clear_fault(ALARM_OUT_OF_SYNC, entity_instance_id) except Exception as e: LOG.exception(e) - elif not fault and alarmable and \ - (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC): + elif ( + not fault + and alarmable + and (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) + ): entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD try: @@ -173,15 +192,17 @@ class SubcloudStateManager(manager.Manager): entity_type_id=entity_type_id, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MAJOR, - reason_text=("%s %s sync_status is " - "out-of-sync" % - (subcloud.name, endpoint_type)), + reason_text=( + "%s %s sync_status is out-of-sync" + % (subcloud.name, endpoint_type) + ), alarm_type=fm_const.FM_ALARM_TYPE_0, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2, - proposed_repair_action="If problem persists " - "contact next level " - "of support", - service_affecting=False) + proposed_repair_action=( + "If problem persists contact next level of support" + ), + service_affecting=False, + ) self.fm_api.set_fault(fault) @@ -190,9 +211,11 @@ class SubcloudStateManager(manager.Manager): else: # update all endpoints on this subcloud - LOG.info("Updating all endpoints on subcloud: %s sync: %s " - "ignore_endpoints: %s" % - (subcloud.name, sync_status, ignore_endpoints)) + LOG.info( + "Updating all endpoints on subcloud: %s sync: %s " + "ignore_endpoints: %s" + % (subcloud.name, sync_status, ignore_endpoints) + ) # TODO(yuxing): The following code can be further optimized when # batch alarm clearance APIs are available, so we don't need to @@ -209,28 +232,32 @@ class SubcloudStateManager(manager.Manager): continue endpoint_to_update_list.append(endpoint) - entity_instance_id = "subcloud=%s.resource=%s" % \ - (subcloud.name, endpoint) + entity_instance_id = "subcloud=%s.resource=%s" % ( + subcloud.name, + endpoint, + ) - fault = self.fm_api.get_fault( - ALARM_OUT_OF_SYNC, - entity_instance_id) + fault = self.fm_api.get_fault(ALARM_OUT_OF_SYNC, entity_instance_id) # TODO(yuxing): batch clear all the out-of-sync alarms of a # given subcloud if fm_api support it. Be careful with the # dc-cert endpoint when adding the above; the endpoint # alarm must remain for offline subclouds. 
- if (sync_status != dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) \ - and fault: + if ( + sync_status != dccommon_consts.SYNC_STATUS_OUT_OF_SYNC + ) and fault: try: self.fm_api.clear_fault( - ALARM_OUT_OF_SYNC, - entity_instance_id) + ALARM_OUT_OF_SYNC, entity_instance_id + ) except Exception as e: LOG.exception(e) - elif not fault and alarmable and \ - (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC): + elif ( + not fault + and alarmable + and (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) + ): entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD try: fault = fm_api.Fault( @@ -239,15 +266,17 @@ class SubcloudStateManager(manager.Manager): entity_type_id=entity_type_id, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MAJOR, - reason_text=("%s %s sync_status is " - "out-of-sync" % - (subcloud.name, endpoint)), + reason_text=( + "%s %s sync_status is out-of-sync" + % (subcloud.name, endpoint) + ), alarm_type=fm_const.FM_ALARM_TYPE_0, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2, - proposed_repair_action="If problem persists " - "contact next level " - "of support", - service_affecting=False) + proposed_repair_action=( + "If problem persists contact next level of support" + ), + service_affecting=False, + ) self.fm_api.set_fault(fault) except Exception as e: @@ -256,10 +285,8 @@ class SubcloudStateManager(manager.Manager): if endpoint_to_update_list: try: db_api.subcloud_status_update_endpoints( - context, - subcloud_id, - endpoint_to_update_list, - sync_status) + context, subcloud_id, endpoint_to_update_list, sync_status + ) except Exception as e: LOG.exception(e) @@ -287,30 +314,30 @@ class SubcloudStateManager(manager.Manager): # the sync status update must be done first. # is_in_sync = sync_status == dccommon_consts.SYNC_STATUS_IN_SYNC - is_online = subcloud.availability_status == \ - dccommon_consts.AVAILABILITY_ONLINE - is_managed = subcloud.management_state == \ - dccommon_consts.MANAGEMENT_MANAGED - is_endpoint_type_dc_cert = endpoint_type == \ - dccommon_consts.ENDPOINT_TYPE_DC_CERT + is_online = subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE + is_managed = subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED + is_endpoint_type_dc_cert = ( + endpoint_type == dccommon_consts.ENDPOINT_TYPE_DC_CERT + ) is_secondary = subcloud.deploy_status == consts.DEPLOY_STATE_SECONDARY is_sync_unknown = sync_status == dccommon_consts.SYNC_STATUS_UNKNOWN is_secondary_and_sync_unknown = is_secondary and is_sync_unknown return ( - (not is_in_sync - or (is_online and (is_managed or is_endpoint_type_dc_cert))) + (not is_in_sync or (is_online and (is_managed or is_endpoint_type_dc_cert))) and not is_secondary ) or is_secondary_and_sync_unknown @sync_update_subcloud_endpoint_status def _update_subcloud_endpoint_status( - self, context, - subcloud_region, - endpoint_type=None, - sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, - alarmable=True, - ignore_endpoints=None): + self, + context, + subcloud_region, + endpoint_type=None, + sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, + alarmable=True, + ignore_endpoints=None, + ): """Update subcloud endpoint status :param context: request context object @@ -327,8 +354,8 @@ class SubcloudStateManager(manager.Manager): if not subcloud_region: raise exceptions.BadRequest( - resource='subcloud', - msg='Subcloud region not provided') + resource="subcloud", msg="Subcloud region not provided" + ) try: subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region) @@ -340,21 +367,31 
@@ class SubcloudStateManager(manager.Manager): # update a single subcloud try: self._do_update_subcloud_endpoint_status( - context, subcloud.id, endpoint_type, sync_status, - alarmable, ignore_endpoints + context, + subcloud.id, + endpoint_type, + sync_status, + alarmable, + ignore_endpoints, ) except Exception as e: LOG.exception(e) raise e else: - LOG.info("Ignoring subcloud sync_status update for subcloud:%s " - "availability:%s management:%s endpoint:%s sync:%s" % - (subcloud.name, subcloud.availability_status, - subcloud.management_state, endpoint_type, sync_status)) + LOG.info( + "Ignoring subcloud sync_status update for subcloud:%s " + "availability:%s management:%s endpoint:%s sync:%s" + % ( + subcloud.name, + subcloud.availability_status, + subcloud.management_state, + endpoint_type, + sync_status, + ) + ) def bulk_update_subcloud_availability_and_endpoint_status( - self, context, subcloud_name, subcloud_region, availability_data, - endpoint_data + self, context, subcloud_name, subcloud_region, availability_data, endpoint_data ): # This bulk update is executed as part of the audit process in dcmanager and # its related endpoints. This method is not used by dcorch and cert-mon. @@ -362,21 +399,20 @@ class SubcloudStateManager(manager.Manager): try: subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region) except Exception: - LOG.exception( - f"Failed to get subcloud by region name {subcloud_region}" - ) + LOG.exception(f"Failed to get subcloud by region name {subcloud_region}") raise if availability_data: self.update_subcloud_availability( - context, subcloud_region, availability_data["availability_status"], + context, + subcloud_region, + availability_data["availability_status"], availability_data["update_state_only"], - availability_data["audit_fail_count"], subcloud + availability_data["audit_fail_count"], + subcloud, ) if endpoint_data: - self._bulk_update_subcloud_endpoint_status( - context, subcloud, endpoint_data - ) + self._bulk_update_subcloud_endpoint_status(context, subcloud, endpoint_data) @lockutils.synchronized(LOCK_NAME) def _do_bulk_update_subcloud_endpoint_status( @@ -413,8 +449,7 @@ class SubcloudStateManager(manager.Manager): except Exception as e: LOG.exception(e) - elif not fault and \ - (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC): + elif not fault and (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC): entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD try: fault = fm_api.Fault( @@ -423,15 +458,17 @@ class SubcloudStateManager(manager.Manager): entity_type_id=entity_type_id, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MAJOR, - reason_text=("%s %s sync_status is " - "out-of-sync" % - (subcloud.name, endpoint)), + reason_text=( + "%s %s sync_status is " + "out-of-sync" % (subcloud.name, endpoint) + ), alarm_type=fm_const.FM_ALARM_TYPE_0, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2, proposed_repair_action="If problem persists " - "contact next level " - "of support", - service_affecting=False) + "contact next level " + "of support", + service_affecting=False, + ) self.fm_api.set_fault(fault) except Exception as e: @@ -439,7 +476,9 @@ class SubcloudStateManager(manager.Manager): try: db_api.subcloud_status_bulk_update_endpoints( - context, subcloud.id, endpoint_list, + context, + subcloud.id, + endpoint_list, ) except Exception as e: LOG.exception( @@ -447,9 +486,7 @@ class SubcloudStateManager(manager.Manager): f"endpoint status: {e}" ) - def _bulk_update_subcloud_endpoint_status( - self, context, 
subcloud, endpoint_list - ): + def _bulk_update_subcloud_endpoint_status(self, context, subcloud, endpoint_list): """Update the sync status of a list of subcloud endpoints :param context: current context object @@ -483,12 +520,14 @@ class SubcloudStateManager(manager.Manager): ) def update_subcloud_endpoint_status( - self, context, - subcloud_region=None, - endpoint_type=None, - sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, - alarmable=True, - ignore_endpoints=None): + self, + context, + subcloud_region=None, + endpoint_type=None, + sync_status=dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, + alarmable=True, + ignore_endpoints=None, + ): """Update subcloud endpoint status :param context: request context object @@ -505,83 +544,108 @@ class SubcloudStateManager(manager.Manager): if subcloud_region: self._update_subcloud_endpoint_status( - context, subcloud_region, endpoint_type, sync_status, alarmable, - ignore_endpoints) + context, + subcloud_region, + endpoint_type, + sync_status, + alarmable, + ignore_endpoints, + ) else: # update all subclouds for subcloud in db_api.subcloud_get_all(context): self._update_subcloud_endpoint_status( - context, subcloud.region_name, endpoint_type, sync_status, - alarmable, ignore_endpoints) + context, + subcloud.region_name, + endpoint_type, + sync_status, + alarmable, + ignore_endpoints, + ) - def _update_subcloud_state(self, context, subcloud_name, subcloud_region, - management_state, availability_status): + def _update_subcloud_state( + self, + context, + subcloud_name, + subcloud_region, + management_state, + availability_status, + ): try: - LOG.info('Notifying dcorch, subcloud:%s management: %s, ' - 'availability:%s' % - (subcloud_name, - management_state, - availability_status)) + LOG.info( + "Notifying dcorch, subcloud:%s management: %s, availability:%s" + % (subcloud_name, management_state, availability_status) + ) self.dcorch_rpc_client.update_subcloud_states( - context, subcloud_region, management_state, availability_status) + context, subcloud_region, management_state, availability_status + ) except Exception: - LOG.exception('Problem informing dcorch of subcloud state change,' - 'subcloud: %s' % subcloud_name) + LOG.exception( + "Problem informing dcorch of subcloud state change, subcloud: %s" + % subcloud_name + ) - def _raise_or_clear_subcloud_status_alarm(self, subcloud_name, - availability_status, - deploy_status=None): + def _raise_or_clear_subcloud_status_alarm( + self, subcloud_name, availability_status, deploy_status=None + ): entity_instance_id = "subcloud=%s" % subcloud_name fault = self.fm_api.get_fault( - fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, - entity_instance_id) + fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, entity_instance_id + ) if fault and (availability_status == dccommon_consts.AVAILABILITY_ONLINE): try: self.fm_api.clear_fault( - fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, - entity_instance_id) + fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, entity_instance_id + ) except Exception: - LOG.exception("Failed to clear offline alarm for subcloud: %s", - subcloud_name) + LOG.exception( + "Failed to clear offline alarm for subcloud: %s", subcloud_name + ) # Raise the alarm if the subcloud became offline and it's not a # secondary subcloud - elif not fault and \ - (availability_status == dccommon_consts.AVAILABILITY_OFFLINE and - deploy_status != consts.DEPLOY_STATE_SECONDARY): + elif not fault and ( + availability_status == dccommon_consts.AVAILABILITY_OFFLINE + and deploy_status != consts.DEPLOY_STATE_SECONDARY + ): try: fault = 
fm_api.Fault( alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE, alarm_state=fm_const.FM_ALARM_STATE_SET, entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD, entity_instance_id=entity_instance_id, - severity=fm_const.FM_ALARM_SEVERITY_CRITICAL, - reason_text=('%s is offline' % subcloud_name), + reason_text=("%s is offline" % subcloud_name), alarm_type=fm_const.FM_ALARM_TYPE_0, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29, - proposed_repair_action="Wait for subcloud to " - "become online; if " - "problem persists contact " - "next level of support.", - service_affecting=True) + proposed_repair_action=( + "Wait for subcloud to become online; if problem persists " + "contact next level of support." + ), + service_affecting=True, + ) self.fm_api.set_fault(fault) except Exception: - LOG.exception("Failed to raise offline alarm for subcloud: %s", - subcloud_name) + LOG.exception( + "Failed to raise offline alarm for subcloud: %s", subcloud_name + ) - def update_subcloud_availability(self, context, subcloud_region, - availability_status, - update_state_only=False, - audit_fail_count=None, subcloud=None): + def update_subcloud_availability( + self, + context, + subcloud_region, + availability_status, + update_state_only=False, + audit_fail_count=None, + subcloud=None, + ): if subcloud is None: try: - subcloud = db_api.subcloud_get_by_region_name(context, - subcloud_region) + subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region) except Exception: LOG.exception( "Failed to get subcloud by region name %s" % subcloud_region @@ -593,29 +657,37 @@ class SubcloudStateManager(manager.Manager): # subcloud's availability. This is required to compensate # for rare alarm update failures, which may occur during # availability updates. - self._raise_or_clear_subcloud_status_alarm(subcloud.name, - availability_status) + self._raise_or_clear_subcloud_status_alarm( + subcloud.name, availability_status + ) # Nothing has changed, but we want to send a state update for this # subcloud as an audit. Get the most up-to-date data. - self._update_subcloud_state(context, subcloud.name, - subcloud.region_name, - subcloud.management_state, - availability_status) + self._update_subcloud_state( + context, + subcloud.name, + subcloud.region_name, + subcloud.management_state, + availability_status, + ) elif availability_status is None: # only update the audit fail count try: - db_api.subcloud_update(self.context, subcloud.id, - audit_fail_count=audit_fail_count) + db_api.subcloud_update( + self.context, subcloud.id, audit_fail_count=audit_fail_count + ) except exceptions.SubcloudNotFound: # slim possibility subcloud could have been deleted since # we found it in db, ignore this benign error. 
- LOG.info('Ignoring SubcloudNotFound when attempting ' - 'audit_fail_count update: %s' % subcloud.name) + LOG.info( + "Ignoring SubcloudNotFound when attempting " + "audit_fail_count update: %s" % subcloud.name + ) return else: - self._raise_or_clear_subcloud_status_alarm(subcloud.name, - availability_status) + self._raise_or_clear_subcloud_status_alarm( + subcloud.name, availability_status + ) if availability_status == dccommon_consts.AVAILABILITY_OFFLINE: # Subcloud is going offline, set all endpoint statuses to @@ -634,70 +706,77 @@ class SubcloudStateManager(manager.Manager): context, subcloud.id, availability_status=availability_status, - audit_fail_count=audit_fail_count) + audit_fail_count=audit_fail_count, + ) except exceptions.SubcloudNotFound: # slim possibility subcloud could have been deleted since # we found it in db, ignore this benign error. - LOG.info('Ignoring SubcloudNotFound when attempting state' - ' update: %s' % subcloud.name) + LOG.info( + "Ignoring SubcloudNotFound when attempting state update: %s" + % subcloud.name + ) return if availability_status == dccommon_consts.AVAILABILITY_ONLINE: # Subcloud is going online # Tell cert-mon to audit endpoint certificate. - LOG.info('Request for online audit for %s' % subcloud.name) + LOG.info("Request for online audit for %s" % subcloud.name) dc_notification = rpc_client.DCManagerNotifications() dc_notification.subcloud_online(context, subcloud.region_name) # Trigger all the audits for the subcloud so it can update the # sync status ASAP. - self.audit_rpc_client.trigger_subcloud_audits(context, - subcloud.id) + self.audit_rpc_client.trigger_subcloud_audits(context, subcloud.id) # Send dcorch a state update - self._update_subcloud_state(context, subcloud.name, - subcloud.region_name, - updated_subcloud.management_state, - availability_status) - - def update_subcloud_sync_endpoint_type(self, context, - subcloud_region, - endpoint_type_list, - openstack_installed): - operation = 'add' if openstack_installed else 'remove' - func_switcher = { - 'add': ( - self.dcorch_rpc_client.add_subcloud_sync_endpoint_type, - db_api.subcloud_status_create - ), - 'remove': ( - self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type, - db_api.subcloud_status_delete + self._update_subcloud_state( + context, + subcloud.name, + subcloud.region_name, + updated_subcloud.management_state, + availability_status, ) + + def update_subcloud_sync_endpoint_type( + self, context, subcloud_region, endpoint_type_list, openstack_installed + ): + operation = "add" if openstack_installed else "remove" + func_switcher = { + "add": ( + self.dcorch_rpc_client.add_subcloud_sync_endpoint_type, + db_api.subcloud_status_create, + ), + "remove": ( + self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type, + db_api.subcloud_status_delete, + ), } try: subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region) except Exception: - LOG.exception( - "Failed to get subcloud by region name: %s" % subcloud_region - ) + LOG.exception("Failed to get subcloud by region name: %s" % subcloud_region) raise try: # Notify dcorch to add/remove sync endpoint type list - func_switcher[operation][0](self.context, subcloud_region, - endpoint_type_list) - LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' % - (subcloud.name, endpoint_type_list)) + func_switcher[operation][0]( + self.context, subcloud_region, endpoint_type_list + ) + LOG.info( + "Notifying dcorch, subcloud: %s new sync endpoint: %s" + % (subcloud.name, endpoint_type_list) + ) # Update subcloud 
status table by adding/removing openstack sync # endpoint types for endpoint_type in endpoint_type_list: - func_switcher[operation][1](self.context, subcloud.id, - endpoint_type) + func_switcher[operation][1](self.context, subcloud.id, endpoint_type) # Update openstack_installed of subcloud table - db_api.subcloud_update(self.context, subcloud.id, - openstack_installed=openstack_installed) + db_api.subcloud_update( + self.context, subcloud.id, openstack_installed=openstack_installed + ) except Exception: - LOG.exception('Problem informing dcorch of subcloud sync endpoint' - ' type change, subcloud: %s' % subcloud.name) + LOG.exception( + "Problem informing dcorch of subcloud sync endpoint " + "type change, subcloud: %s" % subcloud.name + ) diff --git a/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py b/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py index 62160fc44..ee2358620 100644 --- a/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py +++ b/distributedcloud/dcmanager/tests/unit/manager/test_subcloud_manager.py @@ -1792,8 +1792,10 @@ class TestSubcloudUpdate(BaseTestSubcloudManager): fake_bootstrap_address = "10.10.20.12" self.assertRaisesRegex( exceptions.BadRequest, - "Cannot update bootstrap_address" - " into rehome data, need to import bootstrap_values first", + ( + "Cannot update bootstrap_address into rehome data, need to " + "import bootstrap_values first" + ), self.sm.update_subcloud, self.ctx, self.subcloud.id, @@ -1933,10 +1935,12 @@ class TestSubcloudUpdate(BaseTestSubcloudManager): '"admin_password": "dGVzdHBhc3M=", ' '"bootstrap-address": "123.123.123.123"}}' ) - fake_bootstrap_values = '{"name": "TestSubcloud", \ - "system_mode": "simplex", "sysadmin_password": "testpass", \ - "ansible_ssh_pass": "fakepass", "ansible_become_pass": "fakepass",\ - "admin_password": "testpass"}' + fake_bootstrap_values = ( + '{"name": "TestSubcloud",' + '"system_mode": "simplex", "sysadmin_password": "testpass",' + '"ansible_ssh_pass": "fakepass", "ansible_become_pass": "fakepass",' + '"admin_password": "testpass"}' + ) fake_bootstrap_address = "123.123.123.123" self.sm.update_subcloud( @@ -2030,10 +2034,11 @@ class TestSubcloudUpdate(BaseTestSubcloudManager): self.subcloud["deploy_status"] = consts.DEPLOY_STATE_DEPLOY_FAILED self.assertRaisesRegex( exceptions.BadRequest, - f"Unable to manage {self.subcloud.name}: " - "its deploy_status must be either" - f" '{consts.DEPLOY_STATE_DONE}' or " - f"'{consts.DEPLOY_STATE_REHOME_PENDING}'", + ( + f"Unable to manage {self.subcloud.name}: its deploy_status " + f"must be either '{consts.DEPLOY_STATE_DONE}' or " + f"'{consts.DEPLOY_STATE_REHOME_PENDING}'" + ), self.sm.update_subcloud, self.ctx, self.subcloud.id, @@ -4042,8 +4047,8 @@ class TestSubcloudBackupRestore(BaseTestSubcloudManager): self.assertIn(expected_log, return_log) self.mock_log.info.assert_called_with( - "Subcloud restore backup operation finished.\nRestored subclouds: 0." - " Invalid subclouds: 1. Failed subclouds: 0." + "Subcloud restore backup operation finished.\nRestored subclouds: 0. " + "Invalid subclouds: 1. Failed subclouds: 0." ) @mock.patch.object( diff --git a/distributedcloud/dcmanager/version.py b/distributedcloud/dcmanager/version.py index f77b7bf5b..c2c570373 100644 --- a/distributedcloud/dcmanager/version.py +++ b/distributedcloud/dcmanager/version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2021 Wind River Systems, Inc. +# Copyright (c) 2017-2021, 2024 Wind River Systems, Inc. 
# Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at @@ -15,4 +15,4 @@ import pbr.version -version_info = pbr.version.VersionInfo('distributedcloud') +version_info = pbr.version.VersionInfo("distributedcloud") diff --git a/distributedcloud/dcorch/api/proxy/apps/patch.py b/distributedcloud/dcorch/api/proxy/apps/patch.py index f585adecb..5895974d6 100644 --- a/distributedcloud/dcorch/api/proxy/apps/patch.py +++ b/distributedcloud/dcorch/api/proxy/apps/patch.py @@ -126,9 +126,7 @@ class PatchAPIController(Middleware): os.remove(fn) return except OSError: - msg = ( - f"Unable to remove patch file {fn} from the central " "storage." - ) + msg = f"Unable to remove patch file {fn} from the central storage." raise webob.exc.HTTPUnprocessableEntity(explanation=msg) LOG.info(f"Patch {patch} was not found in {vault}") diff --git a/distributedcloud/dcorch/common/config.py b/distributedcloud/dcorch/common/config.py index dfd81b8e7..07376ca3e 100644 --- a/distributedcloud/dcorch/common/config.py +++ b/distributedcloud/dcorch/common/config.py @@ -294,7 +294,7 @@ endpoint_cache_opt_group = cfg.OptGroup( ) openstack_cache_opt_group = cfg.OptGroup( - name="openstack_cache", title="Containerized OpenStack" " Credentials" + name="openstack_cache", title="Containerized OpenStack Credentials" ) fernet_opt_group = cfg.OptGroup(name="fernet", title="Fernet Options") diff --git a/distributedcloud/dcorch/engine/generic_sync_worker_manager.py b/distributedcloud/dcorch/engine/generic_sync_worker_manager.py index 3618ccf9f..38808b5fd 100644 --- a/distributedcloud/dcorch/engine/generic_sync_worker_manager.py +++ b/distributedcloud/dcorch/engine/generic_sync_worker_manager.py @@ -56,8 +56,7 @@ class GenericSyncWorkerManager(object): for endpoint_type in endpoint_type_list: LOG.debug( f"Engine id:({self.engine_id}) create " - f"{subcloud_name}/{endpoint_type}/{management_ip} " - f"sync obj" + f"{subcloud_name}/{endpoint_type}/{management_ip} sync obj" ) sync_obj = sync_object_class_map[endpoint_type]( subcloud_name, endpoint_type, management_ip @@ -70,9 +69,7 @@ class GenericSyncWorkerManager(object): f"Engine id:({self.engine_id}) Start to sync " f"{len(subcloud_sync_list)} (subcloud, endpoint_type) pairs." ) - LOG.debug( - f"Engine id:({self.engine_id}) Start to sync " f"{subcloud_sync_list}." - ) + LOG.debug(f"Engine id:({self.engine_id}) Start to sync {subcloud_sync_list}.") for sc_region_name, ept, ip in subcloud_sync_list: try: @@ -373,13 +370,11 @@ class GenericSyncWorkerManager(object): f"Engine id:({self.engine_id}) Start to audit " f"{len(subcloud_sync_list)} (subcloud, endpoint_type) pairs." ) - LOG.debug( - f"Engine id:({self.engine_id}) Start to audit " f"{subcloud_sync_list}." 
- ) + LOG.debug(f"Engine id:({self.engine_id}) Start to audit {subcloud_sync_list}.") for sc_region_name, ept, ip in subcloud_sync_list: LOG.debug( - f"Attempt audit_subcloud: " f"{self.engine_id}/{sc_region_name}/{ept}" + f"Attempt audit_subcloud: {self.engine_id}/{sc_region_name}/{ept}" ) try: sync_obj = sync_object_class_map[ept](sc_region_name, ept, ip) @@ -391,7 +386,7 @@ class GenericSyncWorkerManager(object): LOG.debug( f"Engine id:({self.engine_id}/{sc_region_name}/{ept}) " f"SubcloudSyncNotFound: The endpoint in subcloud_sync " - f"has been removed" + "has been removed" ) except Exception as e: LOG.error( diff --git a/distributedcloud/dcorch/engine/initial_sync_worker_manager.py b/distributedcloud/dcorch/engine/initial_sync_worker_manager.py index de24b58a2..0ed6ef0f8 100644 --- a/distributedcloud/dcorch/engine/initial_sync_worker_manager.py +++ b/distributedcloud/dcorch/engine/initial_sync_worker_manager.py @@ -59,7 +59,7 @@ class InitialSyncWorkerManager(object): ) except Exception as e: LOG.error( - f"Exception occurred when running initial_sync for " + "Exception occurred when running initial_sync for " f"subcloud {sc_region_name}: {e}" ) @@ -82,9 +82,7 @@ class InitialSyncWorkerManager(object): ) if result == 0: # Sync is no longer required - LOG.debug( - f"Initial sync for subcloud {subcloud_name} " f"no longer required" - ) + LOG.debug(f"Initial sync for subcloud {subcloud_name} no longer required") return # sync_objs stores the sync object per endpoint @@ -131,12 +129,12 @@ class InitialSyncWorkerManager(object): pass else: LOG.error( - f"Unexpected new_state {new_state} for " f"subcloud {subcloud_name}" + f"Unexpected new_state {new_state} for subcloud {subcloud_name}" ) else: LOG.debug( - f"Initial sync was cancelled for subcloud " - f"{subcloud_name} while in progress" + f"Initial sync was cancelled for subcloud {subcloud_name} " + "while in progress" ) def _reattempt_sync(self, subcloud_name): @@ -159,9 +157,8 @@ class InitialSyncWorkerManager(object): LOG.debug(f"enabling subcloud {subcloud_name}") for endpoint_type, sync_obj in sync_objs.items(): LOG.debug( - f"Engine id: {self.engine_id} enabling sync thread " - f"for subcloud {subcloud_name} and " - f"endpoint type {endpoint_type}." + f"Engine id: {self.engine_id} enabling sync thread for subcloud " + f"{subcloud_name} and endpoint type {endpoint_type}." ) sync_obj.enable() diff --git a/distributedcloud/dcorch/engine/sync_services/sysinv.py b/distributedcloud/dcorch/engine/sync_services/sysinv.py index 4160bdb1e..4cb2cd45d 100644 --- a/distributedcloud/dcorch/engine/sync_services/sysinv.py +++ b/distributedcloud/dcorch/engine/sync_services/sysinv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2024, 2024 Wind River Systems, Inc. +# Copyright (c) 2017-2022, 2024 Wind River Systems, Inc. # All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -95,7 +95,7 @@ class SysinvSyncThread(SyncThread): sc_sysinv_url = build_subcloud_endpoint(self.management_ip, "sysinv") LOG.debug( - f"Built sc_sysinv_url {sc_sysinv_url} for subcloud " f"{self.subcloud_name}" + f"Built sc_sysinv_url {sc_sysinv_url} for subcloud {self.subcloud_name}" ) self.sc_sysinv_client = SysinvClient( @@ -266,8 +266,9 @@ class SysinvSyncThread(SyncThread): ] LOG.info( - "certificate {} {} [{}] updated with subcloud certificates:" - " {}".format(rsrc.id, subcloud_rsrc_id, signature, sub_certs_updated), + "certificate {} {} [{}] updated with subcloud certificates: {}".format( + rsrc.id, subcloud_rsrc_id, signature, sub_certs_updated + ), extra=self.log_extra, ) @@ -381,8 +382,9 @@ class SysinvSyncThread(SyncThread): if not passwd_hash: LOG.info( - "sync_user no user update found in resource_info" - "{}".format(request.orch_job.resource_info), + "sync_user no user update found in resource_info {}".format( + request.orch_job.resource_info + ), extra=self.log_extra, ) return @@ -531,16 +533,18 @@ class SysinvSyncThread(SyncThread): keystone_exceptions.ConnectFailure, ) as e: LOG.info( - "get subcloud_resources {}: subcloud {} is not reachable" - "[{}]".format(resource_type, self.region_name, str(e)), + "get subcloud_resources {}: subcloud {} is not reachable [{}]".format( + resource_type, self.region_name, str(e) + ), extra=self.log_extra, ) # None will force skip of audit return None except exceptions.NotAuthorized as e: LOG.info( - "get subcloud_resources {}: subcloud {} not authorized" - "[{}]".format(resource_type, self.region_name, str(e)), + "get subcloud_resources {}: subcloud {} not authorized [{}]".format( + resource_type, self.region_name, str(e) + ), extra=self.log_extra, ) OpenStackDriver.delete_region_clients(self.region_name) diff --git a/distributedcloud/dcorch/engine/sync_thread.py b/distributedcloud/dcorch/engine/sync_thread.py index c9f9a866b..621716988 100644 --- a/distributedcloud/dcorch/engine/sync_thread.py +++ b/distributedcloud/dcorch/engine/sync_thread.py @@ -392,7 +392,7 @@ class SyncThread(object): # Early exit in case there are no pending sync requests if not sync_requests: LOG.debug( - "Sync resources done for subcloud - " "no sync requests", + "Sync resources done for subcloud - no sync requests", extra=self.log_extra, ) self.set_sync_status(dccommon_consts.SYNC_STATUS_IN_SYNC) @@ -432,13 +432,13 @@ class SyncThread(object): if not actual_sync_requests: LOG.info( - "Sync resources done for subcloud - " "no valid sync requests", + "Sync resources done for subcloud - no valid sync requests", extra=self.log_extra, ) return elif not self.is_subcloud_enabled(): LOG.info( - "Sync resources done for subcloud - " "subcloud is disabled", + "Sync resources done for subcloud - subcloud is disabled", extra=self.log_extra, ) return diff --git a/distributedcloud/dcorch/objects/orchjob.py b/distributedcloud/dcorch/objects/orchjob.py index 8a4a28e67..c69d6bf77 100644 --- a/distributedcloud/dcorch/objects/orchjob.py +++ b/distributedcloud/dcorch/objects/orchjob.py @@ -50,7 +50,7 @@ class OrchJob(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " "resource_id set", + reason="cannot create a Subcloud object without a resource_id set", ) updates = self.obj_get_changes() @@ -59,7 +59,7 @@ class OrchJob(base.OrchestratorObject, base.VersionedObjectDictCompat): 
except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " "endpoint_type set", + reason="cannot create a Subcloud object without a endpoint_type set", ) updates = self.obj_get_changes() @@ -68,8 +68,7 @@ class OrchJob(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " - "operation_type set", + reason="cannot create a Subcloud object without a operation_type set", ) db_orch_job = db_api.orch_job_create( diff --git a/distributedcloud/dcorch/objects/orchrequest.py b/distributedcloud/dcorch/objects/orchrequest.py index 9dae3321b..4c4073523 100644 --- a/distributedcloud/dcorch/objects/orchrequest.py +++ b/distributedcloud/dcorch/objects/orchrequest.py @@ -54,7 +54,7 @@ class OrchRequest(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " "orch_job_id set", + reason="cannot create a Subcloud object without a orch_job_id set", ) updates = self.obj_get_changes() @@ -63,8 +63,9 @@ class OrchRequest(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " - "target_region_name set", + reason=( + "cannot create a Subcloud object without a target_region_name set" + ), ) db_orch_request = db_api.orch_request_create( diff --git a/distributedcloud/dcorch/objects/resource.py b/distributedcloud/dcorch/objects/resource.py index 428501e94..ae1792cc9 100644 --- a/distributedcloud/dcorch/objects/resource.py +++ b/distributedcloud/dcorch/objects/resource.py @@ -46,7 +46,7 @@ class Resource(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Resource object without a " "resource_type set", + reason="cannot create a Resource object without a resource_type set", ) db_resource = db_api.resource_create(self._context, resource_type, updates) diff --git a/distributedcloud/dcorch/objects/subcloud.py b/distributedcloud/dcorch/objects/subcloud.py index 4906cb4a6..20c8a7ef1 100644 --- a/distributedcloud/dcorch/objects/subcloud.py +++ b/distributedcloud/dcorch/objects/subcloud.py @@ -56,7 +56,7 @@ class Subcloud(base.OrchestratorObject, base.VersionedObjectDictCompat): except KeyError: raise exceptions.ObjectActionError( action="create", - reason="cannot create a Subcloud object without a " "region_name set", + reason="cannot create a Subcloud object without a region_name set", ) try: db_subcloud = db_api.subcloud_create(self._context, region_name, updates) diff --git a/distributedcloud/run_black.py b/distributedcloud/run_black.py index 1d20a2af2..d67d64cef 100644 --- a/distributedcloud/run_black.py +++ b/distributedcloud/run_black.py @@ -9,12 +9,6 @@ modules = [ "dcdbsync", "dcagent", "dcorch", - "dcmanager/api", - "dcmanager/audit", - "dcmanager/common", - "dcmanager/db", - "dcmanager/orchestrator", - "dcmanager/tests", "dcmanager", ] @@ -22,14 +16,9 @@ modules = [ formatted_modules = [ "dccommon", "dcdbsync", - "dcorch", "dcagent", - "dcmanager/api", - "dcmanager/audit", - "dcmanager/common", - "dcmanager/db", - "dcmanager/orchestrator", - "dcmanager/tests", + "dcorch", + "dcmanager", ] diff --git a/distributedcloud/setup.py b/distributedcloud/setup.py index 
5f9bfbd6f..ac71c5a90 100644 --- a/distributedcloud/setup.py +++ b/distributedcloud/setup.py @@ -1,4 +1,5 @@ # Copyright (c) 2013 Hewlett-Packard Development Company, L.P. +# Copyright (c) 2024 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,6 +26,4 @@ except ImportError: pass # Danger - pbr requirement >= 2.0.0 not satisfied... -setuptools.setup( - setup_requires=['pbr>=1.8.0'], - pbr=True) +setuptools.setup(setup_requires=["pbr>=1.8.0"], pbr=True)
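
As a rough sketch of how the consolidated module list in run_black.py might be consumed: the driver body is not shown in the hunks above, so the loop, the "black --check" invocation, and the exit handling below are illustrative assumptions rather than the project's actual script.

    import subprocess
    import sys

    # Mirrors the consolidated list shown in the run_black.py hunk above.
    MODULES = ["dccommon", "dcdbsync", "dcagent", "dcorch", "dcmanager"]

    def main() -> int:
        failing = []
        for module in MODULES:
            # "black --check" reports files that would be reformatted
            # without rewriting them; non-zero exit means changes needed.
            result = subprocess.run(["black", "--check", module])
            if result.returncode != 0:
                failing.append(module)
        if failing:
            print("black formatting issues in: " + ", ".join(failing))
            return 1
        return 0

    if __name__ == "__main__":
        sys.exit(main())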