Merge "Fix DC geo-redundancy systemcontroller_gateway_address syncing"
This commit is contained in:
commit
59924988cd
@ -10,6 +10,7 @@ from requests_toolbelt import MultipartEncoder
|
||||
from dccommon import consts
|
||||
from dccommon.drivers import base
|
||||
from dccommon import exceptions
|
||||
from dcmanager.db.sqlalchemy import models
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
@ -27,6 +28,7 @@ class DcmanagerClient(base.DriverBase):
|
||||
timeout=DCMANAGER_CLIENT_REST_DEFAULT_TIMEOUT,
|
||||
endpoint_type=consts.KS_ENDPOINT_PUBLIC,
|
||||
endpoint=None,
|
||||
peer: models.SystemPeer = None,
|
||||
):
|
||||
if endpoint is None:
|
||||
endpoint = session.get_endpoint(
|
||||
@ -35,6 +37,7 @@ class DcmanagerClient(base.DriverBase):
|
||||
self.endpoint = endpoint
|
||||
self.timeout = timeout
|
||||
self.session = session
|
||||
self.peer = peer
|
||||
|
||||
def get_system_peer(self, system_peer_uuid):
|
||||
"""Get system peer."""
|
||||
|
@ -1076,8 +1076,11 @@ class SubcloudsController(object):
|
||||
systemcontroller_gateway_address.split(",")[0]
|
||||
!= subcloud.systemcontroller_gateway_ip
|
||||
):
|
||||
# Pass bootstrap_values_dict for patch operations where these
|
||||
# values aren't in payload, unlike subcloud add where they are.
|
||||
# Function needs management/admin_subnet for validation
|
||||
psd_common.validate_systemcontroller_gateway_address(
|
||||
systemcontroller_gateway_address, payload
|
||||
systemcontroller_gateway_address, bootstrap_values_dict
|
||||
)
|
||||
|
||||
management_state = payload.get("management-state")
|
||||
|
@ -541,27 +541,27 @@ def system_peer_create(
|
||||
)
|
||||
|
||||
|
||||
def system_peer_get(context, peer_id):
|
||||
def system_peer_get(context, peer_id) -> models.SystemPeer:
|
||||
"""Retrieve a system_peer or raise if it does not exist."""
|
||||
return IMPL.system_peer_get(context, peer_id)
|
||||
|
||||
|
||||
def system_peer_get_by_uuid(context, uuid):
|
||||
def system_peer_get_by_uuid(context, uuid) -> models.SystemPeer:
|
||||
"""Retrieve a system_peer by uuid or raise if it does not exist."""
|
||||
return IMPL.system_peer_get_by_uuid(context, uuid)
|
||||
|
||||
|
||||
def system_peer_get_by_name(context, uuid):
|
||||
def system_peer_get_by_name(context, uuid) -> models.SystemPeer:
|
||||
"""Retrieve a system_peer by name or raise if it does not exist."""
|
||||
return IMPL.system_peer_get_by_name(context, uuid)
|
||||
|
||||
|
||||
def system_peer_get_all(context):
|
||||
def system_peer_get_all(context) -> list[models.SystemPeer]:
|
||||
"""Retrieve all system peers."""
|
||||
return IMPL.system_peer_get_all(context)
|
||||
|
||||
|
||||
def peer_group_get_for_system_peer(context, peer_id):
|
||||
def peer_group_get_for_system_peer(context, peer_id) -> list[models.SubcloudPeerGroup]:
|
||||
"""Get subcloud peer groups associated with a system peer."""
|
||||
return IMPL.peer_group_get_for_system_peer(context, peer_id)
|
||||
|
||||
@ -656,22 +656,24 @@ def subcloud_peer_group_destroy(context, group_id):
|
||||
return IMPL.subcloud_peer_group_destroy(context, group_id)
|
||||
|
||||
|
||||
def subcloud_peer_group_get(context, group_id):
|
||||
def subcloud_peer_group_get(context, group_id) -> models.SubcloudPeerGroup:
|
||||
"""Retrieve a subcloud_peer_group or raise if it does not exist."""
|
||||
return IMPL.subcloud_peer_group_get(context, group_id)
|
||||
|
||||
|
||||
def subcloud_peer_group_get_by_name(context, name):
|
||||
def subcloud_peer_group_get_by_name(context, name) -> models.SubcloudPeerGroup:
|
||||
"""Retrieve a subcloud_peer_group by name or raise if it does not exist."""
|
||||
return IMPL.subcloud_peer_group_get_by_name(context, name)
|
||||
|
||||
|
||||
def subcloud_peer_group_get_by_leader_id(context, system_leader_id):
|
||||
def subcloud_peer_group_get_by_leader_id(
|
||||
context, system_leader_id
|
||||
) -> list[models.SubcloudPeerGroup]:
|
||||
"""Retrieve subcloud peer groups by system_leader_id."""
|
||||
return IMPL.subcloud_peer_group_get_by_leader_id(context, system_leader_id)
|
||||
|
||||
|
||||
def subcloud_get_for_peer_group(context, group_id):
|
||||
def subcloud_get_for_peer_group(context, group_id) -> list[models.Subcloud]:
|
||||
"""Retrieve all subclouds belonging to a subcloud_peer_group
|
||||
|
||||
or raise if it does not exist.
|
||||
@ -679,7 +681,7 @@ def subcloud_get_for_peer_group(context, group_id):
|
||||
return IMPL.subcloud_get_for_peer_group(context, group_id)
|
||||
|
||||
|
||||
def subcloud_peer_group_get_all(context):
|
||||
def subcloud_peer_group_get_all(context) -> list[models.SubcloudPeerGroup]:
|
||||
"""Retrieve all subcloud peer groups."""
|
||||
return IMPL.subcloud_peer_group_get_all(context)
|
||||
|
||||
@ -765,31 +767,35 @@ def peer_group_association_destroy(context, id):
|
||||
return IMPL.peer_group_association_destroy(context, id)
|
||||
|
||||
|
||||
def peer_group_association_get(context, id):
|
||||
def peer_group_association_get(context, id) -> models.PeerGroupAssociation:
|
||||
"""Retrieve a peer_group_association or raise if it does not exist."""
|
||||
return IMPL.peer_group_association_get(context, id)
|
||||
|
||||
|
||||
def peer_group_association_get_all(context):
|
||||
def peer_group_association_get_all(context) -> list[models.PeerGroupAssociation]:
|
||||
"""Retrieve all peer_group_associations."""
|
||||
return IMPL.peer_group_association_get_all(context)
|
||||
|
||||
|
||||
def peer_group_association_get_by_peer_group_and_system_peer_id(
|
||||
context, peer_group_id, system_peer_id
|
||||
):
|
||||
) -> list[models.PeerGroupAssociation]:
|
||||
"""Get peer group associations by peer_group_id and system_peer_id."""
|
||||
return IMPL.peer_group_association_get_by_peer_group_and_system_peer_id(
|
||||
context, peer_group_id, system_peer_id
|
||||
)
|
||||
|
||||
|
||||
def peer_group_association_get_by_peer_group_id(context, peer_group_id):
|
||||
def peer_group_association_get_by_peer_group_id(
|
||||
context, peer_group_id
|
||||
) -> list[models.PeerGroupAssociation]:
|
||||
"""Get the peer_group_association list by peer_group_id"""
|
||||
return IMPL.peer_group_association_get_by_peer_group_id(context, peer_group_id)
|
||||
|
||||
|
||||
def peer_group_association_get_by_system_peer_id(context, system_peer_id):
|
||||
def peer_group_association_get_by_system_peer_id(
|
||||
context, system_peer_id
|
||||
) -> list[models.PeerGroupAssociation]:
|
||||
"""Get the peer_group_association list by system_peer_id"""
|
||||
return IMPL.peer_group_association_get_by_system_peer_id(context, system_peer_id)
|
||||
|
||||
|
@ -1647,7 +1647,7 @@ def peer_group_association_destroy(context, association_id):
|
||||
|
||||
|
||||
@require_context
|
||||
def peer_group_association_get(context, association_id):
|
||||
def peer_group_association_get(context, association_id) -> models.PeerGroupAssociation:
|
||||
try:
|
||||
result = (
|
||||
model_query(context, models.PeerGroupAssociation)
|
||||
@ -1666,7 +1666,7 @@ def peer_group_association_get(context, association_id):
|
||||
|
||||
|
||||
@require_context
|
||||
def peer_group_association_get_all(context):
|
||||
def peer_group_association_get_all(context) -> list[models.PeerGroupAssociation]:
|
||||
result = (
|
||||
model_query(context, models.PeerGroupAssociation)
|
||||
.filter_by(deleted=0)
|
||||
@ -1682,7 +1682,7 @@ def peer_group_association_get_all(context):
|
||||
@require_context
|
||||
def peer_group_association_get_by_peer_group_and_system_peer_id(
|
||||
context, peer_group_id, system_peer_id
|
||||
):
|
||||
) -> models.PeerGroupAssociation:
|
||||
try:
|
||||
result = (
|
||||
model_query(context, models.PeerGroupAssociation)
|
||||
@ -1705,7 +1705,9 @@ def peer_group_association_get_by_peer_group_and_system_peer_id(
|
||||
|
||||
|
||||
@require_context
|
||||
def peer_group_association_get_by_peer_group_id(context, peer_group_id):
|
||||
def peer_group_association_get_by_peer_group_id(
|
||||
context, peer_group_id
|
||||
) -> models.PeerGroupAssociation:
|
||||
result = (
|
||||
model_query(context, models.PeerGroupAssociation)
|
||||
.filter_by(deleted=0)
|
||||
@ -1718,7 +1720,9 @@ def peer_group_association_get_by_peer_group_id(context, peer_group_id):
|
||||
|
||||
|
||||
@require_context
|
||||
def peer_group_association_get_by_system_peer_id(context, system_peer_id):
|
||||
def peer_group_association_get_by_system_peer_id(
|
||||
context, system_peer_id
|
||||
) -> models.PeerGroupAssociation:
|
||||
result = (
|
||||
model_query(context, models.PeerGroupAssociation)
|
||||
.filter_by(deleted=0)
|
||||
|
@ -3,8 +3,10 @@
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import threading
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fm_api import constants as fm_const
|
||||
from fm_api import fm_api
|
||||
@ -12,14 +14,20 @@ from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
from dccommon import consts as dccommon_consts
|
||||
from dccommon.drivers.openstack.dcmanager_v1 import DcmanagerClient
|
||||
from dcmanager.common import consts
|
||||
from dcmanager.common import context
|
||||
from dcmanager.common.i18n import _
|
||||
from dcmanager.common import manager
|
||||
from dcmanager.common import utils
|
||||
from dcmanager.db import api as db_api
|
||||
from dcmanager.db.sqlalchemy import models
|
||||
from dcmanager.manager.system_peer_manager import SystemPeerManager
|
||||
|
||||
# Use TYPE_CHECKING to avoid circular import
|
||||
if TYPE_CHECKING:
|
||||
from dcmanager.manager.subcloud_manager import SubcloudManager
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@ -27,7 +35,9 @@ LOG = logging.getLogger(__name__)
|
||||
class PeerGroupAuditManager(manager.Manager):
|
||||
"""Manages audit related tasks."""
|
||||
|
||||
def __init__(self, subcloud_manager, peer_group_id, *args, **kwargs):
|
||||
def __init__(
|
||||
self, subcloud_manager: SubcloudManager, peer_group_id: int, *args, **kwargs
|
||||
):
|
||||
LOG.debug(_("PeerGroupAuditManager initialization..."))
|
||||
super().__init__(service_name="peer_group_audit_manager", *args, **kwargs)
|
||||
self.context = context.get_admin_context()
|
||||
@ -52,8 +62,8 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
|
||||
@staticmethod
|
||||
def _get_association_sync_status_from_peer_site(
|
||||
dc_client, system_peer, peer_group_id
|
||||
):
|
||||
dc_client: DcmanagerClient, system_peer: models.SystemPeer, peer_group_id: int
|
||||
) -> str:
|
||||
try:
|
||||
# Get peer site system peer
|
||||
dc_peer_system_peer = dc_client.get_system_peer(
|
||||
@ -81,8 +91,11 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
)
|
||||
|
||||
def _get_local_subclouds_to_update_and_delete(
|
||||
self, local_peer_group, remote_subclouds, remote_sync_status
|
||||
):
|
||||
self,
|
||||
local_peer_group: models.SubcloudPeerGroup,
|
||||
remote_subclouds: list[dict],
|
||||
remote_sync_status: str,
|
||||
) -> tuple[list[models.Subcloud], list[models.Subcloud], bool]:
|
||||
local_subclouds_to_update = list()
|
||||
local_subclouds_to_delete = list()
|
||||
any_rehome_failed = False
|
||||
@ -112,8 +125,13 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
# indicating any bootstrap values/address updates to
|
||||
# the subcloud on the remote site.
|
||||
if remote_sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC:
|
||||
LOG.info(
|
||||
"Peer association is out-of-sync, syncing rehome "
|
||||
f"data of subcloud '{local_subcloud.name}' from "
|
||||
"peer to current site"
|
||||
)
|
||||
self._sync_rehome_data(
|
||||
local_subcloud.id, remote_subcloud.get("rehome_data")
|
||||
local_subcloud, remote_subcloud.get("rehome_data")
|
||||
)
|
||||
elif remote_subcloud.get("deploy-status") in (
|
||||
consts.DEPLOY_STATE_REHOME_FAILED,
|
||||
@ -134,7 +152,7 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
|
||||
return local_subclouds_to_update, local_subclouds_to_delete, any_rehome_failed
|
||||
|
||||
def _set_local_subcloud_to_secondary(self, subcloud):
|
||||
def _set_local_subcloud_to_secondary(self, subcloud: models.Subcloud) -> None:
|
||||
try:
|
||||
LOG.info("Set local subcloud %s to secondary" % subcloud.name)
|
||||
# There will be an exception when unmanage
|
||||
@ -155,18 +173,42 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
)
|
||||
raise e
|
||||
|
||||
def _sync_rehome_data(self, subcloud_id, rehome_data):
|
||||
db_api.subcloud_update(self.context, subcloud_id, rehome_data=rehome_data)
|
||||
def _sync_rehome_data(self, subcloud: models.Subcloud, rehome_data: str) -> None:
|
||||
try:
|
||||
remote_rehome_data = json.loads(rehome_data)
|
||||
local_rehome_data = json.loads(subcloud.rehome_data)
|
||||
|
||||
def audit(self, system_peer, remote_peer_group, local_peer_group):
|
||||
# The systemcontroller_gateway_address can't be synced from the
|
||||
# peer to the local site as it's specific to each site
|
||||
remote_rehome_data["saved_payload"]["systemcontroller_gateway_address"] = (
|
||||
local_rehome_data["saved_payload"]["systemcontroller_gateway_address"]
|
||||
)
|
||||
new_rehome_data = json.dumps(remote_rehome_data)
|
||||
|
||||
db_api.subcloud_update(
|
||||
self.context, subcloud.id, rehome_data=new_rehome_data
|
||||
)
|
||||
except Exception as e:
|
||||
LOG.error(
|
||||
"Unable to sync rehome data of subcloud "
|
||||
f"'{subcloud.name}' from peer to current site: {str(e)}"
|
||||
)
|
||||
raise
|
||||
|
||||
def audit(
|
||||
self,
|
||||
system_peer: models.SystemPeer,
|
||||
remote_peer_group: dict,
|
||||
local_peer_group: models.SubcloudPeerGroup,
|
||||
) -> None:
|
||||
if local_peer_group.migration_status == consts.PEER_GROUP_MIGRATING:
|
||||
LOG.info("Local peer group in migrating state, quit audit")
|
||||
return
|
||||
|
||||
LOG.info(
|
||||
"Auditing remote subcloud peer group:[%s] migration_status:[%s] "
|
||||
"group_priority[%s], local subcloud peer group:[%s] "
|
||||
"migration_status:[%s] group_priority[%s]"
|
||||
"Auditing remote subcloud peer group: [%s], migration_status: [%s], "
|
||||
"group_priority: [%s], local subcloud peer group: [%s], "
|
||||
"migration_status: [%s], group_priority: [%s]"
|
||||
% (
|
||||
remote_peer_group.get("peer_group_name"),
|
||||
remote_peer_group.get("migration_status"),
|
||||
@ -277,8 +319,8 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
)
|
||||
LOG.exception(
|
||||
f"Failed to delete local subcloud [{subcloud.name}] that does "
|
||||
"not exist under the same subcloud_peer_group on peer site, "
|
||||
f"err: {e}"
|
||||
"not exist under the same subcloud_peer_group on peer site "
|
||||
f"{system_peer.peer_name}, err: {e}"
|
||||
)
|
||||
raise e
|
||||
|
||||
@ -313,7 +355,12 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
# If remote peer group migration_status is 'None'
|
||||
self.require_audit_flag = False
|
||||
|
||||
def _clear_or_raise_alarm(self, system_peer, local_peer_group, remote_peer_group):
|
||||
def _clear_or_raise_alarm(
|
||||
self,
|
||||
system_peer: models.SystemPeer,
|
||||
local_peer_group: models.SubcloudPeerGroup,
|
||||
remote_peer_group: dict,
|
||||
) -> None:
|
||||
# If local subcloud peer group's group_priority is
|
||||
# lower than remote subcloud peer group's group_priority,
|
||||
# an alarm will be raised.
|
||||
@ -325,7 +372,7 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
if local_peer_group.group_priority < remote_peer_group.get("group_priority"):
|
||||
LOG.warning(
|
||||
f"Alarm: local subcloud peer group [{local_peer_group.peer_group_name}]"
|
||||
f" is managed by remote system [{system_peer.peer_name}]"
|
||||
f" is managed by remote system peer [{system_peer.peer_name}]"
|
||||
)
|
||||
try:
|
||||
fault = fm_api.Fault(
|
||||
@ -336,7 +383,7 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
|
||||
reason_text=(
|
||||
"Subcloud peer group (peer_group_name=%s) is managed by "
|
||||
"remote system (peer_uuid=%s) with a lower priority."
|
||||
"remote system peer (peer_uuid=%s) with a lower priority."
|
||||
% (local_peer_group.peer_group_name, system_peer.peer_uuid)
|
||||
),
|
||||
alarm_type=fm_const.FM_ALARM_TYPE_0,
|
||||
@ -375,12 +422,12 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
try:
|
||||
self.audit(system_peer, remote_peer_group, local_peer_group)
|
||||
except Exception as e:
|
||||
LOG.exception("audit error occured: %s" % e)
|
||||
LOG.exception("audit error occurred: %s" % e)
|
||||
|
||||
def stop(self):
|
||||
if self.thread:
|
||||
self.thread.join()
|
||||
LOG.info(f"stopped peer group {self.peer_group_id} audit thread")
|
||||
LOG.info(f"Stopped peer group {self.peer_group_id} audit thread")
|
||||
else:
|
||||
LOG.info(f"No peer group {self.peer_group_id} audit thread to stop")
|
||||
|
||||
@ -402,12 +449,14 @@ class PeerGroupAuditManager(manager.Manager):
|
||||
):
|
||||
LOG.info(
|
||||
f"Audit peer group [{local_peer_group.peer_group_name}] "
|
||||
f"with remote system {system_peer.peer_name}"
|
||||
f"with remote system peer {system_peer.peer_name}"
|
||||
)
|
||||
self.start(system_peer, remote_peer_group, local_peer_group)
|
||||
|
||||
@staticmethod
|
||||
def send_audit_peer_group(system_peers, peer_group):
|
||||
def send_audit_peer_group(
|
||||
system_peers: list[models.SystemPeer], peer_group: models.SubcloudPeerGroup
|
||||
):
|
||||
if not system_peers:
|
||||
return
|
||||
local_system = utils.get_local_system()
|
||||
|
@ -9,6 +9,7 @@ import threading
|
||||
|
||||
from fm_api import constants as fm_const
|
||||
from fm_api import fm_api
|
||||
from keystoneauth1 import exceptions as ks_exceptions
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
@ -16,15 +17,58 @@ from dcmanager.common import consts
|
||||
from dcmanager.common import context
|
||||
from dcmanager.common import manager
|
||||
from dcmanager.db import api as db_api
|
||||
from dcmanager.db.sqlalchemy import models
|
||||
from dcmanager.manager import peer_group_audit_manager as pgam
|
||||
from dcmanager.manager.subcloud_manager import SubcloudManager
|
||||
from dcmanager.manager.system_peer_manager import SystemPeerManager
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
# Upon detecting that the secondary site is REACHABLE again, the PGA sync_status
|
||||
# will be set for BOTH sites by the primary site monitor thread as follows:
|
||||
SECONDARY_SITE_REACHABLE_TRANSITIONS = {
|
||||
# The UNKNOWN and FAILED are set when the secondary site becomes unreachable
|
||||
consts.ASSOCIATION_SYNC_STATUS_UNKNOWN: consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
|
||||
consts.ASSOCIATION_SYNC_STATUS_FAILED: consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC,
|
||||
# The SYNCING and IN_SYNC status can happen when first creating the peer and the
|
||||
# association. The association creation will set the status to SYNCING/IN_SYNC,
|
||||
# and the monitor thread might detect the peer availability after that, so we
|
||||
# don't modify the status
|
||||
consts.ASSOCIATION_SYNC_STATUS_IN_SYNC: consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
|
||||
consts.ASSOCIATION_SYNC_STATUS_SYNCING: consts.ASSOCIATION_SYNC_STATUS_SYNCING,
|
||||
# A similar situation can happen with OUT_OF_SYNC, if the association/peer group
|
||||
# is updated before the monitor thread runs its first check, so we don't modify
|
||||
# the status
|
||||
consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC: (
|
||||
consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC
|
||||
),
|
||||
}
|
||||
|
||||
# Upon detecting that the secondary site is UNREACHABLE, the PGA sync_status
|
||||
# will be set for the PIRMARY SITE ONLY by the primary site monitor thread as follows:
|
||||
SECONDARY_SITE_UNREACHABLE_TRANSITIONS = {
|
||||
# After the secondary site becomes unreachable, the only valid sync statuses
|
||||
# are UNKNOWN or FAILED
|
||||
consts.ASSOCIATION_SYNC_STATUS_UNKNOWN: consts.ASSOCIATION_SYNC_STATUS_UNKNOWN,
|
||||
consts.ASSOCIATION_SYNC_STATUS_FAILED: consts.ASSOCIATION_SYNC_STATUS_FAILED,
|
||||
consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC: consts.ASSOCIATION_SYNC_STATUS_FAILED,
|
||||
consts.ASSOCIATION_SYNC_STATUS_IN_SYNC: consts.ASSOCIATION_SYNC_STATUS_UNKNOWN,
|
||||
consts.ASSOCIATION_SYNC_STATUS_SYNCING: consts.ASSOCIATION_SYNC_STATUS_FAILED,
|
||||
}
|
||||
|
||||
|
||||
class PeerMonitor(object):
|
||||
def __init__(self, peer, context, subcloud_manager):
|
||||
"""Monitors and manages the connection status of a single DC system peer
|
||||
|
||||
The monitor runs in a separate thread and periodically checks the peer's
|
||||
availability using heartbeat mechanisms. It handles both failure and recovery
|
||||
scenarios, updating system states and managing fault alarms accordingly.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, peer: models.SystemPeer, context, subcloud_manager: SubcloudManager
|
||||
):
|
||||
self.peer = peer
|
||||
self.thread = None
|
||||
self.exit_flag = threading.Event()
|
||||
@ -32,21 +76,22 @@ class PeerMonitor(object):
|
||||
self.context = context
|
||||
self.subcloud_manager = subcloud_manager
|
||||
self.peer_group_id_set = set()
|
||||
self.failure_count = 0
|
||||
# key: peer_group_id
|
||||
# value: PeerGroupAuditManager object
|
||||
self.peer_group_audit_obj_map = dict()
|
||||
self.peer_group_audit_obj_map: dict[int, pgam.PeerGroupAuditManager] = dict()
|
||||
|
||||
def _clear_failure(self):
|
||||
alarm_id = fm_const.FM_ALARM_ID_DC_SYSTEM_PEER_HEARTBEAT_FAILED
|
||||
entity_instance_id = "peer=%s" % self.peer.peer_uuid
|
||||
entity_instance_id = f"peer={self.peer.peer_uuid}"
|
||||
try:
|
||||
fault = self.fm_api.get_fault(alarm_id, entity_instance_id)
|
||||
if fault:
|
||||
self.fm_api.clear_fault(alarm_id, entity_instance_id)
|
||||
except Exception as e:
|
||||
LOG.exception(
|
||||
"Problem clearing fault for peer %s, alarm_id=%s error: %s"
|
||||
% (self.peer.peer_uuid, alarm_id, e)
|
||||
f"Problem clearing fault for peer {self.peer.peer_name}, "
|
||||
f"alarm_id={alarm_id} error: {str(e)}"
|
||||
)
|
||||
|
||||
def _raise_failure(self):
|
||||
@ -114,124 +159,165 @@ class PeerMonitor(object):
|
||||
|
||||
if not dc_peer_subcloud_peer_group_list:
|
||||
LOG.warning(
|
||||
"Resource subcloud peer group of dc:%s not found"
|
||||
% self.peer.manager_endpoint
|
||||
f"No subcloud peer groups found for DC peer: {self.peer.peer_name} "
|
||||
f"(endpoint: {self.peer.manager_endpoint})"
|
||||
)
|
||||
|
||||
except (ks_exceptions.ConnectFailure, ks_exceptions.ConnectTimeout) as e:
|
||||
# Use warning level for common connection failures to reduce log spam
|
||||
LOG.warning(f"Failed to access DC peer {self.peer.peer_name}: {str(e)}")
|
||||
except Exception:
|
||||
LOG.exception("Failed to access the dc: %s" % self.peer.peer_name)
|
||||
LOG.exception(
|
||||
f"Unexpected error while accessing DC peer {self.peer.peer_name}"
|
||||
)
|
||||
|
||||
return failed, dc_peer_subcloud_peer_group_list
|
||||
|
||||
def _update_sync_status_secondary_site_becomes_unreachable(self):
|
||||
def _update_sync_status_secondary_site_becomes_unreachable(self) -> None:
|
||||
"""Update sync status on local site when secondary site becomes unreachable."""
|
||||
# Get associations by system peer
|
||||
associations = SystemPeerManager.get_local_associations(self.context, self.peer)
|
||||
for association in associations:
|
||||
# If the association is not primary, skip it.
|
||||
if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY:
|
||||
LOG.debug(
|
||||
"Skip update the Association sync_status as it is not primary."
|
||||
LOG.info(
|
||||
f"Skip updating local association (id={association.id}) "
|
||||
"sync_status as it's not primary"
|
||||
)
|
||||
continue
|
||||
# If the secondary site is down, set the association sync status
|
||||
# "in-sync" -> "unknown"
|
||||
# "unknown" -> "unknown"
|
||||
# "out-of-sync" -> "failed"
|
||||
# "syncing" -> "failed"
|
||||
# "failed" -> "failed"
|
||||
sync_status = consts.ASSOCIATION_SYNC_STATUS_UNKNOWN
|
||||
message = f"Peer site ({self.peer.peer_name}) is unreachable."
|
||||
if association.sync_status not in [
|
||||
consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
|
||||
consts.ASSOCIATION_SYNC_STATUS_UNKNOWN,
|
||||
]:
|
||||
sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED
|
||||
|
||||
try:
|
||||
new_sync_status = SECONDARY_SITE_UNREACHABLE_TRANSITIONS[
|
||||
association.sync_status
|
||||
]
|
||||
except KeyError:
|
||||
# This should never happen
|
||||
LOG.error(
|
||||
f"Unexpected sync status on association id={association.id}: "
|
||||
f"{association.sync_status}. Updating the sync status to "
|
||||
f"{consts.ASSOCIATION_SYNC_STATUS_FAILED}"
|
||||
)
|
||||
new_sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED
|
||||
|
||||
LOG.info(
|
||||
f"Updating local association (id={association.id}) sync_status "
|
||||
f"from {association.sync_status} to {new_sync_status} due "
|
||||
"to secondary site becoming unreachable"
|
||||
)
|
||||
|
||||
db_api.peer_group_association_update(
|
||||
self.context,
|
||||
association.id,
|
||||
sync_status=sync_status,
|
||||
sync_message=message,
|
||||
sync_status=new_sync_status,
|
||||
sync_message=f"Peer site ({self.peer.peer_name}) is unreachable.",
|
||||
)
|
||||
|
||||
def _update_sync_status_secondary_site_becomes_reachable(self):
|
||||
def _update_sync_status_secondary_site_becomes_reachable(self) -> None:
|
||||
"""Update sync status on both sites when secondary site becomes reachable."""
|
||||
# Get associations by system peer
|
||||
associations = SystemPeerManager.get_local_associations(self.context, self.peer)
|
||||
for association in associations:
|
||||
# If the association is not primary, skip it.
|
||||
if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY:
|
||||
LOG.debug(
|
||||
"Skip update Peer Site Association sync_status as "
|
||||
"current site Association is not primary."
|
||||
LOG.info(
|
||||
"Skip updating association sync_status on both sites as "
|
||||
"current site association is not primary."
|
||||
)
|
||||
continue
|
||||
# Upon detecting that the secondary site is reachable again,
|
||||
# the PGA sync_status will be set for both sites by the primary
|
||||
# site monitor thread as follows:
|
||||
# "unknown" -> "in-sync"
|
||||
# "failed" -> "out-of-sync"
|
||||
sync_status = consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC
|
||||
if association.sync_status == consts.ASSOCIATION_SYNC_STATUS_UNKNOWN:
|
||||
sync_status = consts.ASSOCIATION_SYNC_STATUS_IN_SYNC
|
||||
|
||||
try:
|
||||
new_sync_status = SECONDARY_SITE_REACHABLE_TRANSITIONS[
|
||||
association.sync_status
|
||||
]
|
||||
except KeyError:
|
||||
# This should never happen
|
||||
LOG.error(
|
||||
f"Unexpected sync status on association id={association.id}: "
|
||||
f"{association.sync_status}. Updating the sync status to "
|
||||
f"{consts.ASSOCIATION_SYNC_STATUS_FAILED}"
|
||||
)
|
||||
new_sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED
|
||||
|
||||
dc_local_pg = db_api.subcloud_peer_group_get(
|
||||
self.context, association.peer_group_id
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
f"Updating local association (id={association.id}) sync_status "
|
||||
f"from {association.sync_status} to {new_sync_status} due "
|
||||
"to secondary site becoming reachable"
|
||||
)
|
||||
|
||||
SystemPeerManager.update_sync_status(
|
||||
self.context,
|
||||
self.peer,
|
||||
sync_status,
|
||||
new_sync_status,
|
||||
dc_local_pg,
|
||||
association=association,
|
||||
)
|
||||
|
||||
def _do_monitor_peer(self):
|
||||
failure_count = 0
|
||||
LOG.info("Start monitoring thread for peer %s" % self.peer.peer_name)
|
||||
UNAVAILABLE_STATE = consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE
|
||||
AVAILABLE_STATE = consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE
|
||||
# Do the actual peer monitor.
|
||||
def _update_peer_state(self, state: str) -> models.SystemPeer:
|
||||
"""Update peer availability state in database."""
|
||||
return db_api.system_peer_update(
|
||||
self.context, self.peer.id, availability_state=state
|
||||
)
|
||||
|
||||
def _handle_peer_failure(self) -> None:
|
||||
"""Handle peer failure by raising alarms and updating states."""
|
||||
self.failure_count += 1
|
||||
if self.failure_count >= self.peer.heartbeat_failure_threshold:
|
||||
LOG.warning(
|
||||
f"DC peer '{self.peer.peer_name}' heartbeat failed, raising alarm"
|
||||
)
|
||||
self._raise_failure()
|
||||
self._update_peer_state(consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE)
|
||||
self._update_sync_status_secondary_site_becomes_unreachable()
|
||||
self.failure_count = 0
|
||||
self._set_require_audit_flag_to_associated_peer_groups()
|
||||
|
||||
def _handle_peer_recovery(self, remote_pg_list: list) -> None:
|
||||
"""Handle peer recovery by clearing alarms and restoring states."""
|
||||
self.failure_count = 0
|
||||
self._audit_local_peer_groups(remote_pg_list)
|
||||
if (
|
||||
self.peer.availability_state
|
||||
!= consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE
|
||||
):
|
||||
LOG.info(f"DC peer '{self.peer.peer_name}' is back online, clearing alarm")
|
||||
self._update_peer_state(consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE)
|
||||
self._update_sync_status_secondary_site_becomes_reachable()
|
||||
self._clear_failure()
|
||||
|
||||
def _check_peer_status(self) -> None:
|
||||
"""Perform a single peer status check and handle the result."""
|
||||
self.peer = db_api.system_peer_get(self.context, self.peer.id)
|
||||
failed, remote_pg_list = self._heartbeat_check_via_get_peer_group_list()
|
||||
|
||||
if failed:
|
||||
self._handle_peer_failure()
|
||||
else:
|
||||
self._handle_peer_recovery(remote_pg_list)
|
||||
|
||||
def _do_monitor_peer(self) -> None:
|
||||
"""Monitor peer connectivity and handle state changes."""
|
||||
LOG.info(f"Start monitoring thread for peer '{self.peer.peer_name}'")
|
||||
|
||||
# Run first check immediately
|
||||
try:
|
||||
self._check_peer_status()
|
||||
except Exception as e:
|
||||
LOG.exception(f"Initial check failed for peer '{self.peer.peer_name}': {e}")
|
||||
|
||||
# Continue with periodic checks
|
||||
while not self.exit_flag.wait(timeout=self.peer.heartbeat_interval):
|
||||
try:
|
||||
# Get system peer from DB
|
||||
self.peer = db_api.system_peer_get(self.context, self.peer.id)
|
||||
failed, remote_pg_list = self._heartbeat_check_via_get_peer_group_list()
|
||||
if failed:
|
||||
failure_count += 1
|
||||
if failure_count >= self.peer.heartbeat_failure_threshold:
|
||||
# heartbeat_failure_threshold reached.
|
||||
LOG.warning(
|
||||
"DC %s heartbeat failed, Raising alarm"
|
||||
% self.peer.peer_name
|
||||
)
|
||||
self._raise_failure()
|
||||
db_api.system_peer_update(
|
||||
self.context,
|
||||
self.peer.id,
|
||||
availability_state=UNAVAILABLE_STATE,
|
||||
)
|
||||
# pylint: disable=line-too-long
|
||||
self._update_sync_status_secondary_site_becomes_unreachable()
|
||||
failure_count = 0
|
||||
self._set_require_audit_flag_to_associated_peer_groups()
|
||||
else:
|
||||
failure_count = 0
|
||||
self._audit_local_peer_groups(remote_pg_list)
|
||||
if self.peer.availability_state != AVAILABLE_STATE:
|
||||
db_api.system_peer_update(
|
||||
self.context,
|
||||
self.peer.id,
|
||||
availability_state=AVAILABLE_STATE,
|
||||
)
|
||||
# pylint: disable=line-too-long
|
||||
self._update_sync_status_secondary_site_becomes_reachable()
|
||||
LOG.info("DC %s back online, clear alarm" % self.peer.peer_name)
|
||||
self._clear_failure()
|
||||
self._check_peer_status()
|
||||
except Exception as e:
|
||||
LOG.exception(
|
||||
"Got exception monitoring peer %s error: %s"
|
||||
% (self.peer.peer_name, e)
|
||||
f"Unexpected error monitoring peer '{self.peer.peer_name}': {e}"
|
||||
)
|
||||
LOG.info(
|
||||
"Caught graceful exit signal for peer monitor %s" % self.peer.peer_name
|
||||
)
|
||||
|
||||
LOG.info(f"Gracefully exiting monitor thread for peer '{self.peer.peer_name}'")
|
||||
|
||||
def _audit_local_peer_groups(self, remote_pg_list):
|
||||
# Generate a dict index by remote peer group name
|
||||
@ -266,7 +352,9 @@ class PeerMonitor(object):
|
||||
for pgam_obj in self.peer_group_audit_obj_map.values():
|
||||
pgam_obj.require_audit_flag = True
|
||||
|
||||
def audit_specific_local_peer_group(self, peer_group, remote_peer_group):
|
||||
def audit_specific_local_peer_group(
|
||||
self, peer_group: models.SubcloudPeerGroup, remote_peer_group
|
||||
):
|
||||
msg = None
|
||||
if peer_group.id in self.peer_group_audit_obj_map:
|
||||
pgam_obj = self.peer_group_audit_obj_map[peer_group.id]
|
||||
@ -324,17 +412,16 @@ class PeerMonitor(object):
|
||||
class PeerMonitorManager(manager.Manager):
|
||||
"""Manages tasks related to peer monitor."""
|
||||
|
||||
def __init__(self, subcloud_manager):
|
||||
def __init__(self, subcloud_manager: SubcloudManager):
|
||||
LOG.debug("PeerMonitorManager initialization...")
|
||||
|
||||
super(PeerMonitorManager, self).__init__(service_name="peer_monitor_manager")
|
||||
self.peer_monitor = dict()
|
||||
self.context = context.get_admin_context()
|
||||
self.subcloud_manager = subcloud_manager
|
||||
|
||||
# key: system_peer_id
|
||||
# value: PeerMonitor object
|
||||
self.peer_monitor_thread_map = dict()
|
||||
self.peer_monitor_thread_map: dict[int, PeerMonitor] = dict()
|
||||
|
||||
def _remove_peer_monitor_task(self, system_peer_id):
|
||||
peer_mon_obj = self.peer_monitor_thread_map[system_peer_id]
|
||||
|
@ -3674,6 +3674,11 @@ class SubcloudManager(manager.Manager):
|
||||
and systemcontroller_gateway_ip.split(",")[0]
|
||||
!= subcloud.systemcontroller_gateway_ip
|
||||
):
|
||||
LOG.info(
|
||||
f"The systemcontroller_gateway_ip for subcloud {subcloud.name} "
|
||||
f"was updated from {subcloud.systemcontroller_gateway_ip} to "
|
||||
f"{systemcontroller_gateway_ip.split(',')[0]}. Replacing routes..."
|
||||
)
|
||||
m_ks_client = OpenStackDriver(
|
||||
region_name=dccommon_consts.DEFAULT_REGION_NAME,
|
||||
region_clients=None,
|
||||
|
@ -9,6 +9,7 @@ from contextlib import nullcontext
|
||||
import functools
|
||||
import json
|
||||
import tempfile
|
||||
from typing import Optional
|
||||
|
||||
from eventlet import greenpool
|
||||
from oslo_log import log as logging
|
||||
@ -54,7 +55,9 @@ class SystemPeerManager(manager.Manager):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_local_associations(ctx, peer, local_pg=None):
|
||||
def get_local_associations(
|
||||
ctx, peer: models.SystemPeer, local_pg: models.SubcloudPeerGroup = None
|
||||
) -> list[models.PeerGroupAssociation]:
|
||||
if local_pg is None:
|
||||
# Get associations by system peer id
|
||||
return db_api.peer_group_association_get_by_system_peer_id(ctx, peer.id)
|
||||
@ -93,8 +96,12 @@ class SystemPeerManager(manager.Manager):
|
||||
"""
|
||||
|
||||
def _update_association_on_peer_site(
|
||||
peer, sync_status, local_pg, remote_pg, message
|
||||
):
|
||||
peer: models.SystemPeer,
|
||||
sync_status: str,
|
||||
local_pg: models.SubcloudPeerGroup,
|
||||
remote_pg: dict,
|
||||
message: str,
|
||||
) -> tuple[str, str]:
|
||||
try:
|
||||
# Get peer site dcmanager client
|
||||
dc_client = SystemPeerManager.get_peer_dc_client(peer)
|
||||
@ -118,21 +125,21 @@ class SystemPeerManager(manager.Manager):
|
||||
|
||||
# Update peer site association sync_status only if the
|
||||
# sync_status is different from the current sync_status
|
||||
if dc_peer_association.get("sync_status") != sync_status:
|
||||
if dc_peer_association.get("sync-status") != sync_status:
|
||||
# Update peer site association sync_status
|
||||
dc_peer_association_id = dc_peer_association.get("id")
|
||||
dc_client.update_peer_group_association_sync_status(
|
||||
dc_peer_association_id, sync_status
|
||||
)
|
||||
LOG.info(
|
||||
f"Updated Peer site {dc_peer_system_peer.get('id')} "
|
||||
f"Peer Group Association {dc_peer_association_id} "
|
||||
f"sync_status to {sync_status}."
|
||||
f"Updated peer site '{dc_peer_system_peer.get('peer-name')}' "
|
||||
f"peer group association {dc_peer_association_id} "
|
||||
f"sync_status to '{sync_status}'"
|
||||
)
|
||||
except Exception as e:
|
||||
message = (
|
||||
f"Failed to Update Peer Site ({peer.peer_uuid}) "
|
||||
f"Association sync_status to {sync_status}."
|
||||
f"Failed to update peer site '{peer.peer_name}' "
|
||||
f"association sync_status to '{sync_status}'"
|
||||
)
|
||||
LOG.exception(f"{message} Error: {e}")
|
||||
sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED
|
||||
@ -146,9 +153,9 @@ class SystemPeerManager(manager.Manager):
|
||||
|
||||
for association in associations:
|
||||
if association.association_type == consts.ASSOCIATION_TYPE_NON_PRIMARY:
|
||||
LOG.debug(
|
||||
f"Skip update Peer Site association sync_status to {sync_status} "
|
||||
"as current site Association is not primary."
|
||||
LOG.info(
|
||||
"Skipping updating peer and local site association "
|
||||
f"sync_status to '{sync_status}' as local site is not primary"
|
||||
)
|
||||
continue
|
||||
|
||||
@ -165,13 +172,16 @@ class SystemPeerManager(manager.Manager):
|
||||
association.sync_status == sync_status
|
||||
and sync_status != consts.ASSOCIATION_SYNC_STATUS_FAILED
|
||||
):
|
||||
LOG.debug(
|
||||
"Skip update current site association sync_status to "
|
||||
f"{sync_status} as current site Association is already "
|
||||
"in the same status."
|
||||
LOG.info(
|
||||
f"Local association {association.id} sync_status "
|
||||
f"already set to '{sync_status}', skipping update"
|
||||
)
|
||||
continue
|
||||
# Update primary site association sync_status
|
||||
LOG.info(
|
||||
f"Updating local association {association.id} sync_status "
|
||||
f"from '{association.sync_status}' to '{sync_status}'"
|
||||
)
|
||||
db_api.peer_group_association_update(
|
||||
ctx, association.id, sync_status=sync_status, sync_message=message
|
||||
)
|
||||
@ -222,10 +232,11 @@ class SystemPeerManager(manager.Manager):
|
||||
dccommon_consts.SYSTEM_CONTROLLER_NAME,
|
||||
p_ks_client.session,
|
||||
endpoint=dc_endpoint,
|
||||
peer=peer,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_peer_subcloud(dc_client, subcloud_name):
|
||||
def get_peer_subcloud(dc_client: DcmanagerClient, subcloud_name: str) -> dict:
|
||||
"""Get subcloud on peer site if exist.
|
||||
|
||||
:param dc_client: the dcmanager client object
|
||||
@ -235,7 +246,10 @@ class SystemPeerManager(manager.Manager):
|
||||
peer_subcloud = dc_client.get_subcloud(subcloud_name)
|
||||
return peer_subcloud
|
||||
except dccommon_exceptions.SubcloudNotFound:
|
||||
LOG.warn(f"Subcloud {subcloud_name} does not exist on peer site.")
|
||||
LOG.warn(
|
||||
f"Subcloud '{subcloud_name}' does not exist on "
|
||||
f"peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_subcloud_deploy_status(subcloud):
|
||||
@ -258,7 +272,9 @@ class SystemPeerManager(manager.Manager):
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def delete_peer_secondary_subcloud(dc_client, subcloud_ref):
|
||||
def delete_peer_secondary_subcloud(
|
||||
dc_client: DcmanagerClient, subcloud_ref: str
|
||||
) -> None:
|
||||
"""Delete secondary subcloud on peer site.
|
||||
|
||||
:param dc_client: the dcmanager client object
|
||||
@ -284,14 +300,17 @@ class SystemPeerManager(manager.Manager):
|
||||
return
|
||||
|
||||
dc_client.delete_subcloud(subcloud_ref)
|
||||
LOG.info(f"Deleted Subcloud {subcloud_ref} on peer site.")
|
||||
LOG.info(
|
||||
f"Deleted subcloud '{subcloud_ref}' on peer site "
|
||||
f"'{dc_client.peer.peer_name}'"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds):
|
||||
"""Run parallel group operation on subclouds."""
|
||||
failed_subclouds = []
|
||||
processed = 0
|
||||
error_msg = {} # Dictinary to store error message for each subcloud
|
||||
error_msg = {} # Dictionary to store error message for each subcloud
|
||||
|
||||
for subcloud, success in thread_pool.imap(op_function, subclouds):
|
||||
processed += 1
|
||||
@ -365,9 +384,9 @@ class SystemPeerManager(manager.Manager):
|
||||
region_name, files, data, is_region_name=True
|
||||
)
|
||||
LOG.info(
|
||||
f"Updated Subcloud {dc_peer_subcloud.get('name')} "
|
||||
f"Updated subcloud '{dc_peer_subcloud.get('name')}' "
|
||||
f"(region_name: {dc_peer_subcloud.get('region-name')}) "
|
||||
"on peer site."
|
||||
f"on peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
else:
|
||||
# Create subcloud on peer site if not exist
|
||||
@ -375,13 +394,15 @@ class SystemPeerManager(manager.Manager):
|
||||
files, data
|
||||
)
|
||||
LOG.info(
|
||||
f"Created Subcloud {dc_peer_subcloud.get('name')} "
|
||||
f"Created subcloud '{dc_peer_subcloud.get('name')}' "
|
||||
f"(region_name: {dc_peer_subcloud.get('region-name')}) "
|
||||
"on peer site."
|
||||
f"on peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
LOG.debug(
|
||||
f"Updating subcloud {subcloud_name} (region_name: {region_name}) "
|
||||
f"with subcloud peer group id {dc_peer_pg_id} on peer site."
|
||||
LOG.info(
|
||||
f"Updating subcloud '{subcloud_name}' (region_name: "
|
||||
f"{dc_peer_subcloud.get('region-name')}) with subcloud "
|
||||
f"peer group id {dc_peer_pg_id} on peer site "
|
||||
f"'{dc_client.peer.peer_name}'"
|
||||
)
|
||||
# Update subcloud associated peer group on peer site.
|
||||
# The peer_group update will check the header and should
|
||||
@ -413,12 +434,15 @@ class SystemPeerManager(manager.Manager):
|
||||
except Exception as e:
|
||||
subcloud.msg = str(e) # Store error message for subcloud
|
||||
LOG.error(
|
||||
f"Failed to add/update Subcloud {subcloud_name} "
|
||||
f"(region_name: {region_name}) on peer site: {str(e)}"
|
||||
f"Failed to add/update subcloud '{subcloud_name}' "
|
||||
f"(region_name: {region_name}) on peer site "
|
||||
f"'{dc_client.peer.peer_name}': {str(e)}"
|
||||
)
|
||||
return subcloud, False
|
||||
|
||||
def _delete_subcloud(self, dc_client, subcloud):
|
||||
def _delete_subcloud(
|
||||
self, dc_client: DcmanagerClient, subcloud: models.Subcloud
|
||||
) -> tuple[models.Subcloud, bool]:
|
||||
"""Delete subcloud on peer site in parallel."""
|
||||
try:
|
||||
subcloud_name = subcloud.get("name")
|
||||
@ -427,11 +451,12 @@ class SystemPeerManager(manager.Manager):
|
||||
except Exception as e:
|
||||
subcloud.msg = str(e)
|
||||
LOG.exception(
|
||||
f"Failed to delete Subcloud {subcloud_name} on peer site: {str(e)}"
|
||||
f"Failed to delete subcloud '{subcloud_name}' on "
|
||||
f"peer site '{dc_client.peer.peer_name}': {str(e)}"
|
||||
)
|
||||
return subcloud, False
|
||||
|
||||
def _is_valid_for_subcloud_sync(self, subcloud):
|
||||
def _is_valid_for_subcloud_sync(self, subcloud: models.Subcloud) -> str:
|
||||
"""Verify subcloud data for sync."""
|
||||
subcloud_name = subcloud.get("name")
|
||||
region_name = subcloud.get("region_name")
|
||||
@ -485,10 +510,12 @@ class SystemPeerManager(manager.Manager):
|
||||
|
||||
return VERIFY_SUBCLOUD_SYNC_VALID
|
||||
|
||||
def _validate_subclouds_for_sync(self, subclouds, dc_client):
|
||||
def _validate_subclouds_for_sync(
|
||||
self, subclouds: list[models.Subcloud], dc_client: DcmanagerClient
|
||||
) -> tuple[list[models.Subcloud], dict[str, str]]:
|
||||
"""Validate subclouds for sync."""
|
||||
valid_subclouds = []
|
||||
error_msg = {} # Dictinary to store error message for each subcloud
|
||||
error_msg = {} # Dictionary to store error message for each subcloud
|
||||
|
||||
for subcloud in subclouds:
|
||||
subcloud_name = subcloud.get("name")
|
||||
@ -510,8 +537,8 @@ class SystemPeerManager(manager.Manager):
|
||||
peer_subcloud = self.get_peer_subcloud(dc_client, subcloud_name)
|
||||
if not peer_subcloud:
|
||||
LOG.info(
|
||||
f"Subcloud {subcloud_name} (region_name: {region_name}) does "
|
||||
"not exist on peer site."
|
||||
f"Subcloud '{subcloud_name}' (region_name: {region_name}) does "
|
||||
f"not exist on peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
valid_subclouds.append(subcloud)
|
||||
continue
|
||||
@ -541,7 +568,9 @@ class SystemPeerManager(manager.Manager):
|
||||
|
||||
return valid_subclouds, error_msg
|
||||
|
||||
def _sync_subclouds(self, context, peer, dc_local_pg_id, dc_peer_pg_id):
|
||||
def _sync_subclouds(
|
||||
self, context, peer: models.SystemPeer, dc_local_pg_id: int, dc_peer_pg_id: int
|
||||
) -> dict[str, str]:
|
||||
"""Sync subclouds of local peer group to peer site.
|
||||
|
||||
:param context: request context object
|
||||
@ -572,7 +601,10 @@ class SystemPeerManager(manager.Manager):
|
||||
)
|
||||
|
||||
error_msg.update(sync_error_msg)
|
||||
LOG.info("Subcloud peer sync operation finished")
|
||||
LOG.info(
|
||||
f"Completed subcloud peer sync with peer '{peer.peer_name}': processed "
|
||||
f"{len(subclouds_to_sync)} subclouds, {len(failed_subclouds)} failed"
|
||||
)
|
||||
|
||||
dc_local_region_names = set()
|
||||
for subcloud in subclouds:
|
||||
@ -591,7 +623,10 @@ class SystemPeerManager(manager.Manager):
|
||||
dc_peer_subcloud_diff_names = dc_peer_region_names - dc_local_region_names
|
||||
for subcloud_to_delete in dc_peer_subcloud_diff_names:
|
||||
try:
|
||||
LOG.debug(f"Deleting Subcloud name {subcloud_to_delete} on peer site.")
|
||||
LOG.info(
|
||||
f"Deleting subcloud '{subcloud_to_delete}' on "
|
||||
f"peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
self.delete_peer_secondary_subcloud(dc_client, subcloud_to_delete)
|
||||
except Exception as e:
|
||||
msg = f"Subcloud delete failed: {str(e)}"
|
||||
@ -664,7 +699,7 @@ class SystemPeerManager(manager.Manager):
|
||||
context, peer_group_id
|
||||
)
|
||||
if not associations:
|
||||
LOG.debug("No association found for peer group %s" % peer_group_id)
|
||||
LOG.info(f"No association found for peer group '{peer_group_id}'")
|
||||
else:
|
||||
for association in associations:
|
||||
if sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC:
|
||||
@ -722,7 +757,7 @@ class SystemPeerManager(manager.Manager):
|
||||
sync_status=new_sync_status,
|
||||
sync_message=new_sync_message,
|
||||
)
|
||||
LOG.debug(
|
||||
LOG.info(
|
||||
f"Updated Local Peer Group Association {association.id} "
|
||||
f"sync_status to {new_sync_status}."
|
||||
)
|
||||
@ -786,8 +821,11 @@ class SystemPeerManager(manager.Manager):
|
||||
return success_peer_ids, failed_peer_ids
|
||||
|
||||
def _get_non_primary_association(
|
||||
self, dc_client, dc_peer_system_peer_id, dc_peer_pg_id
|
||||
):
|
||||
self,
|
||||
dc_client: DcmanagerClient,
|
||||
dc_peer_system_peer_id: int,
|
||||
dc_peer_pg_id: int,
|
||||
) -> Optional[dict]:
|
||||
"""Get non-primary Association from peer site."""
|
||||
try:
|
||||
return dc_client.get_peer_group_association_with_peer_id_and_pg_id(
|
||||
@ -795,18 +833,23 @@ class SystemPeerManager(manager.Manager):
|
||||
)
|
||||
except dccommon_exceptions.PeerGroupAssociationNotFound:
|
||||
LOG.error(
|
||||
"Peer Group association does not exist on peer site. "
|
||||
f"Peer Group ID: {dc_peer_pg_id}, Peer System Peer ID: "
|
||||
f"{dc_peer_system_peer_id}"
|
||||
"Peer group association does not exist on peer site "
|
||||
f"'{dc_client.peer.peer_name}'. Peer group ID: {dc_peer_pg_id}, "
|
||||
f"peer system peer ID: {dc_peer_system_peer_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
def _get_peer_site_pg_by_name(self, dc_client, peer_group_name):
|
||||
def _get_peer_site_pg_by_name(
|
||||
self, dc_client: DcmanagerClient, peer_group_name: str
|
||||
) -> Optional[dict]:
|
||||
"""Get remote Peer Group from peer site by name."""
|
||||
try:
|
||||
return dc_client.get_subcloud_peer_group(peer_group_name)
|
||||
except dccommon_exceptions.SubcloudPeerGroupNotFound:
|
||||
LOG.error(f"Peer Group {peer_group_name} does not exist on peer site.")
|
||||
LOG.error(
|
||||
f"Peer group '{peer_group_name}' does not exist on "
|
||||
f"peer site '{dc_client.peer.peer_name}'"
|
||||
)
|
||||
return None
|
||||
|
||||
def _get_peer_site_system_peer(self, dc_client, peer_uuid=None):
|
||||
@ -838,11 +881,6 @@ class SystemPeerManager(manager.Manager):
|
||||
:param association_id: id of association to sync
|
||||
:param sync_subclouds: Enabled to sync subclouds to peer site
|
||||
"""
|
||||
LOG.info(
|
||||
f"Synchronize the association {association_id} of the "
|
||||
"Subcloud Peer Group with the System Peer pointing to the "
|
||||
"peer site."
|
||||
)
|
||||
|
||||
association = db_api.peer_group_association_get(context, association_id)
|
||||
peer = db_api.system_peer_get(context, association.system_peer_id)
|
||||
@ -850,14 +888,19 @@ class SystemPeerManager(manager.Manager):
|
||||
peer_group_name = dc_local_pg.peer_group_name
|
||||
dc_peer_association_id = None
|
||||
|
||||
LOG.info(
|
||||
f"Syncing peer group association {association_id} that links subcloud "
|
||||
f"peer group '{peer_group_name}' with system peer '{peer.peer_name}'"
|
||||
)
|
||||
|
||||
try:
|
||||
# Check if the system_uuid of the peer site matches with the
|
||||
# peer_uuid
|
||||
system = self.get_peer_sysinv_client(peer).get_system()
|
||||
if system.uuid != peer.peer_uuid:
|
||||
LOG.error(
|
||||
f"Peer site system uuid {system.uuid} does not match "
|
||||
f"with the peer_uuid {peer.peer_uuid}"
|
||||
f"Peer site '{peer.peer_name}' system uuid {system.uuid} does "
|
||||
f"not match the configured peer_uuid: {peer.peer_uuid}"
|
||||
)
|
||||
raise exceptions.PeerGroupAssociationTargetNotMatch(uuid=system.uuid)
|
||||
|
||||
@ -873,7 +916,8 @@ class SystemPeerManager(manager.Manager):
|
||||
|
||||
if dc_peer_system_peer is None:
|
||||
failed_message = (
|
||||
f"System Peer {local_system_uuid} does not exist on peer site."
|
||||
f"A system peer with UUID of {local_system_uuid} "
|
||||
f"does not exist on peer site '{peer.peer_name}'"
|
||||
)
|
||||
return db_api.peer_group_association_db_model_to_dict(
|
||||
self._update_sync_status_to_failed(
|
||||
@ -896,7 +940,7 @@ class SystemPeerManager(manager.Manager):
|
||||
dc_peer_pg = dc_client.add_subcloud_peer_group(**peer_group_kwargs)
|
||||
LOG.info(
|
||||
f"Created Subcloud Peer Group {peer_group_name} on "
|
||||
f"peer site. ID is {dc_peer_pg.get('id')}."
|
||||
f"peer site '{peer.peer_name}'. ID is {dc_peer_pg.get('id')}."
|
||||
)
|
||||
dc_peer_pg_id = dc_peer_pg.get("id")
|
||||
dc_peer_pg_priority = dc_peer_pg.get("group_priority")
|
||||
@ -921,7 +965,7 @@ class SystemPeerManager(manager.Manager):
|
||||
)
|
||||
LOG.info(
|
||||
"Created 'non-primary' Peer Group Association "
|
||||
f"{dc_peer_association.get('id')} on peer site."
|
||||
f"{dc_peer_association.get('id')} on peer site '{peer.peer_name}'"
|
||||
)
|
||||
dc_peer_association_id = dc_peer_association.get("id")
|
||||
|
||||
@ -943,7 +987,7 @@ class SystemPeerManager(manager.Manager):
|
||||
)
|
||||
LOG.info(
|
||||
f"Updated Subcloud Peer Group {peer_group_name} on "
|
||||
f"peer site, ID is {dc_peer_pg.get('id')}."
|
||||
f"peer site '{peer.peer_name}', ID is {dc_peer_pg.get('id')}"
|
||||
)
|
||||
|
||||
association_update = {
|
||||
@ -998,7 +1042,6 @@ class SystemPeerManager(manager.Manager):
|
||||
:param context: request context object.
|
||||
:param association_id: id of association to delete
|
||||
"""
|
||||
LOG.info(f"Deleting association peer group {association_id}.")
|
||||
|
||||
# Retrieve the peer group association details from the database
|
||||
association = db_api.peer_group_association_get(context, association_id)
|
||||
@ -1006,6 +1049,11 @@ class SystemPeerManager(manager.Manager):
|
||||
dc_local_pg = db_api.subcloud_peer_group_get(context, association.peer_group_id)
|
||||
peer_group_name = dc_local_pg.peer_group_name
|
||||
|
||||
LOG.info(
|
||||
f"Deleting peer group association ({association_id=}, "
|
||||
f"peer={peer.peer_name}, peer_group_id={dc_local_pg.id})"
|
||||
)
|
||||
|
||||
try:
|
||||
# Check if the system_uuid of the peer site matches with the
|
||||
# peer_uuid
|
||||
@ -1035,7 +1083,7 @@ class SystemPeerManager(manager.Manager):
|
||||
# be deleted
|
||||
LOG.warning(
|
||||
f"Subcloud Peer Group {peer_group_name} does "
|
||||
"not exist on peer site."
|
||||
f"not exist on peer site '{peer.peer_name}'"
|
||||
)
|
||||
return self._delete_primary_association(context, association_id)
|
||||
|
||||
@ -1045,7 +1093,7 @@ class SystemPeerManager(manager.Manager):
|
||||
if dc_peer_pg_priority == 0:
|
||||
LOG.error(
|
||||
f"Failed to delete peer_group_association. Peer Group: "
|
||||
f"{peer_group_name} has priority 0 on peer site."
|
||||
f"{peer_group_name} has priority 0 on peer site '{peer.peer_name}'"
|
||||
)
|
||||
raise exceptions.SubcloudPeerGroupHasWrongPriority(
|
||||
priority=dc_peer_pg_priority
|
||||
@ -1064,11 +1112,13 @@ class SystemPeerManager(manager.Manager):
|
||||
if delete_error_msg:
|
||||
LOG.error(
|
||||
"Failed to delete subcloud(s) from the Subcloud Peer Group "
|
||||
f"{peer_group_name} on peer site: {json.dumps(delete_error_msg)}"
|
||||
f"'{peer_group_name}' on peer site '{peer.peer_name}': "
|
||||
f"{json.dumps(delete_error_msg)}"
|
||||
)
|
||||
sync_message = (
|
||||
f"Deletion of {list(delete_error_msg.keys())} from the "
|
||||
f"Subcloud Peer Group: {peer_group_name} on the peer site failed."
|
||||
f"Subcloud Peer Group: {peer_group_name} on the peer site "
|
||||
f"'{peer.peer_name}' failed."
|
||||
)
|
||||
self._update_sync_status_to_failed(
|
||||
context, association_id, sync_message
|
||||
@ -1080,12 +1130,14 @@ class SystemPeerManager(manager.Manager):
|
||||
try:
|
||||
dc_client.delete_subcloud_peer_group(peer_group_name)
|
||||
LOG.info(
|
||||
f"Deleted Subcloud Peer Group {peer_group_name} on peer site."
|
||||
f"Deleted Subcloud Peer Group {peer_group_name} on "
|
||||
f"peer site '{peer.peer_name}'"
|
||||
)
|
||||
except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated:
|
||||
LOG.error(
|
||||
f"Subcloud Peer Group {peer_group_name} delete failed "
|
||||
"as it is associated with System Peer on peer site."
|
||||
f"as it is associated with System Peer on peer site "
|
||||
f"'{peer.peer_name}'"
|
||||
)
|
||||
return self._delete_primary_association(context, association_id)
|
||||
dc_peer_system_peer_id = dc_peer_system_peer.get("id")
|
||||
@ -1100,18 +1152,21 @@ class SystemPeerManager(manager.Manager):
|
||||
dc_client.delete_peer_group_association(dc_peer_association_id)
|
||||
elif dc_peer_association is None:
|
||||
LOG.warning(
|
||||
f"PeerGroupAssociation does not exist on peer site. "
|
||||
f"Peer Group ID: {dc_peer_pg_id}, peer site System "
|
||||
f"Peer ID: {dc_peer_system_peer_id}"
|
||||
f"PeerGroupAssociation does not exist on peer site "
|
||||
f"'{peer.peer_name}'. Peer Group ID: {dc_peer_pg_id}, "
|
||||
f"peer site System Peer ID: {dc_peer_system_peer_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
dc_client.delete_subcloud_peer_group(peer_group_name)
|
||||
LOG.info(f"Deleted Subcloud Peer Group {peer_group_name} on peer site.")
|
||||
LOG.info(
|
||||
f"Deleted Subcloud Peer Group {peer_group_name} on "
|
||||
f"peer site '{peer.peer_name}'"
|
||||
)
|
||||
except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated:
|
||||
failed_message = (
|
||||
f"Subcloud Peer Group {peer_group_name} delete failed as it "
|
||||
"is associated with system peer on peer site."
|
||||
f"is associated with system peer on peer site '{peer.peer_name}'"
|
||||
)
|
||||
self._update_sync_status_to_failed(
|
||||
context, association_id, failed_message
|
||||
|
@ -107,26 +107,57 @@ class TestPeerMonitor(base.DCManagerTestCase):
|
||||
)
|
||||
|
||||
def test_update_sync_status_and_association_is_non_primary(self):
|
||||
# Delete the primary association created during setUp
|
||||
db_api.peer_group_association_destroy(self.ctx, self.association.id)
|
||||
|
||||
# Create a non-primary association
|
||||
association = self.system_peer_manager.create_peer_group_association_static(
|
||||
self.ctx,
|
||||
system_peer_id=self.peer.id,
|
||||
peer_group_id=self.peer_group2.id,
|
||||
association_type=consts.ASSOCIATION_TYPE_NON_PRIMARY,
|
||||
sync_status=consts.ASSOCIATION_SYNC_STATUS_UNKNOWN,
|
||||
)
|
||||
self.mock_get_peer_dc_client().get_subcloud_peer_group.return_value = {
|
||||
"id": FAKE_SITE1_PEER_GROUP_ID
|
||||
}
|
||||
|
||||
# Test the case where the association is non-primary
|
||||
self.peer_monitor._update_sync_status_secondary_site_becomes_reachable()
|
||||
self.mock_get_peer_dc_client().get_subcloud_peer_group.assert_called_once_with(
|
||||
self.peer_group1.peer_group_name
|
||||
# Mock update_sync_status
|
||||
mock_patch = mock.patch.object(
|
||||
peer_monitor_manager.SystemPeerManager, "update_sync_status"
|
||||
)
|
||||
mock_update_sync_status = mock_patch.start()
|
||||
self.addCleanup(mock_patch.stop)
|
||||
|
||||
self.peer_monitor._update_sync_status_secondary_site_becomes_reachable()
|
||||
|
||||
# Assert that the association sync status was not updated
|
||||
mock_update_sync_status.assert_not_called()
|
||||
association_new = db_api.peer_group_association_get(self.ctx, association.id)
|
||||
self.assertEqual(
|
||||
consts.ASSOCIATION_SYNC_STATUS_UNKNOWN, association_new.sync_status
|
||||
)
|
||||
|
||||
def test_update_sync_status_and_association_is_in_sync(self):
|
||||
# Mock update_sync_status
|
||||
mock_patch = mock.patch.object(
|
||||
peer_monitor_manager.SystemPeerManager, "update_sync_status"
|
||||
)
|
||||
mock_update_sync_status = mock_patch.start()
|
||||
self.addCleanup(mock_patch.stop)
|
||||
|
||||
self.peer_monitor._update_sync_status_secondary_site_becomes_reachable()
|
||||
|
||||
# Assert that the association sync status was not updated as it's
|
||||
# already in-sync
|
||||
association_new = db_api.peer_group_association_get(
|
||||
self.ctx, self.association.id
|
||||
)
|
||||
self.assertEqual(
|
||||
consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, association_new.sync_status
|
||||
)
|
||||
|
||||
# But the update_sync_status still has to be triggered so it handles any
|
||||
# sync message update
|
||||
mock_update_sync_status.assert_called_once()
|
||||
|
||||
def test_update_sync_status_secondary_site_becomes_reachable(self):
|
||||
self.mock_get_local_system.return_value = test_system_peer_manager.FakeSystem(
|
||||
FAKE_SITE0_SYSTEM_UUID
|
||||
@ -286,7 +317,7 @@ class TestPeerMonitor(base.DCManagerTestCase):
|
||||
mock_event.side_effect = [False, False, True]
|
||||
self.peer_monitor._do_monitor_peer()
|
||||
self.mock_log.exception.assert_called_with(
|
||||
f"Problem clearing fault for peer {self.peer.peer_uuid}, alarm_id="
|
||||
f"Problem clearing fault for peer {self.peer.peer_name}, alarm_id="
|
||||
f"{fm_const.FM_ALARM_ID_DC_SYSTEM_PEER_HEARTBEAT_FAILED} error: boom"
|
||||
)
|
||||
|
||||
@ -319,7 +350,7 @@ class TestPeerMonitor(base.DCManagerTestCase):
|
||||
mock_event.side_effect = [False, False, True]
|
||||
self.peer_monitor._do_monitor_peer()
|
||||
self.mock_log.exception.assert_called_with(
|
||||
"Got exception monitoring peer PeerSite1 error: boom"
|
||||
"Unexpected error monitoring peer 'PeerSite1': boom"
|
||||
)
|
||||
|
||||
def test_heartbeat_check_via_get_peer_group_list_pg_not_found(self):
|
||||
@ -327,8 +358,8 @@ class TestPeerMonitor(base.DCManagerTestCase):
|
||||
ret = self.peer_monitor._heartbeat_check_via_get_peer_group_list()
|
||||
self.mock_get_peer_dc_client.assert_called()
|
||||
self.mock_log.warning.assert_called_once_with(
|
||||
"Resource subcloud peer group of dc:"
|
||||
"http://128.128.128.128:5000/v3 not found"
|
||||
"No subcloud peer groups found for DC peer: PeerSite1 "
|
||||
"(endpoint: http://128.128.128.128:5000/v3)"
|
||||
)
|
||||
self.assertEqual((False, []), ret)
|
||||
|
||||
|
@ -143,6 +143,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
|
||||
mock_patch = mock.patch.object(system_peer_manager, "DcmanagerClient")
|
||||
self.mock_dc_client = mock_patch.start()
|
||||
self.mock_dc_client().peer.peer_name = "SystemPeer1"
|
||||
self.addCleanup(mock_patch.stop)
|
||||
|
||||
def _mock_system_peer_manager_peersitedriver(self):
|
||||
@ -474,7 +475,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
"dcmanager.manager.system_peer_manager.SystemPeerManager.get_peer_dc_client"
|
||||
)
|
||||
def test_update_sync_status_exception(self, mock_client):
|
||||
mock_client.return_value = Exception("boom")
|
||||
mock_client.side_effect = Exception("boom")
|
||||
self.spm.update_sync_status(
|
||||
self.ctx, self.peer, consts.ASSOCIATION_SYNC_STATUS_IN_SYNC
|
||||
)
|
||||
@ -673,8 +674,8 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
self.ctx, self.peer, self.peer_group.id, FAKE_SITE1_PEER_GROUP_ID
|
||||
)
|
||||
self.mock_log.error.assert_called_once_with(
|
||||
f"Failed to add/update Subcloud {subcloud1.name} "
|
||||
f"(region_name: {subcloud1.region_name}) on peer site: boom"
|
||||
f"Failed to add/update subcloud '{subcloud1.name}' "
|
||||
f"(region_name: {subcloud1.region_name}) on peer site 'SystemPeer1': boom"
|
||||
)
|
||||
|
||||
def test_sync_subclouds_delete_subcloud_exception(self):
|
||||
@ -743,8 +744,11 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
self.mock_dc_client().get_subcloud.side_effect = [peer_subcloud4]
|
||||
self.peer_group_association.return_value = {"id": FAKE_SITE1_ASSOCIATION_ID}
|
||||
self.spm.delete_peer_group_association(self.ctx, self.association.id)
|
||||
Calls = [
|
||||
mock.call("Deleting association peer group 1."),
|
||||
calls = [
|
||||
mock.call(
|
||||
"Deleting peer group association (association_id=1, "
|
||||
"peer=SystemPeer1, peer_group_id=1)"
|
||||
),
|
||||
mock.call(
|
||||
f"Ignoring delete Peer Site Subcloud {subcloud.name} as "
|
||||
"is not in secondary or rehome failed state."
|
||||
@ -755,10 +759,10 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
),
|
||||
mock.call(
|
||||
f"Deleted Subcloud Peer Group {self.peer_group.peer_group_name} "
|
||||
"on peer site."
|
||||
"on peer site 'SystemPeer1'"
|
||||
),
|
||||
]
|
||||
self.mock_log.info.assert_has_calls(Calls)
|
||||
self.mock_log.info.assert_has_calls(calls)
|
||||
|
||||
def test_delete_peer_group_association_uuid_does_not_match(self):
|
||||
peer = self.create_system_peer_static(
|
||||
@ -806,7 +810,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
associations = db_api.peer_group_association_get_all(self.ctx)
|
||||
self.assertEqual(1, len(associations))
|
||||
self.mock_log.exception.assert_called_once_with(
|
||||
"Failed to delete Subcloud subcloud1 on peer site: boom"
|
||||
"Failed to delete subcloud 'subcloud1' on peer site 'SystemPeer1': boom"
|
||||
)
|
||||
|
||||
def test_delete_peer_group_association_failed(self):
|
||||
@ -820,8 +824,8 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
self.association.id,
|
||||
)
|
||||
self.mock_log.error.assert_called_once_with(
|
||||
f"Subcloud Peer Group {self.peer_group.peer_group_name} "
|
||||
"delete failed as it is associated with system peer on peer site."
|
||||
f"Subcloud Peer Group {self.peer_group.peer_group_name} delete "
|
||||
"failed as it is associated with system peer on peer site 'SystemPeer1'"
|
||||
)
|
||||
|
||||
def test_delete_peer_group_association_subcloud_pg_notfound(self):
|
||||
@ -831,7 +835,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
|
||||
self.spm.delete_peer_group_association(self.ctx, self.association.id)
|
||||
self.mock_log.warning.assert_called_once_with(
|
||||
f"Subcloud Peer Group {self.peer_group.peer_group_name} "
|
||||
"does not exist on peer site."
|
||||
"does not exist on peer site 'SystemPeer1'"
|
||||
)
|
||||
associations = db_api.peer_group_association_get_all(self.ctx)
|
||||
self.assertEqual(0, len(associations))
|
||||
|
Loading…
Reference in New Issue
Block a user