Optimize subcloud state manager's queries

This commit optimizes subcloud state manager's queries used to perform a
bulk update for a subcloud's availability and endpoint status, which
were being used in [1].
Previously, each database query was made separately for every update
for either the availability status and/or the endpoint(s) during the
audit process, which resulted in duplicated state and database calls.
In [1] the RPC state calls were significantly reduced by creating a
single request for each subcloud. Consequently, all queries that
were made in the database in separate steps of the process started to be
executed at once, resulting in approximately 23 [2] queries per subcloud
for a complete audit. With this commit, the maximum number of database
transactions is reduced from 23 to 3.

Test plan:
1. PASS: Unmanage a subcloud and verify that all of its endpoints' sync
   status become unknown.
2. PASS: Manage a subcloud and verify that all of its endpoints' sync
   status become in-sync.
3. PASS: Apply a patch in the system controller and verify that all of
   the subclouds' patching sync status becomes out-of-sync.
4. PASS: Apply the patch in the subclouds and verify that their patching
   sync status becomes in-sync.
5. PASS: Verify that the hourly unconditional update for the subcloud's
   availability status updates the database

[1] https://review.opendev.org/c/starlingx/distcloud/+/922058
[2] Analysis of the number of requests considering dcmanager's audit
Subcloud becoming online:
- subcloud_get_by_region_name: 1
- fm's get_fault: 1
- fm's set_fault or clear_fault: 1
- subcloud_update: 1
- for each endpoint audited by dcmanager (7):
    - subcloud_get_by_region_name: 1 (removed)
    - subcloud_get_with_status: 1 (removed)
    - subcloud_endpoint_status_db_model_to_dict: 9 (removed)
    - subcloud_status_update: 1 (changed to one query for all endpoints)
    - fm's get_fault: 1
    - fm's set_fault or clear_fault: 1
Total:
    - 39 queries that are now 19, considering fm's database.
    - 23 queries that are now 3 in dcmanager's database

Note that the totals do not include the db_model_to_dict request
because it does not query the database.

Subcloud becoming offline:
- subcloud_get_by_region_name: 2 reduced to 1
- fm's get_fault: 10 (one for each endpoint and the availability)
- fm's set_fault or clear_fault: 10
- subcloud_get_with_status: 1 (removed)
- subcloud_endpoint_status_db_model_to_dict: 9 (removed)
- subcloud_status_update_endpoints: 1
- subcloud_update: 1

Story: 2011106
Task: 50433

Change-Id: I34b8604bf445cc0ebdc02c5959a919221e62de5a
Signed-off-by: Raphael Lima <Raphael.Lima@windriver.com>
This commit is contained in:
Raphael Lima
2024-06-21 17:57:15 -03:00
parent 7cfb86f0cc
commit 8797888d14
8 changed files with 428 additions and 157 deletions

View File

@@ -321,16 +321,8 @@ class SubcloudAuditWorkerManager(manager.Manager):
subcloud_management_ip = subcloud.management_start_ip
audits_done = list()
failures = list()
batch_request_data = {
"availability": None,
dccommon_consts.ENDPOINT_TYPE_PATCHING: None,
dccommon_consts.ENDPOINT_TYPE_LOAD: None,
dccommon_consts.ENDPOINT_TYPE_FIRMWARE: None,
dccommon_consts.ENDPOINT_TYPE_KUBERNETES: None,
dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA: None,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE: None
}
availability_data = dict()
endpoint_data = dict()
# Set defaults to None and disabled so we will still set disabled
# status if we encounter an error.
@@ -427,12 +419,10 @@ class SubcloudAuditWorkerManager(manager.Manager):
LOG.debug('Setting new availability status: %s '
'on subcloud: %s' %
(avail_to_set, subcloud_name))
batch_request_data.update({
"availability": {
"availability_status": avail_to_set,
"update_state_only": False,
"audit_fail_count": audit_fail_count
}
availability_data.update({
"availability_status": avail_to_set,
"update_state_only": False,
"audit_fail_count": audit_fail_count
})
elif audit_fail_count != subcloud.audit_fail_count:
@@ -448,12 +438,10 @@ class SubcloudAuditWorkerManager(manager.Manager):
# subcloud as an audit.
LOG.debug('Updating subcloud state unconditionally for subcloud %s'
% subcloud_name)
batch_request_data.update({
"availability": {
"availability_status": avail_status_current,
"update_state_only": True,
"audit_fail_count": None
}
availability_data.update({
"availability_status": avail_status_current,
"update_state_only": True,
"audit_fail_count": None
})
# If subcloud is managed and online and the identity was synced once,
@@ -470,7 +458,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# If we have patch audit data, audit the subcloud
if do_patch_audit and patch_audit_data:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_PATCHING] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_PATCHING] = (
self.patch_audit.subcloud_patch_audit(
keystone_client.session, sysinv_client,
subcloud_management_ip, subcloud_name, subcloud_region,
@@ -484,7 +472,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# Perform load audit
if do_load_audit and patch_audit_data:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_LOAD] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_LOAD] = (
self.patch_audit.subcloud_load_audit(
sysinv_client, subcloud_name, patch_audit_data
)
@@ -496,7 +484,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# Perform firmware audit
if do_firmware_audit:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_FIRMWARE] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_FIRMWARE] = (
self.firmware_audit.subcloud_firmware_audit(
sysinv_client, subcloud_name, firmware_audit_data
)
@@ -508,7 +496,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# Perform kubernetes audit
if do_kubernetes_audit:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_KUBERNETES] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_KUBERNETES] = (
self.kubernetes_audit.subcloud_kubernetes_audit(
sysinv_client, subcloud_name, kubernetes_audit_data
)
@@ -520,7 +508,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# Perform kube rootca update audit
if do_kube_rootca_update_audit:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA] = (
self.kube_rootca_update_audit.subcloud_kube_rootca_audit(
sysinv_client, fm_client, subcloud,
kube_rootca_update_audit_data
@@ -545,7 +533,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
# Perform software audit
if do_software_audit:
try:
batch_request_data[dccommon_consts.ENDPOINT_TYPE_SOFTWARE] = (
endpoint_data[dccommon_consts.ENDPOINT_TYPE_SOFTWARE] = (
self.software_audit.subcloud_software_audit(
keystone_client, subcloud_management_ip,
subcloud_name, subcloud_region, software_audit_data
@@ -556,19 +544,23 @@ class SubcloudAuditWorkerManager(manager.Manager):
LOG.exception(failmsg % (subcloud.name, 'software'))
failures.append('software')
if any(batch_request_data.values()):
# If a value is not None, an update should be sent to dcmanager-state
if availability_data or (endpoint_data and any(endpoint_data.values())):
# If a value is not None, an update should be sent to the rpc client
try:
self.state_rpc_client.\
batch_update_subcloud_availability_and_endpoint_status(
bulk_update_subcloud_availability_and_endpoint_status(
self.context, subcloud_name, subcloud_region,
batch_request_data
availability_data, endpoint_data
)
LOG.info('Notifying dcmanager-state, subcloud:%s, batch '
'availability and endpoint status update' % subcloud_name)
LOG.debug(
f'Notifying dcmanager-state, subcloud: {subcloud_name}, bulk '
'availability and endpoint status update'
)
except Exception:
LOG.exception('Failed to notify dcmanager-state of subcloud '
'batch availability and endpoint status update, '
'subcloud: %s' % subcloud_name)
LOG.exception(
'Failed to notify dcmanager-state of subcloud batch '
'availability and endpoint status update, '
f'subcloud: {subcloud_name}'
)
return audits_done, failures

View File

@@ -296,6 +296,14 @@ def subcloud_status_update_endpoints(context, subcloud_id,
endpoint_type_list, sync_status)
def subcloud_status_bulk_update_endpoints(context, subcloud_id, endpoint_list):
    """Update the sync status of multiple subcloud endpoints in one call.

    Thin delegate to the backend implementation (IMPL).

    :param context: request context object
    :param subcloud_id: id of the subcloud whose endpoint statuses are updated
    :param endpoint_list: mapping of endpoint type to the sync status to set
    """
    return IMPL.subcloud_status_bulk_update_endpoints(
        context, subcloud_id, endpoint_list
    )
def subcloud_status_destroy_all(context, subcloud_id):
"""Destroy all the statuses for a subcloud

View File

@@ -30,12 +30,14 @@ from oslo_log import log as logging
from oslo_utils import strutils
from oslo_utils import uuidutils
import sqlalchemy
from sqlalchemy import bindparam
from sqlalchemy import desc
from sqlalchemy import or_
from sqlalchemy.orm import exc
from sqlalchemy.orm import joinedload_all
from sqlalchemy.orm import load_only
from sqlalchemy.sql.expression import true
from sqlalchemy import update
from dccommon import consts as dccommon_consts
from dcmanager.common import consts
@@ -551,6 +553,14 @@ def subcloud_status_get_all(context, subcloud_id):
filter(models.Subcloud.id == subcloud_id).all()
@require_context
def _subcloud_status_get_by_endpoint_types(context, subcloud_id, endpoint_types):
    """Return the non-deleted status rows of a subcloud for the given types.

    :param context: request context object
    :param subcloud_id: id of the subcloud to query
    :param endpoint_types: iterable of endpoint type names to match
    """
    query = model_query(context, models.SubcloudStatus)
    query = query.filter_by(deleted=0)
    query = query.filter(
        models.SubcloudStatus.subcloud_id == subcloud_id,
        models.SubcloudStatus.endpoint_type.in_(endpoint_types),
    )
    return query.all()
@require_context
def subcloud_status_get_all_by_name(context, name):
return model_query(context, models.SubcloudStatus). \
@@ -620,6 +630,46 @@ def subcloud_status_update_endpoints(context, subcloud_id,
return result
@require_admin_context
def subcloud_status_bulk_update_endpoints(context, subcloud_id, endpoint_list):
    """Update the status of the specified endpoints for a subcloud

    :param context: request context object
    :param subcloud_id: id of the subcloud to update
    :param endpoint_list: mapping of endpoint type to the sync status to set

    Will raise SubcloudStatusNotFound if none of the requested endpoint
    statuses exist for the subcloud.
    """
    # Retrieves the subcloud status' data for all of the endpoints in
    # endpoint_list
    subcloud_statuses = _subcloud_status_get_by_endpoint_types(
        context, subcloud_id, endpoint_list.keys()
    )

    # Create a list with the id of each subcloud status that needs to be
    # updated and its respective sync_status
    update_list = [
        {
            "_id": subcloud_status.id,
            "sync_status": endpoint_list[subcloud_status.endpoint_type],
        }
        for subcloud_status in subcloud_statuses
    ]

    # Raise before executing: a Result object is always truthy, so the
    # original post-execute `if not result` check could never fire, and
    # executing an UPDATE with an empty parameter list raises a different,
    # less informative error from sqlalchemy.
    if not update_list:
        raise exception.SubcloudStatusNotFound(
            subcloud_id=subcloud_id, endpoint_type="any"
        )

    # Bindparam associates keys from update_list to columns in the database
    # query. This way, for each of the items that needs update, it's possible
    # to set a specific sync_status, i.e. the query is capable of updating
    # many endpoints with each of them having one of three values:
    # in-sync, out-of-sync and unknown.
    with write_session() as session:
        statement = update(models.SubcloudStatus).\
            where(models.SubcloudStatus.id == bindparam("_id")).\
            values(sync_status=bindparam("sync_status"))
        result = session.execute(statement, update_list)

    return result
@require_admin_context
def subcloud_status_destroy_all(context, subcloud_id):
with write_session() as session:

View File

@@ -69,15 +69,17 @@ class SubcloudStateClient(RPCClient):
consts.TOPIC_DC_MANAGER_STATE,
self.BASE_RPC_API_VERSION)
def bulk_update_subcloud_availability_and_endpoint_status(
    self, ctxt, subcloud_name, subcloud_region, availability_data,
    endpoint_data
):
    """Cast a bulk availability/endpoint status update to dcmanager-state.

    Note: This is an asynchronous operation.

    :param ctxt: request context object
    :param subcloud_name: name of the subcloud
    :param subcloud_region: region name of the subcloud
    :param availability_data: availability payload dict, or None/empty
    :param endpoint_data: mapping of endpoint type to sync status
    """
    # The rendered diff interleaved the removed batch_* signature with the
    # new bulk_* one; this is the clean new method.
    return self.cast(ctxt, self.make_msg(
        'bulk_update_subcloud_availability_and_endpoint_status',
        subcloud_name=subcloud_name,
        subcloud_region=subcloud_region,
        availability_data=availability_data,
        endpoint_data=endpoint_data)
    )
def update_subcloud_availability(self, ctxt,

View File

@@ -191,16 +191,17 @@ class DCManagerStateService(service.Service):
audit_fail_count,
)
def bulk_update_subcloud_availability_and_endpoint_status(
    self, context, subcloud_name, subcloud_region, availability_data,
    endpoint_data
):
    """Handle the bulk availability/endpoint status RPC request.

    Delegates to the subcloud state manager, which performs the actual
    database updates.

    :param context: request context object
    :param subcloud_name: name of the subcloud (used for logging)
    :param subcloud_region: region name used to look up the subcloud
    :param availability_data: availability payload dict, or None/empty
    :param endpoint_data: mapping of endpoint type to sync status
    """
    LOG.info(
        "Handling bulk_update_subcloud_availability_and_endpoint_status request "
        f"for subcloud: {subcloud_name}"
    )
    self.subcloud_state_manager.\
        bulk_update_subcloud_availability_and_endpoint_status(
            context, subcloud_name, subcloud_region, availability_data,
            endpoint_data
        )

View File

@@ -19,6 +19,7 @@
from fm_api import constants as fm_const
from fm_api import fm_api
from oslo_concurrency import lockutils
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
@@ -34,13 +35,14 @@ from dcorch.rpc import client as dcorch_rpc_client
LOG = logging.getLogger(__name__)
ALARM_OUT_OF_SYNC = fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC
LOCK_NAME = "dc-audit-bulk-update"
def sync_update_subcloud_endpoint_status(func):
"""Synchronized lock decorator for _update_subcloud_endpoint_status. """
"""Synchronized lock decorator for _update_subcloud_endpoint_status."""
def _get_lock_and_call(*args, **kwargs):
"""Get a single fair lock per subcloud based on subcloud region. """
"""Get a single fair lock per subcloud based on subcloud region."""
# subcloud region is the 3rd argument to
# _update_subcloud_endpoint_status()
@@ -264,6 +266,43 @@ class SubcloudStateManager(manager.Manager):
else:
LOG.error("Subcloud not found:%s" % subcloud_id)
def _should_update_endpoint_status(self, subcloud, endpoint_type, sync_status):
    """Decide whether a subcloud endpoint's sync status should be updated.

    Rules for updating sync status:

    - For secondary subclouds, only update if the new sync_status is
      'unknown'.
    - For others, always update if not in-sync.
    - Otherwise, only update the sync status if managed and online
      (unless dc-cert).

    Most endpoints are audited only when the subcloud is managed and
    online. An exception is the dc-cert endpoint, which is audited
    whenever the subcloud is online (managed or unmanaged). This means
    if a subcloud is going offline or unmanaged, then the sync status
    update must be done first.
    """
    # Secondary subclouds: only the transition to 'unknown' is recorded.
    if subcloud.deploy_status == consts.DEPLOY_STATE_SECONDARY:
        return sync_status == dccommon_consts.SYNC_STATUS_UNKNOWN

    # A status that is not in-sync is always recorded.
    if sync_status != dccommon_consts.SYNC_STATUS_IN_SYNC:
        return True

    # In-sync: only record when the subcloud is online and either managed
    # or the endpoint is dc-cert (audited even while unmanaged).
    subcloud_online = (
        subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE
    )
    subcloud_managed = (
        subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED
    )
    endpoint_is_dc_cert = (
        endpoint_type == dccommon_consts.ENDPOINT_TYPE_DC_CERT
    )
    return subcloud_online and (subcloud_managed or endpoint_is_dc_cert)
@sync_update_subcloud_endpoint_status
def _update_subcloud_endpoint_status(
self, context,
@@ -297,47 +336,13 @@ class SubcloudStateManager(manager.Manager):
LOG.exception(e)
raise e
# Rules for updating sync status:
#
# For secondary subclouds, only update if the new sync_status is
# 'unknown'
#
# For others, always update if not in-sync.
#
# Otherwise, only update the sync status if managed and online
# (unless dc-cert).
#
# Most endpoints are audited only when the subcloud is managed and
# online. An exception is the dc-cert endpoint, which is audited
# whenever the subcloud is online (managed or unmanaged).
#
# This means if a subcloud is going offline or unmanaged, then
# the sync status update must be done first.
#
is_in_sync = sync_status == dccommon_consts.SYNC_STATUS_IN_SYNC
is_online = subcloud.availability_status == \
dccommon_consts.AVAILABILITY_ONLINE
is_managed = subcloud.management_state == \
dccommon_consts.MANAGEMENT_MANAGED
is_endpoint_type_dc_cert = endpoint_type == \
dccommon_consts.ENDPOINT_TYPE_DC_CERT
is_secondary = subcloud.deploy_status == consts.DEPLOY_STATE_SECONDARY
is_sync_unknown = sync_status == dccommon_consts.SYNC_STATUS_UNKNOWN
is_secondary_and_sync_unknown = is_secondary and is_sync_unknown
if (
(not is_in_sync
or (is_online and (is_managed or is_endpoint_type_dc_cert)))
and not is_secondary
) or is_secondary_and_sync_unknown:
if self._should_update_endpoint_status(subcloud, endpoint_type, sync_status):
# update a single subcloud
try:
self._do_update_subcloud_endpoint_status(context,
subcloud.id,
endpoint_type,
sync_status,
alarmable,
ignore_endpoints)
self._do_update_subcloud_endpoint_status(
context, subcloud.id, endpoint_type, sync_status,
alarmable, ignore_endpoints
)
except Exception as e:
LOG.exception(e)
raise e
@@ -347,23 +352,134 @@ class SubcloudStateManager(manager.Manager):
(subcloud.name, subcloud.availability_status,
subcloud.management_state, endpoint_type, sync_status))
def bulk_update_subcloud_availability_and_endpoint_status(
    self, context, subcloud_name, subcloud_region, availability_data,
    endpoint_data
):
    """Update a subcloud's availability and endpoint statuses in bulk.

    This bulk update is executed as part of the audit process in dcmanager
    and its related endpoints. This method is not used by dcorch and
    cert-mon.

    :param context: request context object
    :param subcloud_name: name of the subcloud (kept for RPC compatibility)
    :param subcloud_region: region name used to look up the subcloud
    :param availability_data: availability payload dict, or None/empty to skip
    :param endpoint_data: mapping of endpoint type to sync status, or
        None/empty to skip
    """
    # The rendered diff interleaved the removed per-key loop with the new
    # code; this is the clean new method. The subcloud is fetched once and
    # passed down so the callees do not query it again.
    try:
        subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region)
    except Exception:
        LOG.exception(
            f"Failed to get subcloud by region name {subcloud_region}"
        )
        raise

    if availability_data:
        self.update_subcloud_availability(
            context, subcloud_region, availability_data["availability_status"],
            availability_data["update_state_only"],
            availability_data["audit_fail_count"], subcloud
        )

    if endpoint_data:
        self._bulk_update_subcloud_endpoint_status(
            context, subcloud, endpoint_data
        )
@lockutils.synchronized(LOCK_NAME)
def _do_bulk_update_subcloud_endpoint_status(
    self, context, subcloud, endpoint_list
):
    """Updates an online and managed subcloud's endpoints sync status

    :param context: request context object
    :param subcloud: subcloud to update
    :param endpoint_list: the list of endpoints and its sync status to update
    """
    # This bulk update is executed as part of the audit process and, because
    # of that, the logic is similar to _do_update_subcloud_endpoint_status
    # but with the difference that only the required endpoints will be
    # updated and that'll happen at once.
    LOG.info(
        f"Updating endpoints on subcloud: {subcloud.name} "
        f"endpoints: {', '.join(endpoint_list.keys())}"
    )

    for endpoint, sync_status in endpoint_list.items():
        entity_instance_id = f"subcloud={subcloud.name}.resource={endpoint}"
        fault = self.fm_api.get_fault(ALARM_OUT_OF_SYNC, entity_instance_id)

        # TODO(yuxing): batch clear all the out-of-sync alarms of a
        # given subcloud if fm_api support it. Be careful with the
        # dc-cert endpoint when adding the above; the endpoint
        # alarm must remain for offline subclouds.
        if (sync_status != dccommon_consts.SYNC_STATUS_OUT_OF_SYNC) and fault:
            try:
                self.fm_api.clear_fault(ALARM_OUT_OF_SYNC, entity_instance_id)
            except Exception as e:
                # Best-effort alarm maintenance: failures must not block
                # the bulk database update below.
                LOG.exception(e)
        elif not fault and \
                (sync_status == dccommon_consts.SYNC_STATUS_OUT_OF_SYNC):
            entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
            try:
                fault = fm_api.Fault(
                    alarm_id=ALARM_OUT_OF_SYNC,
                    alarm_state=fm_const.FM_ALARM_STATE_SET,
                    entity_type_id=entity_type_id,
                    entity_instance_id=entity_instance_id,
                    severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
                    reason_text=("%s %s sync_status is "
                                 "out-of-sync" %
                                 (subcloud.name, endpoint)),
                    alarm_type=fm_const.FM_ALARM_TYPE_0,
                    probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
                    proposed_repair_action="If problem persists "
                                           "contact next level "
                                           "of support",
                    service_affecting=False)
                self.fm_api.set_fault(fault)
            except Exception as e:
                LOG.exception(e)

    try:
        db_api.subcloud_status_bulk_update_endpoints(
            context, subcloud.id, endpoint_list,
        )
    except Exception as e:
        # Log message fixed: "occurred" was misspelled and the two f-string
        # fragments concatenated without a space ("name'sendpoint").
        LOG.exception(
            f"An error occurred when updating the subcloud {subcloud.name}'s "
            f"endpoint status: {e}"
        )
def _bulk_update_subcloud_endpoint_status(
    self, context, subcloud, endpoint_list
):
    """Update the sync status of a list of subcloud endpoints

    :param context: current context object
    :param subcloud: subcloud object
    :param endpoint_list: list of endpoints to update and their sync status
    """
    # The rendered diff left stray removed lines (`continue` and the old
    # per-endpoint update_subcloud_endpoint_status call) inside this span;
    # this is the clean new method.
    # Keep only the endpoints whose status should actually change according
    # to the managed/online/secondary rules.
    endpoints_to_update = dict()

    for endpoint_type, sync_status in endpoint_list.items():
        if self._should_update_endpoint_status(
            subcloud, endpoint_type, sync_status
        ):
            endpoints_to_update.update({endpoint_type: sync_status})

    # Update all the necessary endpoints for a single subcloud at once
    if endpoints_to_update:
        try:
            self._do_bulk_update_subcloud_endpoint_status(
                context, subcloud, endpoints_to_update
            )
        except Exception as e:
            LOG.exception(e)
            raise e
    else:
        LOG.info(
            "Ignoring bulk_update_subcloud_endpoint_status for subcloud: "
            f"{subcloud.name} availability: {subcloud.availability_status} "
            f"management: {subcloud.management_state} endpoints: "
            f"{', '.join(endpoint_list.keys())}"
        )
def update_subcloud_endpoint_status(
@@ -461,14 +577,16 @@ class SubcloudStateManager(manager.Manager):
def update_subcloud_availability(self, context, subcloud_region,
availability_status,
update_state_only=False,
audit_fail_count=None):
try:
subcloud = db_api.subcloud_get_by_region_name(context, subcloud_region)
except Exception:
LOG.exception(
"Failed to get subcloud by region name %s" % subcloud_region
)
raise
audit_fail_count=None, subcloud=None):
if subcloud is None:
try:
subcloud = db_api.subcloud_get_by_region_name(context,
subcloud_region)
except Exception:
LOG.exception(
"Failed to get subcloud by region name %s" % subcloud_region
)
raise
if update_state_only:
# Ensure that the status alarm is consistent with the
@@ -502,9 +620,14 @@ class SubcloudStateManager(manager.Manager):
if availability_status == dccommon_consts.AVAILABILITY_OFFLINE:
# Subcloud is going offline, set all endpoint statuses to
# unknown.
self._update_subcloud_endpoint_status(
context, subcloud.region_name, endpoint_type=None,
sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN)
endpoint_list = dict()
for endpoint in dccommon_consts.ENDPOINT_TYPES_LIST:
endpoint_list[endpoint] = dccommon_consts.SYNC_STATUS_UNKNOWN
self._bulk_update_subcloud_endpoint_status(
context, subcloud, endpoint_list
)
try:
updated_subcloud = db_api.subcloud_update(

View File

@@ -364,32 +364,33 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.mock_sysinv_client().get_applications.return_value = \
FAKE_APPLICATIONS
self.batch_state_request_data = {
"availability": None,
dccommon_consts.ENDPOINT_TYPE_PATCHING: None,
dccommon_consts.ENDPOINT_TYPE_LOAD: None,
dccommon_consts.ENDPOINT_TYPE_FIRMWARE: None,
dccommon_consts.ENDPOINT_TYPE_KUBERNETES: None,
dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA: None,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE: None
}
self.availability_data = dict()
self.endpoint_data = dict()
def _update_availability(
self, availability_status, update_status_only, audit_fail_count
):
self.batch_state_request_data.update({
"availability": {
"availability_status": availability_status,
"update_state_only": update_status_only,
"audit_fail_count": audit_fail_count
}
self.availability_data.update({
"availability_status": availability_status,
"update_state_only": update_status_only,
"audit_fail_count": audit_fail_count
})
def _set_all_audits_in_sync(self):
for key in self.batch_state_request_data:
if key != "availability":
self.batch_state_request_data[key] = \
dccommon_consts.SYNC_STATUS_IN_SYNC
self.endpoint_data.update({
dccommon_consts.ENDPOINT_TYPE_PATCHING:
dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_LOAD:
dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_FIRMWARE:
dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_KUBERNETES:
dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA:
dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_SOFTWARE:
dccommon_consts.SYNC_STATUS_IN_SYNC
})
@staticmethod
def create_subcloud_static(ctxt, **kwargs):
@@ -477,10 +478,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self._update_availability(dccommon_consts.AVAILABILITY_ONLINE, False, 0)
self._set_all_audits_in_sync()
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_once_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Verify the _update_subcloud_audit_fail_count is not called
@@ -576,10 +577,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud was set to online
self._update_availability(dccommon_consts.AVAILABILITY_ONLINE, False, 0)
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Verify the _update_subcloud_audit_fail_count is not called
@@ -652,10 +653,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud was set to online
self._update_availability(dccommon_consts.AVAILABILITY_ONLINE, False, 0)
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Verify the _update_subcloud_audit_fail_count is not called
@@ -712,7 +713,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@@ -762,10 +763,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
dccommon_consts.AVAILABILITY_ONLINE, True, None
)
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Verify the _update_subcloud_audit_fail_count is not called
@@ -880,10 +881,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the state was called only for the audits
self._set_all_audits_in_sync()
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_once_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Update the DB like dcmanager would do.
@@ -914,10 +915,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not called
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_once_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
# Verify alarm update is called only once
@@ -989,7 +990,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@@ -1108,10 +1109,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify that the subcloud was updated to offline
self._update_availability(dccommon_consts.AVAILABILITY_OFFLINE, False, 2)
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_called_with(
mock.ANY, subcloud.name, subcloud.region_name,
self.batch_state_request_data
self.availability_data, self.endpoint_data
)
def test_audit_subcloud_offline_update_audit_fail_count_only(self):
@@ -1173,7 +1174,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the openstack endpoints were not updated
@@ -1246,7 +1247,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@@ -1314,7 +1315,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@@ -1381,7 +1382,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify the subcloud state was not updated
self.mock_dcmanager_state_api().\
batch_update_subcloud_availability_and_endpoint_status.\
bulk_update_subcloud_availability_and_endpoint_status.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called

View File

@@ -2250,6 +2250,104 @@ class TestSubcloudUpdate(BaseTestSubcloudManager):
self.mock_dcmanager_api().subcloud_online.\
assert_called_once_with(self.ctx, self.subcloud.region_name)
def test_bulk_update_subcloud_availability_and_endpoint_status(self):
availability_data = {
"availability_status": dccommon_consts.AVAILABILITY_OFFLINE,
"update_state_only": False,
"audit_fail_count": 1
}
endpoint_data = {
dccommon_consts.ENDPOINT_TYPE_LOAD: dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_FIRMWARE:
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
}
endpoints = db_api.subcloud_status_get_all(self.ctx, self.subcloud.id)
db_api.subcloud_update(
self.ctx, self.subcloud.id,
availability_status=dccommon_consts.AVAILABILITY_ONLINE,
management_state=dccommon_consts.MANAGEMENT_MANAGED
)
ssm = subcloud_state_manager.SubcloudStateManager()
ssm.bulk_update_subcloud_availability_and_endpoint_status(
self.ctx, self.subcloud.name, self.subcloud.region_name,
availability_data, endpoint_data
)
updated_subcloud = db_api.subcloud_get(self.ctx, self.subcloud.id)
self.assertEqual(
updated_subcloud.availability_status,
availability_data["availability_status"]
)
new_endpoints = db_api.subcloud_status_get_all(self.ctx, self.subcloud.id)
for index, endpoint in enumerate(endpoints):
self.assertEqual(
endpoint.endpoint_type, new_endpoints[index].endpoint_type
)
if endpoint.endpoint_type in endpoint_data:
self.assertEqual(
new_endpoints[index].sync_status,
endpoint_data[endpoint.endpoint_type]
)
else:
self.assertEqual(
endpoint.sync_status, new_endpoints[index].sync_status
)
@mock.patch.object(
db_api, "subcloud_status_bulk_update_endpoints",
wraps=db_api.subcloud_status_bulk_update_endpoints
)
def test_bulk_update_endpoint_status_when_endpoint_status_is_the_same(
self, mock_db
):
"""Test bulk_update_endpoint_status updates the endpoint with same status
When the endpoint's status in the database is the same as the one it'll be
updated to, ensure that, instead of validating, bulk_update_endpoint_status
sets the same value in the database
"""
db_api.subcloud_update(
self.ctx, self.subcloud.id,
availability_status=dccommon_consts.AVAILABILITY_ONLINE,
management_state=dccommon_consts.MANAGEMENT_MANAGED
)
endpoint_data = {
dccommon_consts.ENDPOINT_TYPE_LOAD: dccommon_consts.SYNC_STATUS_IN_SYNC,
dccommon_consts.ENDPOINT_TYPE_FIRMWARE:
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
}
ssm = subcloud_state_manager.SubcloudStateManager()
ssm.bulk_update_subcloud_availability_and_endpoint_status(
self.ctx, self.subcloud.name, self.subcloud.region_name,
None, endpoint_data
)
self.assertEqual(mock_db.call_count, 1)
# Re-executing the method should result in the same amount of call counts
# for the database query since there are no updates
ssm.bulk_update_subcloud_availability_and_endpoint_status(
self.ctx, self.subcloud.name, self.subcloud.region_name,
None, endpoint_data
)
self.assertEqual(mock_db.call_count, 2)
def test_bulk_update_fails_with_invalid_region(self):
ssm = subcloud_state_manager.SubcloudStateManager()
self.assertRaises(
exceptions.SubcloudRegionNameNotFound,
ssm.bulk_update_subcloud_availability_and_endpoint_status,
self.ctx, self.subcloud.name, "fake", None, None
)
@mock.patch.object(subcloud_state_manager.SubcloudStateManager,
'_raise_or_clear_subcloud_status_alarm')
def test_update_state_only(self, mock_update_status_alarm):
@@ -2336,11 +2434,7 @@ class TestSubcloudUpdate(BaseTestSubcloudManager):
ssm = subcloud_state_manager.SubcloudStateManager()
# create sync statuses for endpoints and set them to in-sync
for endpoint in [dccommon_consts.ENDPOINT_TYPE_PLATFORM,
dccommon_consts.ENDPOINT_TYPE_IDENTITY,
dccommon_consts.ENDPOINT_TYPE_PATCHING,
dccommon_consts.ENDPOINT_TYPE_FM,
dccommon_consts.ENDPOINT_TYPE_NFV]:
for endpoint in dccommon_consts.ENDPOINT_TYPES_LIST:
db_api.subcloud_status_create(
self.ctx, self.subcloud.id, endpoint)
ssm.update_subcloud_endpoint_status(