Apply black formatter to dcmanager/audit

This commit applies the Black format to the `dcmanager/audit`
files to ensure that it adheres to the Black code style guidelines.

Test Plan:
PASS: Success in stx-distcloud-tox-black

Story: 2011149
Task: 50444

Change-Id: I3d4bc5051d92b8d5546b9b31d2b0d532f98212b4
Signed-off-by: Hugo Brito <hugo.brito@windriver.com>
This commit is contained in:
Hugo Brito
2024-06-28 17:32:31 -03:00
parent 185056ac72
commit 7a8694fab1
11 changed files with 441 additions and 326 deletions

View File

@@ -37,10 +37,10 @@ class AlarmAggregation(object):
try:
alarms = fm_client.get_alarm_summary()
alarm_updates = {
'critical_alarms': alarms[0].critical,
'major_alarms': alarms[0].major,
'minor_alarms': alarms[0].minor,
'warnings': alarms[0].warnings
"critical_alarms": alarms[0].critical,
"major_alarms": alarms[0].major,
"minor_alarms": alarms[0].minor,
"warnings": alarms[0].warnings,
}
alarm_updates = cls._set_cloud_status(alarm_updates)
return alarm_updates
@@ -57,12 +57,13 @@ class AlarmAggregation(object):
@staticmethod
def _set_cloud_status(alarm_dict):
if alarm_dict.get('critical_alarms') > 0:
if alarm_dict.get("critical_alarms") > 0:
status = consts.ALARM_CRITICAL_STATUS
elif (alarm_dict.get('major_alarms') > 0) or \
(alarm_dict.get('minor_alarms') > 0):
elif (alarm_dict.get("major_alarms") > 0) or (
alarm_dict.get("minor_alarms") > 0
):
status = consts.ALARM_DEGRADED_STATUS
else:
status = consts.ALARM_OK_STATUS
alarm_dict['cloud_status'] = status
alarm_dict["cloud_status"] = status
return alarm_dict

View File

@@ -299,8 +299,7 @@ class FirmwareAudit(object):
subcloud_device_images = sysinv_client.get_device_images()
if subcloud_device_images:
subcloud_device_images = {
image.uuid: image
for image in subcloud_device_images
image.uuid: image for image in subcloud_device_images
}
msg = f"Device_images: {subcloud_device_images}"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
@@ -380,7 +379,7 @@ class FirmwareAudit(object):
if sync_status:
LOG.info(
f'Firmware audit completed for: {subcloud_name}, requesting sync_status'
f'update to {sync_status}'
f"Firmware audit completed for: {subcloud_name}, requesting sync_status"
f"update to {sync_status}"
)
return sync_status

View File

@@ -175,7 +175,7 @@ class KubernetesAudit(object):
if sync_status:
LOG.info(
f'Kubernetes audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
f"Kubernetes audit completed for: {subcloud_name}, requesting "
f"sync_status update to {sync_status}"
)
return sync_status

View File

@@ -29,9 +29,9 @@ LOG = logging.getLogger(__name__)
class PatchAuditData(object):
def __init__(self, patches, applied_patch_ids,
committed_patch_ids,
software_version):
def __init__(
self, patches, applied_patch_ids, committed_patch_ids, software_version
):
self.patches = patches
self.applied_patch_ids = applied_patch_ids
self.committed_patch_ids = committed_patch_ids
@@ -39,10 +39,10 @@ class PatchAuditData(object):
def to_dict(self):
return {
'patches': self.patches,
'applied_patch_ids': self.applied_patch_ids,
'committed_patch_ids': self.committed_patch_ids,
'software_version': self.software_version,
"patches": self.patches,
"applied_patch_ids": self.applied_patch_ids,
"committed_patch_ids": self.committed_patch_ids,
"software_version": self.software_version,
}
@classmethod
@@ -56,7 +56,7 @@ class PatchAudit(object):
"""Manages tasks related to patch audits."""
def __init__(self, context):
LOG.debug('PatchAudit initialization...')
LOG.debug("PatchAudit initialization...")
self.context = context
self.audit_count = 0
@@ -66,8 +66,10 @@ class PatchAudit(object):
try:
upgrades = sysinv_client.get_upgrades()
except Exception:
LOG.exception('Cannot retrieve upgrade info for subcloud: %s' %
sysinv_client.region_name)
LOG.exception(
"Cannot retrieve upgrade info for subcloud: %s"
% sysinv_client.region_name
)
return upgrades
def get_regionone_audit_data(self):
@@ -84,17 +86,20 @@ class PatchAudit(object):
region_clients=None,
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
).keystone_client
patching_endpoint = m_os_ks_client.endpoint_cache.get_endpoint(
'patching')
sysinv_endpoint = m_os_ks_client.endpoint_cache.get_endpoint('sysinv')
patching_endpoint = m_os_ks_client.endpoint_cache.get_endpoint("patching")
sysinv_endpoint = m_os_ks_client.endpoint_cache.get_endpoint("sysinv")
patching_client = PatchingClient(
dccommon_consts.DEFAULT_REGION_NAME, m_os_ks_client.session,
endpoint=patching_endpoint)
dccommon_consts.DEFAULT_REGION_NAME,
m_os_ks_client.session,
endpoint=patching_endpoint,
)
sysinv_client = SysinvClient(
dccommon_consts.DEFAULT_REGION_NAME, m_os_ks_client.session,
endpoint=sysinv_endpoint)
dccommon_consts.DEFAULT_REGION_NAME,
m_os_ks_client.session,
endpoint=sysinv_endpoint,
)
except Exception:
LOG.exception('Failure initializing OS Client, skip patch audit.')
LOG.exception("Failure initializing OS Client, skip patch audit.")
return None
# First query RegionOne to determine what patches should be applied
@@ -104,8 +109,7 @@ class PatchAudit(object):
# Get the active software version in RegionOne as it may be needed
# later for subcloud load audit.
regionone_software_version = \
sysinv_client.get_system().software_version
regionone_software_version = sysinv_client.get_system().software_version
# Build lists of patches that should be applied or committed in all
# subclouds, based on their state in RegionOne. Check repostate
@@ -114,22 +118,35 @@ class PatchAudit(object):
applied_patch_ids = list()
committed_patch_ids = list()
for patch_id in regionone_patches.keys():
if regionone_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_APPLIED:
if (
regionone_patches[patch_id]["repostate"]
== patching_v1.PATCH_STATE_APPLIED
):
applied_patch_ids.append(patch_id)
elif regionone_patches[patch_id]['repostate'] == \
patching_v1.PATCH_STATE_COMMITTED:
elif (
regionone_patches[patch_id]["repostate"]
== patching_v1.PATCH_STATE_COMMITTED
):
committed_patch_ids.append(patch_id)
LOG.debug("RegionOne applied_patch_ids: %s" % applied_patch_ids)
LOG.debug("RegionOne committed_patch_ids: %s" % committed_patch_ids)
return PatchAuditData(regionone_patches, applied_patch_ids,
committed_patch_ids, regionone_software_version)
return PatchAuditData(
regionone_patches,
applied_patch_ids,
committed_patch_ids,
regionone_software_version,
)
def subcloud_patch_audit(
self, keystone_session, sysinv_client, subcloud_management_ip, subcloud_name,
subcloud_region, audit_data
self,
keystone_session,
sysinv_client,
subcloud_management_ip,
subcloud_name,
subcloud_region,
audit_data,
):
LOG.info('Triggered patch audit for: %s.' % subcloud_name)
LOG.info("Triggered patch audit for: %s." % subcloud_name)
try:
patching_endpoint = build_subcloud_endpoint(
@@ -138,22 +155,27 @@ class PatchAudit(object):
patching_client = PatchingClient(
subcloud_region, keystone_session, endpoint=patching_endpoint
)
except (keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
keystone_exceptions.ConnectTimeout,
IndexError):
LOG.exception("Endpoint for online subcloud %s not found, skip "
"patch audit." % subcloud_name)
except (
keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
keystone_exceptions.ConnectTimeout,
IndexError,
):
LOG.exception(
"Endpoint for online subcloud %s not found, skip "
"patch audit." % subcloud_name
)
return None
# Retrieve all the patches that are present in this subcloud.
try:
subcloud_patches = patching_client.query()
LOG.debug("Patches for subcloud %s: %s" %
(subcloud_name, subcloud_patches))
LOG.debug("Patches for subcloud %s: %s" % (subcloud_name, subcloud_patches))
except Exception:
LOG.warn('Cannot retrieve patches for subcloud: %s, skip patch '
'audit' % subcloud_name)
LOG.warn(
"Cannot retrieve patches for subcloud: %s, skip patch "
"audit" % subcloud_name
)
return None
# Determine which loads are present in this subcloud. During an
@@ -161,8 +183,10 @@ class PatchAudit(object):
try:
loads = sysinv_client.get_loads()
except Exception:
LOG.exception('Cannot retrieve installed loads for subcloud: %s, '
'skip patch audit' % subcloud_name)
LOG.exception(
"Cannot retrieve installed loads for subcloud: %s, "
"skip patch audit" % subcloud_name
)
return None
installed_loads = utils.get_loads_for_patching(loads)
@@ -177,22 +201,34 @@ class PatchAudit(object):
# subcloud, we use the patchstate because we care whether the
# patch is installed on the hosts.
for patch_id in subcloud_patches.keys():
if subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_APPLIED:
if (
subcloud_patches[patch_id]["patchstate"]
== patching_v1.PATCH_STATE_APPLIED
):
if patch_id not in audit_data.applied_patch_ids:
if patch_id not in audit_data.committed_patch_ids:
LOG.debug("Patch %s should not be applied in %s" %
(patch_id, subcloud_name))
LOG.debug(
"Patch %s should not be applied in %s"
% (patch_id, subcloud_name)
)
else:
LOG.debug("Patch %s should be committed in %s" %
(patch_id, subcloud_name))
LOG.debug(
"Patch %s should be committed in %s"
% (patch_id, subcloud_name)
)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
elif subcloud_patches[patch_id]['patchstate'] == \
patching_v1.PATCH_STATE_COMMITTED:
if (patch_id not in audit_data.committed_patch_ids and
patch_id not in audit_data.applied_patch_ids):
LOG.warn("Patch %s should not be committed in %s" %
(patch_id, subcloud_name))
elif (
subcloud_patches[patch_id]["patchstate"]
== patching_v1.PATCH_STATE_COMMITTED
):
if (
patch_id not in audit_data.committed_patch_ids
and patch_id not in audit_data.applied_patch_ids
):
LOG.warn(
"Patch %s should not be committed in %s"
% (patch_id, subcloud_name)
)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
else:
# In steady state, all patches should either be applied
@@ -203,36 +239,36 @@ class PatchAudit(object):
# Check that all applied or committed patches in RegionOne are
# present in the subcloud.
for patch_id in audit_data.applied_patch_ids:
if audit_data.patches[patch_id]['sw_version'] in \
installed_loads and patch_id not in \
subcloud_patches:
LOG.debug("Patch %s missing from %s" %
(patch_id, subcloud_name))
if (
audit_data.patches[patch_id]["sw_version"] in installed_loads
and patch_id not in subcloud_patches
):
LOG.debug("Patch %s missing from %s" % (patch_id, subcloud_name))
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
for patch_id in audit_data.committed_patch_ids:
if audit_data.patches[patch_id]['sw_version'] in \
installed_loads and patch_id not in \
subcloud_patches:
LOG.debug("Patch %s missing from %s" %
(patch_id, subcloud_name))
if (
audit_data.patches[patch_id]["sw_version"] in installed_loads
and patch_id not in subcloud_patches
):
LOG.debug("Patch %s missing from %s" % (patch_id, subcloud_name))
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
LOG.info(
f'Patch audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
f"Patch audit completed for: {subcloud_name}, requesting "
f"sync_status update to {sync_status}"
)
return sync_status
def subcloud_load_audit(
self, sysinv_client, subcloud_name, audit_data
):
def subcloud_load_audit(self, sysinv_client, subcloud_name, audit_data):
# Check subcloud software version every other audit cycle
LOG.info('Triggered load audit for: %s.' % subcloud_name)
LOG.info("Triggered load audit for: %s." % subcloud_name)
try:
upgrades = sysinv_client.get_upgrades()
except Exception:
LOG.warn('Cannot retrieve upgrade info for: %s, skip '
'software version audit' % subcloud_name)
LOG.warn(
"Cannot retrieve upgrade info for: %s, skip "
"software version audit" % subcloud_name
)
return None
# audit_data will be a dict due to passing through RPC so objectify it
@@ -241,8 +277,7 @@ class PatchAudit(object):
if not upgrades:
# No upgrade in progress
subcloud_software_version = \
sysinv_client.get_system().software_version
subcloud_software_version = sysinv_client.get_system().software_version
if subcloud_software_version != audit_data.software_version:
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
@@ -252,7 +287,7 @@ class PatchAudit(object):
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
LOG.info(
f'Load audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
f"Load audit completed for: {subcloud_name}, requesting "
f"sync_status update to {sync_status}"
)
return sync_status

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023 Wind River Systems, Inc.
# Copyright (c) 2020-2024 Wind River Systems, Inc.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
@@ -27,13 +27,14 @@ class ManagerAuditClient(object):
1.0 - Initial version
"""
BASE_RPC_API_VERSION = '1.0'
BASE_RPC_API_VERSION = "1.0"
def __init__(self, timeout=None):
self._client = messaging.get_rpc_client(
timeout=timeout,
topic=consts.TOPIC_DC_MANAGER_AUDIT,
version=self.BASE_RPC_API_VERSION)
version=self.BASE_RPC_API_VERSION,
)
@staticmethod
def make_msg(method, **kwargs):
@@ -56,37 +57,50 @@ class ManagerAuditClient(object):
return client.cast(ctxt, method, **kwargs)
def trigger_firmware_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_firmware_audit'))
return self.cast(ctxt, self.make_msg("trigger_firmware_audit"))
def trigger_kube_rootca_update_audit(self, ctxt):
return self.cast(ctxt,
self.make_msg('trigger_kube_rootca_update_audit'))
return self.cast(ctxt, self.make_msg("trigger_kube_rootca_update_audit"))
def trigger_kubernetes_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_kubernetes_audit'))
return self.cast(ctxt, self.make_msg("trigger_kubernetes_audit"))
def trigger_patch_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_patch_audit'))
return self.cast(ctxt, self.make_msg("trigger_patch_audit"))
def trigger_load_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_load_audit'))
return self.cast(ctxt, self.make_msg("trigger_load_audit"))
def trigger_software_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_software_audit'))
return self.cast(ctxt, self.make_msg("trigger_software_audit"))
def trigger_subcloud_audits(self, ctxt, subcloud_id, exclude_endpoints=None):
return self.cast(ctxt, self.make_msg('trigger_subcloud_audits',
subcloud_id=subcloud_id,
exclude_endpoints=exclude_endpoints))
return self.cast(
ctxt,
self.make_msg(
"trigger_subcloud_audits",
subcloud_id=subcloud_id,
exclude_endpoints=exclude_endpoints,
),
)
def trigger_subcloud_patch_load_audits(self, ctxt, subcloud_id):
return self.cast(ctxt, self.make_msg('trigger_subcloud_patch_load_audits',
subcloud_id=subcloud_id))
return self.cast(
ctxt,
self.make_msg(
"trigger_subcloud_patch_load_audits", subcloud_id=subcloud_id
),
)
def trigger_subcloud_endpoints_update(self, ctxt, subcloud_name, endpoints):
return self.cast(ctxt, self.make_msg('trigger_subcloud_endpoints_update',
subcloud_name=subcloud_name,
endpoints=endpoints))
return self.cast(
ctxt,
self.make_msg(
"trigger_subcloud_endpoints_update",
subcloud_name=subcloud_name,
endpoints=endpoints,
),
)
class ManagerAuditWorkerClient(object):
@@ -97,13 +111,14 @@ class ManagerAuditWorkerClient(object):
"""
# todo(abailey): Does the RPC version need to increment
BASE_RPC_API_VERSION = '1.0'
BASE_RPC_API_VERSION = "1.0"
def __init__(self, timeout=None):
self._client = messaging.get_rpc_client(
timeout=timeout,
topic=consts.TOPIC_DC_MANAGER_AUDIT_WORKER,
version=self.BASE_RPC_API_VERSION)
version=self.BASE_RPC_API_VERSION,
)
@staticmethod
def make_msg(method, **kwargs):
@@ -125,31 +140,44 @@ class ManagerAuditWorkerClient(object):
client = self._client
return client.cast(ctxt, method, **kwargs)
def audit_subclouds(self,
ctxt,
subcloud_ids,
patch_audit_data=None,
firmware_audit_data=None,
kubernetes_audit_data=None,
do_openstack_audit=False,
kube_rootca_update_data=None,
software_audit_data=None):
def audit_subclouds(
self,
ctxt,
subcloud_ids,
patch_audit_data=None,
firmware_audit_data=None,
kubernetes_audit_data=None,
do_openstack_audit=False,
kube_rootca_update_data=None,
software_audit_data=None,
):
"""Tell audit-worker to perform audit on the subclouds with these
subcloud IDs.
subcloud IDs.
"""
return self.cast(ctxt, self.make_msg(
'audit_subclouds',
subcloud_ids=subcloud_ids,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_openstack_audit=do_openstack_audit,
kube_rootca_update_audit_data=kube_rootca_update_data,
software_audit_data=software_audit_data))
return self.cast(
ctxt,
self.make_msg(
"audit_subclouds",
subcloud_ids=subcloud_ids,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_openstack_audit=do_openstack_audit,
kube_rootca_update_audit_data=kube_rootca_update_data,
software_audit_data=software_audit_data,
),
)
def update_subcloud_endpoints(self, ctxt, subcloud_name, endpoints):
"""Update endpoints of services for a subcloud region"""
return self.cast(ctxt, self.make_msg(
'update_subcloud_endpoints', subcloud_name=subcloud_name,
endpoints=endpoints), fanout=True, version=self.BASE_RPC_API_VERSION)
return self.cast(
ctxt,
self.make_msg(
"update_subcloud_endpoints",
subcloud_name=subcloud_name,
endpoints=endpoints,
),
fanout=True,
version=self.BASE_RPC_API_VERSION,
)

View File

@@ -8,9 +8,7 @@ from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.keystone_v3 import (
KeystoneClient as ks_client
)
from dccommon.drivers.openstack.keystone_v3 import KeystoneClient as ks_client
from dccommon.drivers.openstack import sdk_platform
from dccommon.drivers.openstack import software_v1
from dccommon.endpoint_cache import build_subcloud_endpoint
@@ -104,9 +102,7 @@ class SoftwareAudit(object):
@classmethod
def get_subcloud_audit_data(
cls,
software_client: software_v1.SoftwareClient,
subcloud_name: str = None
cls, software_client: software_v1.SoftwareClient, subcloud_name: str = None
):
# Retrieve all the releases that are present in this subcloud.
try:
@@ -122,7 +118,7 @@ class SoftwareAudit(object):
cls,
software_client: software_v1.SoftwareClient,
audit_data: SoftwareAuditData,
subcloud_name: str = None
subcloud_name: str = None,
):
# Retrieve all the releases that are present in this subcloud.
subcloud_releases = cls.get_subcloud_audit_data(software_client)
@@ -190,7 +186,7 @@ class SoftwareAudit(object):
subcloud_management_ip: str,
subcloud_name: str,
subcloud_region: str,
audit_data: SoftwareAuditData
audit_data: SoftwareAuditData,
):
LOG.info(f"Triggered software audit for: {subcloud_name}.")
try:
@@ -218,7 +214,7 @@ class SoftwareAudit(object):
if sync_status:
LOG.info(
f'Software audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
f"Software audit completed for: {subcloud_name}, requesting "
f"sync_status update to {sync_status}"
)
return sync_status

View File

@@ -97,9 +97,7 @@ class SubcloudAuditManager(manager.Manager):
self.patch_audit_time = 0
self.firmware_audit = firmware_audit.FirmwareAudit()
self.kubernetes_audit = kubernetes_audit.KubernetesAudit()
self.kube_rootca_update_audit = (
kube_rootca_update_audit.KubeRootcaUpdateAudit()
)
self.kube_rootca_update_audit = kube_rootca_update_audit.KubeRootcaUpdateAudit()
self.software_audit = software_audit.SoftwareAudit()
def _add_missing_endpoints(self):
@@ -124,9 +122,7 @@ class SubcloudAuditManager(manager.Manager):
missing_endpoints = list(endpoint_type_set - subcloud_set)
for endpoint in missing_endpoints:
db_api.subcloud_status_create(
self.context, subcloud.id, endpoint
)
db_api.subcloud_status_create(self.context, subcloud.id, endpoint)
# Add a flag on a replicated filesystem to avoid re-running
# the DB checks for missing subcloud endpoints
open(file_path, "w").close()
@@ -267,8 +263,7 @@ class SubcloudAuditManager(manager.Manager):
# Determine whether to trigger a patch audit of each subcloud
if SubcloudAuditManager.force_patch_audit or (
current_time - self.patch_audit_time
>= CONF.scheduler.patch_audit_interval
current_time - self.patch_audit_time >= CONF.scheduler.patch_audit_interval
):
LOG.info("Trigger patch audit")
audit_patch = True
@@ -460,9 +455,7 @@ class SubcloudAuditManager(manager.Manager):
)
end = datetime.datetime.utcnow()
if num_fixed > 0:
LOG.info(
"Fixed up subcloud audit timestamp for %s subclouds." % num_fixed
)
LOG.info("Fixed up subcloud audit timestamp for %s subclouds." % num_fixed)
LOG.info("Fixup took %s seconds" % (end - start))
subcloud_ids = []
@@ -478,33 +471,44 @@ class SubcloudAuditManager(manager.Manager):
)
# Remove subclouds that don't qualify for this round of audit
for audit, subcloud_name, deploy_status, availability_status in (
list(subcloud_audits)):
for audit, subcloud_name, deploy_status, availability_status in list(
subcloud_audits
):
# Include failure deploy status states in the auditable list
# so that the subcloud can be set as offline
if (deploy_status not in
[consts.DEPLOY_STATE_DONE,
consts.DEPLOY_STATE_CONFIGURING,
consts.DEPLOY_STATE_CONFIG_FAILED,
consts.DEPLOY_STATE_CONFIG_ABORTED,
consts.DEPLOY_STATE_PRE_CONFIG_FAILED,
consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALL_ABORTED,
consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALLING,
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
consts.DEPLOY_STATE_UPGRADE_ACTIVATED,
consts.DEPLOY_STATE_RESTORING,
consts.DEPLOY_STATE_RESTORE_PREP_FAILED,
consts.DEPLOY_STATE_RESTORE_FAILED,
consts.DEPLOY_STATE_REHOME_PENDING]) or (
(deploy_status in [
if (
deploy_status
not in [
consts.DEPLOY_STATE_DONE,
consts.DEPLOY_STATE_CONFIGURING,
consts.DEPLOY_STATE_CONFIG_FAILED,
consts.DEPLOY_STATE_CONFIG_ABORTED,
consts.DEPLOY_STATE_PRE_CONFIG_FAILED,
consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALL_ABORTED,
consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALLING,
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
consts.DEPLOY_STATE_UPGRADE_ACTIVATED,
consts.DEPLOY_STATE_RESTORING,
consts.DEPLOY_STATE_RESTORE_PREP_FAILED,
consts.DEPLOY_STATE_RESTORE_FAILED,
consts.DEPLOY_STATE_REHOME_PENDING,
]
) or (
(
deploy_status
in [
consts.DEPLOY_STATE_INSTALLING,
consts.DEPLOY_STATE_REHOME_PENDING])
and availability_status ==
dccommon_consts.AVAILABILITY_OFFLINE):
LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
(subcloud_name, deploy_status))
consts.DEPLOY_STATE_REHOME_PENDING,
]
)
and availability_status == dccommon_consts.AVAILABILITY_OFFLINE
):
LOG.debug(
"Skip subcloud %s audit, deploy_status: %s"
% (subcloud_name, deploy_status)
)
skipped_subcloud_ids.append(audit.subcloud_id)
else:
pruned_subcloud_audits.append(audit)

View File

@@ -52,24 +52,25 @@ LOG = logging.getLogger(__name__)
# We will update the state of each subcloud in the dcorch about once per hour.
# Calculate how many iterations that will be.
SUBCLOUD_STATE_UPDATE_ITERATIONS = \
SUBCLOUD_STATE_UPDATE_ITERATIONS = (
dccommon_consts.SECONDS_IN_HOUR // CONF.scheduler.subcloud_audit_interval
)
class SubcloudAuditWorkerManager(manager.Manager):
"""Manages tasks related to audits."""
def __init__(self, *args, **kwargs):
LOG.debug(_('SubcloudAuditWorkerManager initialization...'))
LOG.debug(_("SubcloudAuditWorkerManager initialization..."))
super(SubcloudAuditWorkerManager, self).__init__(
service_name="subcloud_audit_worker_manager")
service_name="subcloud_audit_worker_manager"
)
self.context = context.get_admin_context()
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
# Keeps track of greenthreads we create to do work.
self.thread_group_manager = scheduler.ThreadGroupManager(
thread_pool_size=150)
self.thread_group_manager = scheduler.ThreadGroupManager(thread_pool_size=150)
# Track workers created for each subcloud.
self.subcloud_workers = dict()
self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)
@@ -77,84 +78,92 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.patch_audit = patch_audit.PatchAudit(self.context)
self.firmware_audit = firmware_audit.FirmwareAudit()
self.kubernetes_audit = kubernetes_audit.KubernetesAudit()
self.kube_rootca_update_audit = (
kube_rootca_update_audit.KubeRootcaUpdateAudit()
)
self.kube_rootca_update_audit = kube_rootca_update_audit.KubeRootcaUpdateAudit()
self.software_audit = software_audit.SoftwareAudit()
self.pid = os.getpid()
def audit_subclouds(self,
context,
subcloud_ids,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_openstack_audit,
kube_rootca_update_audit_data,
software_audit_data):
def audit_subclouds(
self,
context,
subcloud_ids,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_openstack_audit,
kube_rootca_update_audit_data,
software_audit_data,
):
"""Run audits of the specified subcloud(s)"""
LOG.debug('PID: %s, subclouds to audit: %s, do_openstack_audit: %s' %
(self.pid, subcloud_ids, do_openstack_audit))
LOG.debug(
"PID: %s, subclouds to audit: %s, do_openstack_audit: %s"
% (self.pid, subcloud_ids, do_openstack_audit)
)
for subcloud_id in subcloud_ids:
# Retrieve the subcloud and subcloud audit info
try:
subcloud = db_api.subcloud_get(self.context, subcloud_id)
subcloud_audits = db_api.subcloud_audits_get_and_start_audit(
self.context, subcloud_id)
self.context, subcloud_id
)
except exceptions.SubcloudNotFound:
# Possibility subcloud could have been deleted since the list of
# subclouds to audit was created.
LOG.info('Ignoring SubcloudNotFound when auditing subcloud %s' %
subcloud_id)
LOG.info(
"Ignoring SubcloudNotFound when auditing subcloud %s" % subcloud_id
)
continue
LOG.debug("PID: %s, starting audit of subcloud: %s." %
(self.pid, subcloud.name))
LOG.debug(
"PID: %s, starting audit of subcloud: %s." % (self.pid, subcloud.name)
)
# Check the per-subcloud audit flags
do_load_audit = subcloud_audits.load_audit_requested
# Currently we do the load audit as part of the patch audit,
# so if we want a load audit we need to do a patch audit.
do_patch_audit = (subcloud_audits.patch_audit_requested or
do_load_audit)
do_patch_audit = subcloud_audits.patch_audit_requested or do_load_audit
do_firmware_audit = subcloud_audits.firmware_audit_requested
do_kubernetes_audit = subcloud_audits.kubernetes_audit_requested
do_kube_rootca_update_audit = \
do_kube_rootca_update_audit = (
subcloud_audits.kube_rootca_update_audit_requested
)
update_subcloud_state = subcloud_audits.state_update_requested
do_software_audit = subcloud_audits.spare_audit_requested
# Create a new greenthread for each subcloud to allow the audits
# to be done in parallel. If there are not enough greenthreads
# in the pool, this will block until one becomes available.
self.subcloud_workers[subcloud.region_name] = \
self.thread_group_manager.start(self._do_audit_subcloud,
subcloud,
update_subcloud_state,
do_openstack_audit,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit,
do_kubernetes_audit,
do_kube_rootca_update_audit,
do_software_audit)
self.subcloud_workers[subcloud.region_name] = (
self.thread_group_manager.start(
self._do_audit_subcloud,
subcloud,
update_subcloud_state,
do_openstack_audit,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit,
do_kubernetes_audit,
do_kube_rootca_update_audit,
do_software_audit,
)
)
def update_subcloud_endpoints(self, context, subcloud_name, endpoints):
LOG.info(f"Updating service endpoints for subcloud {subcloud_name} "
"in endpoint cache")
LOG.info(
f"Updating service endpoints for subcloud {subcloud_name} in endpoint cache"
)
endpoint_cache.EndpointCache.update_master_service_endpoint_region(
subcloud_name, endpoints
)
def _update_subcloud_audit_fail_count(self, subcloud,
audit_fail_count):
def _update_subcloud_audit_fail_count(self, subcloud, audit_fail_count):
"""Update the subcloud's audit_fail_count directly to db.
It's safe to update audit_fail_count because only the audit actually cares
@@ -166,23 +175,28 @@ class SubcloudAuditWorkerManager(manager.Manager):
param audit_fail_count: count of failed audit.
"""
try:
db_api.subcloud_update(self.context, subcloud.id,
audit_fail_count=audit_fail_count)
db_api.subcloud_update(
self.context, subcloud.id, audit_fail_count=audit_fail_count
)
except exceptions.SubcloudNotFound:
# Possibly subcloud could have been deleted since we found it in db,
# ignore this benign error.
LOG.info('Ignoring SubcloudNotFound when attempting update'
'audit_fail_count for subcloud: %s' % subcloud.name)
LOG.info(
"Ignoring SubcloudNotFound when attempting update "
"audit_fail_count for subcloud: %s" % subcloud.name
)
def _audit_subcloud_openstack_app(self, subcloud_name, sysinv_client,
openstack_installed):
def _audit_subcloud_openstack_app(
self, subcloud_name, sysinv_client, openstack_installed
):
openstack_installed_current = False
# get a list of installed apps in the subcloud
try:
apps = sysinv_client.get_applications()
except Exception:
LOG.exception('Cannot retrieve installed apps for subcloud:%s'
% subcloud_name)
LOG.exception(
"Cannot retrieve installed apps for subcloud: %s" % subcloud_name
)
return
for app in apps:
@@ -198,13 +212,15 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
openstack_installed_current,
)
elif not openstack_installed_current and openstack_installed:
self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
openstack_installed_current,
)
def _do_audit_subcloud(
self,
@@ -241,30 +257,29 @@ class SubcloudAuditWorkerManager(manager.Manager):
do_firmware_audit,
do_kubernetes_audit,
do_kube_rootca_update_audit,
do_software_audit)
do_software_audit,
)
except Exception:
LOG.exception("Got exception auditing subcloud: %s" % subcloud.name)
if failures and len(failures) > 1:
# extra log for multiple failures:
LOG.error("Multiple failures auditing subcloud %s: "
"for endpoints: %s",
subcloud.name, ", ".join(sorted(failures)))
LOG.error(
"Multiple failures auditing subcloud %s: for endpoints: %s",
subcloud.name,
", ".join(sorted(failures)),
)
# Update the audit completion timestamp so it doesn't get
# audited again for a while.
db_api.subcloud_audits_end_audit(self.context,
subcloud.id, audits_done)
db_api.subcloud_audits_end_audit(self.context, subcloud.id, audits_done)
# Remove the worker for this subcloud
self.subcloud_workers.pop(subcloud.region_name, None)
LOG.debug("PID: %s, done auditing subcloud: %s." %
(self.pid, subcloud.name))
LOG.debug("PID: %s, done auditing subcloud: %s." % (self.pid, subcloud.name))
@staticmethod
def _should_perform_additional_audit(
subcloud_management_state,
subcloud_avail_status,
first_identity_sync_complete
subcloud_management_state, subcloud_avail_status, first_identity_sync_complete
):
return (
subcloud_management_state == dccommon_consts.MANAGEMENT_MANAGED
@@ -347,7 +362,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
keystone_client = OpenStackDriver(
region_name=subcloud_region,
region_clients=None,
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
).keystone_client
admin_session = keystone_client.session
if has_dcagent:
@@ -359,55 +374,69 @@ class SubcloudAuditWorkerManager(manager.Manager):
),
)
sysinv_client = SysinvClient(
subcloud_region, admin_session,
endpoint=keystone_client.endpoint_cache.get_endpoint("sysinv")
subcloud_region,
admin_session,
endpoint=keystone_client.endpoint_cache.get_endpoint("sysinv"),
)
fm_client = FmClient(
subcloud_region, admin_session,
endpoint=keystone_client.endpoint_cache.get_endpoint("fm")
subcloud_region,
admin_session,
endpoint=keystone_client.endpoint_cache.get_endpoint("fm"),
)
# TODO(vgluzrom): Revise and improve the debug and error messages
# as well as the exception causes
except keystone_exceptions.ConnectTimeout:
if avail_status_current == dccommon_consts.AVAILABILITY_OFFLINE:
LOG.debug("Identity or Platform endpoint for %s not "
"found, ignoring for offline "
"subcloud." % subcloud_name)
LOG.debug(
"Identity or Platform endpoint for %s not found, ignoring for "
"offline subcloud." % subcloud_name
)
return audits_done, failures
else:
# The subcloud will be marked as offline below.
LOG.error("Identity or Platform endpoint for online "
"subcloud: %s not found." % subcloud_name)
LOG.error(
"Identity or Platform endpoint for online subcloud: %s not found."
% subcloud_name
)
except keystone_exceptions.NotFound:
if subcloud.first_identity_sync_complete \
and avail_status_current == dccommon_consts.AVAILABILITY_ONLINE:
if (
subcloud.first_identity_sync_complete
and avail_status_current == dccommon_consts.AVAILABILITY_ONLINE
):
# The first identity sync is already complete
# Therefore this is an error
LOG.error("Identity or Platform endpoint for online "
"subcloud: %s not found." % subcloud_name)
LOG.error(
"Identity or Platform endpoint for online subcloud: %s not found."
% subcloud_name
)
else:
LOG.debug("Identity or Platform endpoint for %s not "
"found, ignoring for offline "
"subcloud or identity sync not done." % subcloud_name)
LOG.debug(
"Identity or Platform endpoint for %s not found, ignoring for "
"offline subcloud or identity sync not done." % subcloud_name
)
return audits_done, failures
except (keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
IndexError):
except (
keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
IndexError,
):
if avail_status_current == dccommon_consts.AVAILABILITY_OFFLINE:
LOG.info("Identity or Platform endpoint for %s not "
"found, ignoring for offline "
"subcloud." % subcloud_name)
LOG.info(
"Identity or Platform endpoint for %s not found, ignoring for "
"offline subcloud." % subcloud_name
)
return audits_done, failures
else:
# The subcloud will be marked as offline below.
LOG.error("Identity or Platform endpoint for online "
"subcloud: %s not found." % subcloud_name)
LOG.error(
"Identity or Platform endpoint for online subcloud: %s not found."
% subcloud_name
)
except Exception:
LOG.exception("Failed to create clients for subcloud: %s"
% subcloud_name)
LOG.exception("Failed to create clients for subcloud: %s" % subcloud_name)
if has_dcagent and dcagent_client:
LOG.debug(f"Starting dcagent audit for subcloud: {subcloud_name}")
@@ -433,8 +462,10 @@ class SubcloudAuditWorkerManager(manager.Manager):
dccommon_consts.SYNC_STATUS_IN_SYNC
)
audits_done.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
LOG.debug(f"Skipping following audits for subcloud {subcloud_name} because "
f"RegionOne audit data is not available: {audits_done}")
LOG.debug(
f"Skipping following audits for subcloud {subcloud_name} because "
f"RegionOne audit data is not available: {audits_done}"
)
audit_payload = self._build_dcagent_payload(
subcloud.management_state,
avail_status_current,
@@ -546,33 +577,40 @@ class SubcloudAuditWorkerManager(manager.Manager):
if avail_to_set == dccommon_consts.AVAILABILITY_ONLINE:
audit_fail_count = 0
LOG.debug('Setting new availability status: %s '
'on subcloud: %s' %
(avail_to_set, subcloud_name))
availability_data.update({
"availability_status": avail_to_set,
"update_state_only": False,
"audit_fail_count": audit_fail_count
})
LOG.debug(
"Setting new availability status: %s "
"on subcloud: %s" % (avail_to_set, subcloud_name)
)
availability_data.update(
{
"availability_status": avail_to_set,
"update_state_only": False,
"audit_fail_count": audit_fail_count,
}
)
elif audit_fail_count != subcloud.audit_fail_count:
# The subcloud remains offline, we only need to update
# the audit_fail_count in db directly by an audit worker
# to eliminate unnecessary notification to the dcmanager
self._update_subcloud_audit_fail_count(
subcloud,
audit_fail_count=audit_fail_count)
subcloud, audit_fail_count=audit_fail_count
)
elif update_subcloud_state:
# Nothing has changed, but we want to send a state update for this
# subcloud as an audit.
LOG.debug('Updating subcloud state unconditionally for subcloud %s'
% subcloud_name)
availability_data.update({
"availability_status": avail_status_current,
"update_state_only": True,
"audit_fail_count": None
})
LOG.debug(
"Updating subcloud state unconditionally for subcloud %s"
% subcloud_name
)
availability_data.update(
{
"availability_status": avail_status_current,
"update_state_only": True,
"audit_fail_count": None,
}
)
# If subcloud is managed, online, the identity was synced once
# and it doesn't have dcagent, audit additional resources
@@ -595,9 +633,12 @@ class SubcloudAuditWorkerManager(manager.Manager):
try:
endpoint_data[dccommon_consts.ENDPOINT_TYPE_PATCHING] = (
self.patch_audit.subcloud_patch_audit(
keystone_client.session, sysinv_client,
subcloud_management_ip, subcloud_name, subcloud_region,
patch_audit_data
keystone_client.session,
sysinv_client,
subcloud_management_ip,
subcloud_name,
subcloud_region,
patch_audit_data,
)
)
audits_done.append(dccommon_consts.ENDPOINT_TYPE_PATCHING)
@@ -618,8 +659,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
audits_done.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
except Exception:
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_LOAD)
failmsg % (subcloud.name, dccommon_consts.ENDPOINT_TYPE_LOAD)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
# Perform firmware audit
@@ -679,15 +719,18 @@ class SubcloudAuditWorkerManager(manager.Manager):
subcloud_region, sysinv_client, subcloud.openstack_installed
)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'openstack'))
failures.append('openstack')
LOG.exception(failmsg % (subcloud.name, "openstack"))
failures.append("openstack")
# Perform software audit
if do_software_audit:
try:
endpoint_data[dccommon_consts.ENDPOINT_TYPE_SOFTWARE] = (
self.software_audit.subcloud_software_audit(
keystone_client, subcloud_management_ip,
subcloud_name, subcloud_region, software_audit_data
keystone_client,
subcloud_management_ip,
subcloud_name,
subcloud_region,
software_audit_data,
)
)
audits_done.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
@@ -698,23 +741,29 @@ class SubcloudAuditWorkerManager(manager.Manager):
)
failures.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
# Create a new variable to store the update method to avoid line too long error
bulk_update_subcloud_availability_and_endpoint_status = (
self.state_rpc_client.bulk_update_subcloud_availability_and_endpoint_status
)
if availability_data or (endpoint_data and any(endpoint_data.values())):
# If a value is not None, an update should be sent to the rpc client
try:
self.state_rpc_client.\
bulk_update_subcloud_availability_and_endpoint_status(
self.context, subcloud_name, subcloud_region,
availability_data, endpoint_data
)
# If a value is not None, an update should be sent to the rpc client
bulk_update_subcloud_availability_and_endpoint_status(
self.context,
subcloud_name,
subcloud_region,
availability_data,
endpoint_data,
)
LOG.debug(
f'Notifying dcmanager-state, subcloud: {subcloud_name}, bulk '
'availability and endpoint status update'
f"Notifying dcmanager-state, subcloud: {subcloud_name}, bulk "
"availability and endpoint status update"
)
except Exception:
LOG.exception(
'Failed to notify dcmanager-state of subcloud batch '
'availability and endpoint status update, '
f'subcloud: {subcloud_name}'
"Failed to notify dcmanager-state of subcloud batch "
"availability and endpoint status update, "
f"subcloud: {subcloud_name}"
)
return audits_done, failures

View File

@@ -20,27 +20,29 @@
from dcmanager.db import api as db_api
def request_subcloud_audits(context,
update_subcloud_state=False,
audit_patch=False,
audit_load=False,
audit_firmware=False,
audit_kubernetes=False,
audit_kube_rootca=False,
audit_software=False,):
def request_subcloud_audits(
context,
update_subcloud_state=False,
audit_patch=False,
audit_load=False,
audit_firmware=False,
audit_kubernetes=False,
audit_kube_rootca=False,
audit_software=False,
):
values = {}
if update_subcloud_state:
values['state_update_requested'] = True
values["state_update_requested"] = True
if audit_patch:
values['patch_audit_requested'] = True
values["patch_audit_requested"] = True
if audit_load:
values['load_audit_requested'] = True
values["load_audit_requested"] = True
if audit_firmware:
values['firmware_audit_requested'] = True
values["firmware_audit_requested"] = True
if audit_kubernetes:
values['kubernetes_audit_requested'] = True
values["kubernetes_audit_requested"] = True
if audit_kube_rootca:
values['kube_rootca_update_audit_requested'] = True
values["kube_rootca_update_audit_requested"] = True
if audit_software:
values['spare_audit_requested'] = True
values["spare_audit_requested"] = True
db_api.subcloud_audits_update_all(context, values)

View File

@@ -1232,7 +1232,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
db_api.subcloud_destroy(self.ctx, subcloud.id)
wm._update_subcloud_audit_fail_count(subcloud, audit_fail_count)
mock_logging.info.assert_called_with(
'Ignoring SubcloudNotFound when attempting update'
'Ignoring SubcloudNotFound when attempting update '
'audit_fail_count for subcloud: %s' % subcloud.name)
def test_audit_subcloud_online_with_openstack_installed(self):

View File

@@ -25,6 +25,7 @@ formatted_modules = [
"dcorch",
"dcagent",
"dcmanager/api",
"dcmanager/audit",
]