Integrate dcagent with dcmanager-audit
This commit integrates the newly created dcagent with dcmanager-audit.
Currently, dcagent is disabled and will remain disabled until it is
installed and enabled in subclouds. To enable it, remove the early
return in the subcloud_has_dcagent function in dccommon/utils and the
comments guarding the dcagent calls.

NOTE: Patch and load audits will be deprecated in the next major
release, which is why they are not performed by dcagent. Until then,
they remain enabled.

Test plan:
- PASS: Run dcmanager audit with dcagent. Verify that only one call is
  made to audit the subcloud and that the response includes the correct
  sync status.
- PASS: Run dcmanager audit without dcagent. Verify the audit works as
  expected, querying each individual endpoint.
- PASS: Audit one subcloud with dcagent and another with the old audit
  workflow at the same time; verify both subclouds reach 'in-sync'
  status.
- PASS: Induce a change in the sync status of a subcloud's endpoints
  (by changing the cert-id of the kube rootca). Verify that the audit
  and the communication to dcmanager-state happen within the 30s audit
  interval.

Story: 2011106
Task: 50560

Change-Id: Ic330a18daefeb6f12fa7589ae6ba82cafd30f298
Signed-off-by: Victor Romano <victor.gluzromano@windriver.com>
commit ac33bc6af1
parent 0f15d27b36
committed by Victor Romano
@@ -399,6 +399,13 @@ def send_subcloud_shutdown_signal(subcloud_name):
     rvmc.power_off(subcloud_name, rvmc_config_file, LOG)
 
 
+def subcloud_has_dcagent(software_version: str):
+    # TODO(vgluzrom): remove "False" condition and uncomment code
+    # when dcagent service is enabled by default
+    return False
+    # return software_version >= consts.MIN_VERSION_FOR_DCAGENT
+
+
 def log_subcloud_msg(
     log_func: Callable, msg: str, subcloud_name: str = None, avail_status: str = None
 ):
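Once the dcagent service ships enabled by default, the TODO above intends for this helper to reduce to the commented-out version comparison. A sketch of that final form, under the assumption that consts.MIN_VERSION_FOR_DCAGENT stays the gating constant:

    def subcloud_has_dcagent(software_version: str):
        # Enabled form per the TODO: gate dcagent use on the subcloud's
        # software version (same comparison as the commented line above).
        return software_version >= consts.MIN_VERSION_FOR_DCAGENT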
@@ -21,10 +21,13 @@ from oslo_config import cfg
 from oslo_log import log as logging
 
 from dccommon import consts as dccommon_consts
+from dccommon.drivers.openstack.dcagent_v1 import DcagentClient
 from dccommon.drivers.openstack.fm import FmClient
 from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
 from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
 from dccommon import endpoint_cache
+from dccommon.utils import log_subcloud_msg
+from dccommon.utils import subcloud_has_dcagent
 from dcmanager.audit import alarm_aggregation
 from dcmanager.audit import base_audit
 from dcmanager.audit import firmware_audit
@@ -269,6 +272,38 @@ class SubcloudAuditWorkerManager(manager.Manager):
             and first_identity_sync_complete
         )
 
+    def _build_dcagent_payload(
+        self,
+        subcloud_management_state,
+        subcloud_avail_status,
+        first_identity_sync_complete,
+        firmware_audit_data,
+        kubernetes_audit_data,
+        kube_rootca_update_audit_data,
+        software_audit_data,
+        do_firmware_audit,
+        do_kubernetes_audit,
+        do_kube_rootca_update_audit,
+        do_software_audit,
+    ):
+        audit_payload = {dccommon_consts.BASE_AUDIT: ""}
+        if self._should_perform_additional_audit(
+            subcloud_management_state,
+            subcloud_avail_status,
+            first_identity_sync_complete,
+        ):
+            if do_firmware_audit and firmware_audit_data:
+                audit_payload[dccommon_consts.FIRMWARE_AUDIT] = firmware_audit_data
+            if do_kubernetes_audit and kubernetes_audit_data:
+                audit_payload[dccommon_consts.KUBERNETES_AUDIT] = kubernetes_audit_data
+            if do_kube_rootca_update_audit and kube_rootca_update_audit_data:
+                audit_payload[dccommon_consts.KUBE_ROOTCA_AUDIT] = (
+                    kube_rootca_update_audit_data
+                )
+            if do_software_audit and software_audit_data:
+                audit_payload[dccommon_consts.SOFTWARE_AUDIT] = software_audit_data
+        return audit_payload
+
     def _audit_subcloud(
         self,
         subcloud: models.Subcloud,
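For illustration, the payload this helper assembles always carries the base audit request, plus one entry per endpoint audit that is both requested and has RegionOne data available. A sketch of the result for a managed, online subcloud with firmware and software data on hand (the literal strings behind the dccommon_consts keys are not shown here and may differ):

    # Hedged sketch of a payload sent to dcagent, not a verbatim capture.
    audit_payload = {
        dccommon_consts.BASE_AUDIT: "",
        dccommon_consts.FIRMWARE_AUDIT: firmware_audit_data,
        dccommon_consts.SOFTWARE_AUDIT: software_audit_data,
    }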
@@ -297,14 +332,17 @@
         failures = list()
         availability_data = dict()
         endpoint_data = dict()
+        has_dcagent = subcloud_has_dcagent(subcloud.software_version)
 
         # Set defaults to None and disabled so we will still set disabled
         # status if we encounter an error.
 
         keystone_client = None
+        dcagent_client = None
         sysinv_client = None
         fm_client = None
         avail_to_set = dccommon_consts.AVAILABILITY_OFFLINE
+        failmsg = "Audit failure subcloud: %s, endpoint: %s"
         try:
             keystone_client = OpenStackDriver(
                 region_name=subcloud_region,
@@ -312,6 +350,14 @@
                 fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips
             ).keystone_client
             admin_session = keystone_client.session
+            if has_dcagent:
+                dcagent_client = DcagentClient(
+                    subcloud_region,
+                    admin_session,
+                    endpoint=endpoint_cache.build_subcloud_endpoint(
+                        subcloud_management_ip, "dcagent"
+                    ),
+                )
             sysinv_client = SysinvClient(
                 subcloud_region, admin_session,
                 endpoint=keystone_client.endpoint_cache.get_endpoint("sysinv")
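Note that the dcagent endpoint is built directly from the subcloud management IP rather than looked up in the keystone catalog, unlike the sysinv and fm clients below, which use keystone_client.endpoint_cache. A minimal usage sketch with hypothetical values; the exact URL format is whatever endpoint_cache.build_subcloud_endpoint produces:

    # Hypothetical management IP and region name, for illustration only.
    endpoint = endpoint_cache.build_subcloud_endpoint("192.168.101.2", "dcagent")
    dcagent_client = DcagentClient("subcloud1", admin_session, endpoint=endpoint)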
@@ -363,8 +409,114 @@
             LOG.exception("Failed to create clients for subcloud: %s"
                           % subcloud_name)
 
-        # Check availability of the subcloud
-        if sysinv_client:
+        if has_dcagent and dcagent_client:
+            LOG.debug(f"Starting dcagent audit for subcloud: {subcloud_name}")
+            # If we don't have the audit data, we won't send the request to the
+            # dcagent service, so we set the status to "in sync"
+            if do_firmware_audit and not firmware_audit_data:
+                endpoint_data[dccommon_consts.ENDPOINT_TYPE_FIRMWARE] = (
+                    dccommon_consts.SYNC_STATUS_IN_SYNC
+                )
+                audits_done.append(dccommon_consts.ENDPOINT_TYPE_FIRMWARE)
+            if do_kubernetes_audit and not kubernetes_audit_data:
+                endpoint_data[dccommon_consts.ENDPOINT_TYPE_KUBERNETES] = (
+                    dccommon_consts.SYNC_STATUS_IN_SYNC
+                )
+                audits_done.append(dccommon_consts.ENDPOINT_TYPE_KUBERNETES)
+            if do_kube_rootca_update_audit and not kube_rootca_update_audit_data:
+                endpoint_data[dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA] = (
+                    dccommon_consts.SYNC_STATUS_IN_SYNC
+                )
+                audits_done.append(dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA)
+            if do_software_audit and not software_audit_data:
+                endpoint_data[dccommon_consts.ENDPOINT_TYPE_SOFTWARE] = (
+                    dccommon_consts.SYNC_STATUS_IN_SYNC
+                )
+                audits_done.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
+            LOG.debug(f"Skipping following audits for subcloud {subcloud_name} because "
+                      f"RegionOne audit data is not available: {audits_done}")
+            audit_payload = self._build_dcagent_payload(
+                subcloud.management_state,
+                avail_status_current,
+                subcloud.first_identity_sync_complete,
+                firmware_audit_data,
+                kubernetes_audit_data,
+                kube_rootca_update_audit_data,
+                software_audit_data,
+                do_firmware_audit,
+                do_kubernetes_audit,
+                do_kube_rootca_update_audit,
+                do_software_audit,
+            )
+            audit_results = {}
+            try:
+                audit_results = dcagent_client.audit(audit_payload)
+            except Exception:
+                LOG.exception(failmsg % (subcloud.name, "dcagent"))
+                failures.append("dcagent")
+            LOG.debug(
+                f"Audit results for subcloud "
+                f"{subcloud_name}: {audit_results}"
+            )
+            for audit_type, audit_value in audit_results.items():
+                if audit_type == dccommon_consts.BASE_AUDIT:
+                    avail_to_set = audit_value.get("availability")
+                    if avail_to_set == dccommon_consts.AVAILABILITY_OFFLINE:
+                        inactive_sg = audit_value.get("inactive_sg")
+                        msg = f"Inactive service groups: {inactive_sg}"
+                        log_subcloud_msg(LOG.debug, msg, subcloud_name, avail_to_set)
+                    alarms = audit_value.get("alarms")
+                    if alarms:
+                        self.alarm_aggr.update_alarm_summary(subcloud_name, alarms)
+                elif audit_value:
+                    endpoint_type = dccommon_consts.DCAGENT_ENDPOINT_TYPE_MAP[
+                        audit_type
+                    ]
+                    endpoint_data[endpoint_type] = audit_value
+                    audits_done.append(endpoint_type)
+            # Patch and load audits are not done in dcagent,
+            # so we need to do it separately
+            if self._should_perform_additional_audit(
+                subcloud.management_state,
+                avail_to_set,
+                subcloud.first_identity_sync_complete,
+            ):
+                if do_patch_audit and patch_audit_data:
+                    try:
+                        endpoint_data[dccommon_consts.ENDPOINT_TYPE_PATCHING] = (
+                            self.patch_audit.subcloud_patch_audit(
+                                keystone_client.session,
+                                sysinv_client,
+                                subcloud_management_ip,
+                                subcloud_name,
+                                subcloud_region,
+                                patch_audit_data,
+                            )
+                        )
+                        audits_done.append(dccommon_consts.ENDPOINT_TYPE_PATCHING)
+                    except Exception:
+                        LOG.exception(
+                            failmsg
+                            % (subcloud.name, dccommon_consts.ENDPOINT_TYPE_PATCHING)
+                        )
+                        failures.append(dccommon_consts.ENDPOINT_TYPE_PATCHING)
+                if do_load_audit and patch_audit_data:
+                    try:
+                        endpoint_data[dccommon_consts.ENDPOINT_TYPE_LOAD] = (
+                            self.patch_audit.subcloud_load_audit(
+                                sysinv_client, subcloud_name, patch_audit_data
+                            )
+                        )
+                        audits_done.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
+                    except Exception:
+                        LOG.exception(
+                            failmsg
+                            % (subcloud.name, dccommon_consts.ENDPOINT_TYPE_LOAD)
+                        )
+                        failures.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
+
+        # Check availability for subcloud that doesn't have dcagent
+        if not has_dcagent and sysinv_client:
             # Avoid a network call to sysinv here if possible:
             # If prestaging is active we can assume that the subcloud
             # is online (otherwise prestaging will fail):
||||||
@@ -422,9 +574,9 @@ class SubcloudAuditWorkerManager(manager.Manager):
|
|||||||
"audit_fail_count": None
|
"audit_fail_count": None
|
||||||
})
|
})
|
||||||
|
|
||||||
# If subcloud is managed and online and the identity was synced once,
|
# If subcloud is managed, online, the identity was synced once
|
||||||
# audit additional resources
|
# and it doesn't have dcagent, audit additional resources
|
||||||
if self._should_perform_additional_audit(
|
if not has_dcagent and self._should_perform_additional_audit(
|
||||||
subcloud.management_state,
|
subcloud.management_state,
|
||||||
avail_to_set,
|
avail_to_set,
|
||||||
subcloud.first_identity_sync_complete,
|
subcloud.first_identity_sync_complete,
|
||||||