Reformat dcmanager-audit to support dcagent

Before introducing the new dcagent several changes needed to be made
to dcmanager-audit. That is:

Refactor each individual audit (base audit, firmware, kubernetes,
kube rootca and software) to separate all subcloud client calls in
one single function (get_subcloud_audit_data, to be used by dcagent
periodic info gather loop) and another function to get the subcloud
sync status (get_subcloud_sync_status, to be used by dcagent to
process the data in the subcloud and return the sync status to
the system controller).

NOTES:
  - As patch and load audits will be deprecated in the next major
    release, no effort was made to refactor both patch and load audit.
  - All tests described below were executed applying [1] and [2] as
    well, to avoid retesting.

[1]: https://review.opendev.org/c/starlingx/distcloud/+/923350
[2]: https://review.opendev.org/c/starlingx/distcloud/+/923351

Test plan:
  - PASS: Run dcmanager audit with dcagent. Verify only one call is
          made to audit the subcloud and the response includes the
          correct sync status.
  - PASS: Run dcmanager audit without dcagent. Verify the audit
          works as expected querying each individual endpoint.

Story: 2011106
Task: 50558

Change-Id: Ib955ff0c4f2035af2f59b6873f5779b71a8710ce
Signed-off-by: Victor Romano <victor.gluzromano@windriver.com>
This commit is contained in:
Victor Romano
2024-07-02 00:26:55 -03:00
parent 8797888d14
commit d4d548d7c6
23 changed files with 978 additions and 661 deletions

View File

@@ -126,6 +126,8 @@ ENDPOINT_AUDIT_REQUESTS = {
ENDPOINT_TYPE_SOFTWARE: "spare_audit_requested",
}
SKIP_AUDIT = "skip"
# Well known region names
SYSTEM_CONTROLLER_NAME = "SystemController"
DEFAULT_REGION_NAME = "RegionOne"

View File

@@ -64,7 +64,7 @@ class OpenStackDriver(object):
:param region_name: The name of the region. Defaults to "RegionOne".
:type region_name: str
:param thread_name: The name of the thread. Defaults to "dcorch".
:param thread_name: The name of the thread. Defaults to "dc".
:type thread_name: str
:param auth_url: The authentication URL.
:type auth_url: str

View File

@@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
#
from keystoneauth1.session import Session as keystone_session
from oslo_log import log
import requests
@@ -27,13 +27,19 @@ REST_DEFAULT_TIMEOUT = 900
class SoftwareClient(base.DriverBase):
"""Software V1 driver."""
def __init__(self, region, session, endpoint=None):
def __init__(
self,
region: str,
session: keystone_session,
endpoint: str = None,
endpoint_type: str = consts.KS_ENDPOINT_ADMIN,
):
# Get an endpoint and token.
if not endpoint:
self.endpoint = session.get_endpoint(
service_type="usm",
region_name=region,
interface=consts.KS_ENDPOINT_ADMIN,
interface=endpoint_type,
)
else:
self.endpoint = endpoint

View File

@@ -19,6 +19,7 @@ import os
from cgtsclient.exc import HTTPBadRequest
from cgtsclient.exc import HTTPConflict
from cgtsclient.exc import HTTPNotFound
from keystoneauth1.session import Session as keystone_session
from oslo_log import log
from oslo_utils import encodeutils
@@ -121,11 +122,11 @@ class SysinvClient(base.DriverBase):
def __init__(
self,
region,
session,
timeout=consts.SYSINV_CLIENT_REST_DEFAULT_TIMEOUT,
endpoint_type=consts.KS_ENDPOINT_ADMIN,
endpoint=None,
region: str,
session: keystone_session,
timeout: int = consts.SYSINV_CLIENT_REST_DEFAULT_TIMEOUT,
endpoint_type: str = consts.KS_ENDPOINT_ADMIN,
endpoint: str = None,
):
try:
# TOX cannot import cgts_client and all the dependencies therefore
@@ -736,6 +737,10 @@ class SysinvClient(base.DriverBase):
"""Get a list of devices for a given host"""
return self.sysinv_client.pci_device.list(host_name)
def get_all_hosts_device_list(self):
"""Get a list of devices for all hosts"""
return self.sysinv_client.pci_device.list_all()
def get_device_label_list(self):
"""Get a list of device labels"""
return self.sysinv_client.device_label.list()

View File

@@ -23,7 +23,6 @@ CERT_MANAGER_CERTIFICATE = "certificates"
class KubeOperator(object):
def __init__(self):
self._kube_client_batch = None
self._kube_client_core = None

View File

@@ -20,7 +20,6 @@ from dccommon.tests import base
class TestOpenStackDriver(base.DCCommonTestCase):
@mock.patch.object(sdk, "KeystoneClient")
@mock.patch.object(sdk.OpenStackDriver, "_is_token_valid")
def test_init(self, mock_keystone_client, mock_is_token_valid):

View File

@@ -104,7 +104,6 @@ def mocked_requests_failure(*args, **kwargs):
class TestSoftwareClient(base.DCCommonTestCase):
def setUp(self):
super(TestSoftwareClient, self).setUp()
self.ctx = utils.dummy_context()

View File

@@ -13,7 +13,6 @@ FAKE_LOG_FILE = "/dev/null"
class TestUtils(base.DCCommonTestCase):
def setUp(self):
super(TestUtils, self).setUp()

View File

@@ -21,6 +21,7 @@ import random
import re
import threading
import time
from typing import Callable
from eventlet.green import subprocess
from oslo_log import log as logging
@@ -396,3 +397,14 @@ def send_subcloud_shutdown_signal(subcloud_name):
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name, consts.RVMC_CONFIG_FILE_NAME
)
rvmc.power_off(subcloud_name, rvmc_config_file, LOG)
def log_subcloud_msg(
log_func: Callable, msg: str, subcloud_name: str = None, avail_status: str = None
):
prefix = ""
if subcloud_name:
prefix += f"Subcloud: {subcloud_name}. "
if avail_status:
prefix += f"Availability: {avail_status}. "
log_func(f"{prefix}{msg}")

View File

@@ -16,6 +16,8 @@
from oslo_log import log as logging
from dccommon.drivers.openstack.fm import FmClient
from dccommon.utils import log_subcloud_msg
from dcmanager.common import consts
from dcmanager.db import api as db_api
@@ -28,20 +30,33 @@ class AlarmAggregation(object):
def __init__(self, context):
self.context = context
def update_alarm_summary(self, name, fm_client):
LOG.debug("Updating alarm summary for %s" % name)
@classmethod
def get_alarm_summary(cls, fm_client: FmClient, name: str = None):
msg = "Getting alarm summary"
log_subcloud_msg(LOG.debug, msg, name)
try:
alarms = fm_client.get_alarm_summary()
alarm_updates = {'critical_alarms': alarms[0].critical,
'major_alarms': alarms[0].major,
'minor_alarms': alarms[0].minor,
'warnings': alarms[0].warnings}
alarm_updates = self._set_cloud_status(alarm_updates)
alarm_updates = {
'critical_alarms': alarms[0].critical,
'major_alarms': alarms[0].major,
'minor_alarms': alarms[0].minor,
'warnings': alarms[0].warnings
}
alarm_updates = cls._set_cloud_status(alarm_updates)
return alarm_updates
except Exception as e:
msg = f"Failed to get alarms. Error: {e}"
log_subcloud_msg(LOG.error, msg, name)
def update_alarm_summary(self, name: str, alarm_updates: dict):
LOG.debug(f"Updating alarm summary for {name}")
try:
db_api.subcloud_alarms_update(self.context, name, alarm_updates)
except Exception as e:
LOG.error('Failed to update alarms for %s error: %s' % (name, e))
LOG.error(f"Failed to update alarms for {name}. Error: {e}")
def _set_cloud_status(self, alarm_dict):
@staticmethod
def _set_cloud_status(alarm_dict):
if alarm_dict.get('critical_alarms') > 0:
status = consts.ALARM_CRITICAL_STATUS
elif (alarm_dict.get('major_alarms') > 0) or \

View File

@@ -0,0 +1,87 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.fm import FmClient
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.utils import log_subcloud_msg
from dcmanager.audit import alarm_aggregation
from dcmanager.common import consts
LOG = logging.getLogger(__name__)
def get_subcloud_base_audit(
    sysinv_client: SysinvClient = None,
    fm_client: FmClient = None,
    subcloud_name: str = None,
):
    """Collect the base audit information for a subcloud.

    Each piece of data is gathered only when the matching client is
    supplied, so callers can request availability, alarms, or both.

    :param sysinv_client: sysinv client of the subcloud, or None to skip
        the availability check.
    :param fm_client: fm client of the subcloud, or None to skip the
        alarm summary.
    :param subcloud_name: subcloud name, used only for log prefixes.
    :returns: tuple of (availability status, inactive service groups,
        alarm updates); each element is None when not requested.
    """
    availability = None
    inactive_groups = None
    alarms = None
    if sysinv_client:
        availability, inactive_groups = get_subcloud_availability_status(
            sysinv_client, subcloud_name
        )
    if fm_client:
        alarms = alarm_aggregation.AlarmAggregation.get_alarm_summary(
            fm_client, subcloud_name
        )
    return availability, inactive_groups, alarms
def get_subcloud_availability_status(
    sysinv_client: SysinvClient, subcloud_name: str = None
):
    """Check if the subcloud is online or offline.

    The subcloud is declared online when every service group name that
    appears in a non-active state also has an active instance, and at
    least one service group is active.

    Note: the inactive service groups are returned for logging on the
    system controller only, as the function runs on the subcloud.

    :param sysinv_client: The subcloud sysinv client.
    :param subcloud_name: The subcloud name.
    :returns: availability status, list of inactive service groups
    """
    status = dccommon_consts.AVAILABILITY_OFFLINE
    service_groups = None
    only_inactive = None

    # Query the subcloud for its current service groups.
    try:
        service_groups = sysinv_client.get_service_groups()
    except Exception as e:
        msg = f"Cannot retrieve service groups. Error: {e}"
        log_subcloud_msg(LOG.warn, msg, subcloud_name)

    if service_groups:
        active = []
        inactive = []
        # Sort every service group name into the active or
        # non-active bucket based on its reported state.
        for group in service_groups:
            bucket = (
                active
                if group.state == consts.SERVICE_GROUP_STATUS_ACTIVE
                else inactive
            )
            bucket.append(group.service_group_name)
        # Keep only the names that never show up as active.
        only_inactive = [name for name in inactive if name not in active]
        # No exclusively-inactive groups plus at least one active
        # group means the subcloud is reachable and healthy.
        if not only_inactive and active:
            status = dccommon_consts.AVAILABILITY_ONLINE
        else:
            msg = f"Non-active service groups: {only_inactive}"
            log_subcloud_msg(LOG.info, msg, subcloud_name)

    return status, only_inactive

View File

@@ -20,7 +20,7 @@ from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.utils import log_subcloud_msg
from dcmanager.common import consts
from dcmanager.common import utils
@@ -29,11 +29,19 @@ LOG = logging.getLogger(__name__)
class FirmwareAuditData(object):
def __init__(self, bitstream_type, bitstream_id,
bmc, retimer_included,
key_signature, revoke_key_id,
applied, pci_vendor,
pci_device, applied_labels):
def __init__(
self,
bitstream_type,
bitstream_id,
bmc,
retimer_included,
key_signature,
revoke_key_id,
applied,
pci_vendor,
pci_device,
applied_labels,
):
self.bitstream_type = bitstream_type
self.bitstream_id = bitstream_id
self.bmc = bmc
@@ -47,16 +55,16 @@ class FirmwareAuditData(object):
def to_dict(self):
return {
'bitstream_type': self.bitstream_type,
'bitstream_id': self.bitstream_id,
'bmc': self.bmc,
'retimer_included': self.retimer_included,
'key_signature': self.key_signature,
'revoke_key_id': self.revoke_key_id,
'applied': self.applied,
'pci_vendor': self.pci_vendor,
'pci_device': self.pci_device,
'applied_labels': self.applied_labels,
"bitstream_type": self.bitstream_type,
"bitstream_id": self.bitstream_id,
"bmc": self.bmc,
"retimer_included": self.retimer_included,
"key_signature": self.key_signature,
"revoke_key_id": self.revoke_key_id,
"applied": self.applied,
"pci_vendor": self.pci_vendor,
"pci_device": self.pci_device,
"applied_labels": self.applied_labels,
}
@classmethod
@@ -69,9 +77,8 @@ class FirmwareAuditData(object):
class FirmwareAudit(object):
"""Manages tasks related to firmware audits."""
def __init__(self, context):
LOG.debug('FirmwareAudit initialization...')
self.context = context
def __init__(self):
LOG.debug("FirmwareAudit initialization...")
self.audit_count = 0
def get_regionone_audit_data(self):
@@ -88,12 +95,14 @@ class FirmwareAudit(object):
region_clients=None,
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
).keystone_client
endpoint = m_os_ks_client.endpoint_cache.get_endpoint('sysinv')
endpoint = m_os_ks_client.endpoint_cache.get_endpoint("sysinv")
sysinv_client = SysinvClient(
dccommon_consts.DEFAULT_REGION_NAME, m_os_ks_client.session,
endpoint=endpoint)
dccommon_consts.DEFAULT_REGION_NAME,
m_os_ks_client.session,
endpoint=endpoint,
)
except Exception:
LOG.exception('Failure initializing OS Client, skip firmware audit.')
LOG.exception("Failure initializing OS Client, skip firmware audit.")
return None
filtered_images = []
@@ -104,59 +113,77 @@ class FirmwareAudit(object):
# Filter images which have been applied on RegionOne
for image in local_device_images:
if image.applied:
filtered_images.append(FirmwareAuditData(
image.bitstream_type, image.bitstream_id, image.bmc,
image.retimer_included, image.key_signature,
image.revoke_key_id, image.applied, image.pci_vendor,
image.pci_device, image.applied_labels
))
filtered_images.append(
FirmwareAuditData(
image.bitstream_type,
image.bitstream_id,
image.bmc,
image.retimer_included,
image.key_signature,
image.revoke_key_id,
image.applied,
image.pci_vendor,
image.pci_device,
image.applied_labels,
)
)
LOG.debug("RegionOne applied_images: %s" % filtered_images)
except Exception:
LOG.exception('Cannot retrieve device images for RegionOne, '
'skip firmware audit')
LOG.exception(
"Cannot retrieve device images for RegionOne, skip firmware audit"
)
return filtered_images
def _check_for_label_match(self, subcloud_host_device_label_list,
device_uuid,
label_key, label_value):
@staticmethod
def _check_for_label_match(
subcloud_host_device_label_list, device_uuid, label_key, label_value
):
for device_label in subcloud_host_device_label_list:
if device_label.pcidevice_uuid and \
device_uuid == device_label.pcidevice_uuid and \
label_key == device_label.label_key and \
label_value == device_label.label_value:
if (
device_label.pcidevice_uuid
and device_uuid == device_label.pcidevice_uuid
and label_key == device_label.label_key
and label_value == device_label.label_value
):
return True
return False
def _check_image_match(self, subcloud_image, system_controller_image):
@staticmethod
def _check_image_match(subcloud_image, system_controller_image):
if (
(
system_controller_image.bitstream_type ==
consts.BITSTREAM_TYPE_ROOT_KEY and
system_controller_image.key_signature == subcloud_image.key_signature
) or (
system_controller_image.bitstream_type ==
consts.BITSTREAM_TYPE_FUNCTIONAL and
system_controller_image.bitstream_id ==
subcloud_image.bitstream_id and
system_controller_image.bmc == subcloud_image.bmc and
system_controller_image.retimer_included ==
subcloud_image.retimer_included
) or (
system_controller_image.bitstream_type ==
consts.BITSTREAM_TYPE_KEY_REVOCATION and
system_controller_image.revoke_key_id == subcloud_image.revoke_key_id
system_controller_image.bitstream_type == consts.BITSTREAM_TYPE_ROOT_KEY
and system_controller_image.key_signature
== subcloud_image.key_signature
)
or (
system_controller_image.bitstream_type
== consts.BITSTREAM_TYPE_FUNCTIONAL
and system_controller_image.bitstream_id == subcloud_image.bitstream_id
and system_controller_image.bmc == subcloud_image.bmc
and system_controller_image.retimer_included
== subcloud_image.retimer_included
)
or (
system_controller_image.bitstream_type
== consts.BITSTREAM_TYPE_KEY_REVOCATION
and system_controller_image.revoke_key_id
== subcloud_image.revoke_key_id
)
):
return True
return False
def _check_subcloud_device_has_image(self,
subcloud_name,
subcloud_sysinv_client,
image,
enabled_host_device_list,
subcloud_device_image_states,
subcloud_device_label_list):
@classmethod
def _check_subcloud_device_has_image(
cls,
subcloud_name,
image,
enabled_host_device_list,
subcloud_device_image_states,
subcloud_device_label_list,
subcloud_device_images,
):
apply_to_all_devices = False
if image.applied_labels:
# Returns true if the list contains at least one empty dict.
@@ -178,12 +205,9 @@ class FirmwareAudit(object):
for image_label in image.applied_labels:
label_key = list(image_label.keys())[0]
label_value = image_label.get(label_key)
is_device_eligible = \
self._check_for_label_match(
subcloud_device_label_list,
device.uuid,
label_key,
label_value)
is_device_eligible = cls._check_for_label_match(
subcloud_device_label_list, device.uuid, label_key, label_value
)
# If device label matches any image label stop checking
# for any other label matches
if is_device_eligible:
@@ -193,22 +217,23 @@ class FirmwareAudit(object):
if not is_device_eligible:
continue
if image.pci_vendor == device.pvendor_id and \
image.pci_device == device.pdevice_id:
if (
image.pci_vendor == device.pvendor_id
and image.pci_device == device.pdevice_id
):
device_image_state = None
subcloud_image = None
for device_image_state_obj in subcloud_device_image_states:
if device_image_state_obj.pcidevice_uuid == device.uuid:
try:
subcloud_image = subcloud_sysinv_client.\
get_device_image(device_image_state_obj.image_uuid)
uuid = device_image_state_obj.image_uuid
subcloud_image = subcloud_device_images[uuid]
except Exception:
LOG.exception('Cannot retrieve device image for '
'subcloud: %s, skip firmware '
'audit' % subcloud_name)
msg = "Cannot retrieve device image, skip firmware audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return False
if self._check_image_match(subcloud_image, image):
if cls._check_image_match(subcloud_image, image):
device_image_state = device_image_state_obj
break
else:
@@ -216,85 +241,146 @@ class FirmwareAudit(object):
# means the image hasn't been applied yet
return False
if device_image_state and \
device_image_state.status != "completed":
if device_image_state and device_image_state.status != "completed":
# If device image state is not completed it means
# that the image has not been written to the device yet
return False
return True
def subcloud_firmware_audit(
self, sysinv_client, subcloud_name, audit_data
@classmethod
def get_subcloud_audit_data(
cls,
sysinv_client: SysinvClient,
subcloud_name: str = None,
):
LOG.info('Triggered firmware audit for: %s.' % subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
if not audit_data:
LOG.info(
f'Firmware audit skipped for: {subcloud_name}. There are no images '
f'to audit, requesting sync_status update to {sync_status}'
)
return sync_status
enabled_host_device_list = None
subcloud_device_image_states = None
subcloud_device_label_list = None
subcloud_device_images = None
skip_audit = 4 * [dccommon_consts.SKIP_AUDIT]
# Retrieve all the devices that are present in this subcloud.
try:
subcloud_hosts = sysinv_client.get_hosts()
enabled_host_device_list = []
for host in subcloud_hosts:
host_devices = sysinv_client.get_host_device_list(host.uuid)
for device in host_devices:
if device.enabled:
enabled_host_device_list.append(device)
host_devices = sysinv_client.get_all_hosts_device_list()
for device in host_devices:
if device.enabled:
enabled_host_device_list.append(device)
except Exception:
LOG.exception('Cannot retrieve device image states for subcloud: %s, '
'skip firmware audit' % subcloud_name)
return None
msg = "Cannot retrieve host device list, skip firmware audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
# If there are no enabled devices on the subcloud, then report the
# sync status as in-sync
# If there are no enabled devices on the subcloud, exit the firmware audit
if not enabled_host_device_list:
LOG.info("No enabled devices on the subcloud %s,"
"exiting firmware audit" % subcloud_name)
return sync_status
return enabled_host_device_list, None, None, None
# Retrieve the device image states on this subcloud.
try:
subcloud_device_image_states = sysinv_client.get_device_image_states()
LOG.debug("Subcloud %s device_image_states: %s" %
(subcloud_name, subcloud_device_image_states))
msg = f"Device_image_states: {subcloud_device_image_states}"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
except Exception:
LOG.exception('Cannot retrieve device image states for subcloud: %s, '
'skip firmware audit' % subcloud_name)
return None
msg = "Cannot retrieve device image states, skip firmware audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
# Retrieve device label list for all devices on this subcloud.
try:
subcloud_device_label_list = \
sysinv_client.get_device_label_list()
LOG.debug("Subcloud %s: subcloud_device_label_list"
" fetched" % (subcloud_name))
subcloud_device_label_list = sysinv_client.get_device_label_list()
msg = f"Subcloud_device_label_list: {subcloud_device_label_list}"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
except Exception:
LOG.exception('Cannot retrieve device image states for '
'subcloud: %s, skip firmware audit' % subcloud_name)
msg = "Cannot retrieve device label list, skip firmware audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
# Retrieve the device images on this subcloud.
try:
subcloud_device_images = sysinv_client.get_device_images()
if subcloud_device_images:
subcloud_device_images = {
image.uuid: image
for image in subcloud_device_images
}
msg = f"Device_images: {subcloud_device_images}"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
except Exception:
msg = "Cannot retrieve device images, skip firmware audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
return (
enabled_host_device_list,
subcloud_device_image_states,
subcloud_device_label_list,
subcloud_device_images,
)
@classmethod
def get_subcloud_sync_status(
cls,
sysinv_client: SysinvClient,
audit_data,
subcloud_name: str = None,
):
subcloud_audit_data = cls.get_subcloud_audit_data(sysinv_client, subcloud_name)
if dccommon_consts.SKIP_AUDIT in subcloud_audit_data:
return None
(
enabled_host_device_list,
subcloud_device_image_states,
subcloud_device_label_list,
subcloud_device_images,
) = subcloud_audit_data
# If there are no enabled devices on the subcloud, then report the
# sync status as in-sync
if not enabled_host_device_list:
msg = "No enabled devices on the subcloud, exiting firmware audit"
log_subcloud_msg(LOG.info, msg, subcloud_name)
return dccommon_consts.SYNC_STATUS_IN_SYNC
elif enabled_host_device_list == dccommon_consts.SKIP_AUDIT:
return None
# Check that all device images applied in RegionOne
# are applied and installed on this subcloud
# The audit_data for region one is a dictionary
for image in audit_data:
# audit_data will be a dict from passing through RPC, so objectify
# audit_data will be a dict from passing through RPC/api, so objectify
image = FirmwareAuditData.from_dict(image)
proceed = self._check_subcloud_device_has_image(
subcloud_name, sysinv_client, image, enabled_host_device_list,
subcloud_device_image_states, subcloud_device_label_list
proceed = cls._check_subcloud_device_has_image(
subcloud_name,
image,
enabled_host_device_list,
subcloud_device_image_states,
subcloud_device_label_list,
subcloud_device_images,
)
if not proceed:
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
break
return dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
LOG.info(
f'Firmware audit completed for: {subcloud_name}, requesting sync_status'
f'update to {sync_status}'
return dccommon_consts.SYNC_STATUS_IN_SYNC
def subcloud_firmware_audit(
self,
sysinv_client: SysinvClient,
subcloud_name: str,
audit_data: list[FirmwareAuditData],
):
LOG.info(f"Triggered firmware audit for: {subcloud_name}.")
if not audit_data:
LOG.debug("No RegionOne images to audit, exiting firmware audit")
return dccommon_consts.SYNC_STATUS_IN_SYNC
sync_status = self.get_subcloud_sync_status(
sysinv_client, audit_data, subcloud_name
)
return sync_status
if sync_status:
LOG.info(
f'Firmware audit completed for: {subcloud_name}, requesting sync_status'
f'update to {sync_status}'
)
return sync_status

View File

@@ -4,35 +4,40 @@
# SPDX-License-Identifier: Apache-2.0
#
from fm_api.constants import FM_ALARM_ID_CERT_EXPIRED
from fm_api.constants import FM_ALARM_ID_CERT_EXPIRING_SOON
from oslo_config import cfg
from oslo_log import log as logging
from fm_api.constants import FM_ALARM_ID_CERT_EXPIRED
from fm_api.constants import FM_ALARM_ID_CERT_EXPIRING_SOON
from dccommon import consts as dccommon_consts
from dccommon import utils as dccommon_utils
from dccommon.drivers.openstack.fm import FmClient
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.utils import log_subcloud_msg
from dcmanager.common import utils
from dcmanager.db.sqlalchemy import models
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
KUBE_ROOTCA_ALARM_LIST = [FM_ALARM_ID_CERT_EXPIRED,
FM_ALARM_ID_CERT_EXPIRING_SOON, ]
MONITORED_ALARM_ENTITIES = ['system.certificate.kubernetes-root-ca', ]
KUBE_ROOTCA_ALARM_LIST = [
FM_ALARM_ID_CERT_EXPIRED,
FM_ALARM_ID_CERT_EXPIRING_SOON,
]
MONITORED_ALARM_ENTITIES = [
"system.certificate.kubernetes-root-ca",
]
AUDIT_TYPE = "kube rootca update"
CERT_BASED = "cert_based"
ALARM_BASED = "alarm_based"
class KubeRootcaUpdateAudit(object):
"""Manages tasks related to kube rootca update audits."""
def __init__(self, context):
self.context = context
self.audit_type = "kube rootca update"
LOG.debug("%s audit initialized" % self.audit_type)
def __init__(self):
LOG.debug(f"{AUDIT_TYPE} audit initialized")
def get_regionone_audit_data(self):
"""Query RegionOne to determine kube rootca update information.
@@ -49,13 +54,14 @@ class KubeRootcaUpdateAudit(object):
region_clients=None,
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
).keystone_client
endpoint = m_os_ks_client.endpoint_cache.get_endpoint('sysinv')
endpoint = m_os_ks_client.endpoint_cache.get_endpoint("sysinv")
sysinv_client = SysinvClient(
dccommon_consts.DEFAULT_REGION_NAME, m_os_ks_client.session,
endpoint=endpoint)
dccommon_consts.DEFAULT_REGION_NAME,
m_os_ks_client.session,
endpoint=endpoint,
)
except Exception:
LOG.exception("Failed init OS Client, skip Kubernetes root CA "
"audit.")
LOG.exception("Failed init OS Client, skip Kubernetes root CA audit.")
return None
try:
@@ -64,72 +70,113 @@ class KubeRootcaUpdateAudit(object):
_, cc_cert = sysinv_client.get_kube_rootca_cert_id()
except Exception:
# Cannot get the cert ID from central cloud, return None
LOG.exception("Failed to get Kubernetes root CA from Region One, "
"skip Kubernetes root CA audit.")
LOG.exception(
"Failed to get Kubernetes root CA from Region One, "
"skip Kubernetes root CA audit."
)
return None
regionone_rootca_certid = cc_cert.cert_id
LOG.debug("RegionOne kubernetes rootca update data: "
f"{regionone_rootca_certid}.")
LOG.debug(
"RegionOne kubernetes rootca update data: " f"{regionone_rootca_certid}."
)
return regionone_rootca_certid
@classmethod
def get_subcloud_audit_data(
cls,
sysinv_client: SysinvClient,
fm_client: FmClient,
subcloud_name: str = None,
) -> tuple:
skip_audit = 2 * [dccommon_consts.SKIP_AUDIT]
try:
success, subcloud_cert_data = sysinv_client.get_kube_rootca_cert_id()
except Exception:
msg = f"Failed to get Kubernetes root CA status, skip {AUDIT_TYPE} audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
if success:
return CERT_BASED, subcloud_cert_data
try:
detected_alarms = fm_client.get_alarms_by_ids(KUBE_ROOTCA_ALARM_LIST)
except Exception:
msg = f"Failed to get alarms by id, skip {AUDIT_TYPE} audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
return ALARM_BASED, detected_alarms
@classmethod
def get_subcloud_sync_status(
cls,
sysinv_client: SysinvClient,
fm_client: FmClient,
regionone_rootca_certid: str,
subcloud_name: str = None,
):
"""Get the sync status of the subcloud's kube root CA cert."""
audit_method, subcloud_audit_data = cls.get_subcloud_audit_data(
sysinv_client, fm_client, subcloud_name
)
sync_status = None
if audit_method == dccommon_consts.SKIP_AUDIT:
return sync_status
elif audit_method == ALARM_BASED:
# If the subcloud doesn't have the sysinv API to get
# the cert ID, audit based on its alarm.
sync_status = cls.subcloud_rootca_audit_alarm_based(subcloud_audit_data)
else:
sync_status = cls.subcloud_rootca_audit_cert_based(
subcloud_audit_data, regionone_rootca_certid, subcloud_name
)
return sync_status
def subcloud_kube_rootca_audit(
self, sysinv_client, fm_client, subcloud, regionone_rootca_certid
self,
sysinv_client: SysinvClient,
fm_client: FmClient,
subcloud: models.Subcloud,
regionone_rootca_certid: str,
):
"""Perform an audit of kube root CA update info in a subcloud.
The audit logic is as follow:
CentOS subclouds -> alarm based
Debian subclouds:
not rehomed(initially deployed or re-deployed) -> alarm based
rehomed subclouds:
Not region one cert ID -> skip audit
subcloud doesn't have the API to get cert ID -> alarm based
region one cert ID -> cert based
No region one cert ID -> skip audit
Subcloud doesn't have the API to get cert ID -> alarm based
Subcloud has the API to get cert ID -> cert based
:param sysinv_client: the sysinv client object
:param fm_client: the fm client object
:param subcloud: subcloud object
:param region_one_audit_data: the audit data of the region one
:param regionone_rootca_certid: the cert id of region one
"""
LOG.info("Triggered %s audit for: %s" % (self.audit_type, subcloud.name))
# Firstly, apply alarm based audit against the subclouds deployed in
# the distributed cloud and the subcloud running on old software
# version that cannot search for the k8s root CA cert id.
if (
dccommon_utils.is_centos(subcloud.software_version)
or not subcloud.rehomed
):
return self.subcloud_audit_alarm_based(fm_client, subcloud.name)
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
LOG.info(f"Triggered {AUDIT_TYPE} audit for: {subcloud.name}")
# Skip the audit if cannot get the region one cert ID.
if not regionone_rootca_certid:
LOG.debug(f"No region one audit data, skip {self.audit_type} "
f"audit for subcloud: {subcloud.name}.")
return sync_status
msg = f"No region one audit data, exiting {AUDIT_TYPE} audit"
log_subcloud_msg(LOG.debug, msg, subcloud.name)
return dccommon_consts.SYNC_STATUS_IN_SYNC
try:
success, subcloud_cert_data = sysinv_client.get_kube_rootca_cert_id()
except Exception:
LOG.exception("Failed to get Kubernetes root CA cert ID of "
f"subcloud: {subcloud.name}, skip "
f"{self.audit_type} audit.")
return None
if not success:
# if not success, the subcloud is a Debian based subcloud without
# the sysinv API to get the cert ID, audit the subcloud based on
# its alarm.
return self.subcloud_audit_alarm_based(fm_client, subcloud.name)
return self.subcloud_audit_cert_based(
subcloud.name, subcloud_cert_data, regionone_rootca_certid
sync_status = self.get_subcloud_sync_status(
sysinv_client, fm_client, regionone_rootca_certid, subcloud.name
)
def subcloud_audit_alarm_based(self, fm_client, subcloud_name):
if sync_status:
LOG.info(
f"{AUDIT_TYPE} audit completed for: {subcloud.name}, requesting "
f"sync_status update to {sync_status}"
)
return sync_status
@staticmethod
def subcloud_rootca_audit_alarm_based(detected_alarms):
"""The subcloud doesn't have the method to get Kubernetes root CA
cert ID, use alarm based audit.
@@ -137,44 +184,38 @@ class KubeRootcaUpdateAudit(object):
:param subcloud_name: the name of the subcloud
"""
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
detected_alarms = fm_client.get_alarms_by_ids(KUBE_ROOTCA_ALARM_LIST)
if detected_alarms:
for alarm in detected_alarms:
if alarm.entity_instance_id in MONITORED_ALARM_ENTITIES:
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
break
return dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
return dccommon_consts.SYNC_STATUS_IN_SYNC
LOG.info(
f'{self.audit_type} audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
)
return sync_status
def subcloud_audit_cert_based(self, subcloud_name,
subcloud_cert_data, regionone_rootca_certid):
@staticmethod
def subcloud_rootca_audit_cert_based(
subcloud_cert_data: object,
regionone_rootca_certid: str,
subcloud_name: str = None,
):
"""Audit if a subcloud's k8s root CA cert is the same as the central
:param subcloud_name: the name of the subcloud
:param regionone_rootca_certid: the cert ID of the region one
:param subcloud_cert: subcloud's cert info
:param subcloud_name: the name of the subcloud
:return: sync status of the subcloud certificate
"""
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
if subcloud_cert_data.error:
LOG.exception("Failed to get Kubernetes root CA cert id for "
f"subcloud:{subcloud_name}, error: "
f"{subcloud_cert_data.error}, skip {self.audit_type} "
"audit.")
msg = (
"Failed to get Kubernetes root CA cert id, error: "
f"{subcloud_cert_data.error}, skip {AUDIT_TYPE} audit."
)
log_subcloud_msg(LOG.error, msg, subcloud_name)
return None
elif subcloud_cert_data.cert_id != regionone_rootca_certid:
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
out_of_sync = subcloud_cert_data.cert_id != regionone_rootca_certid
LOG.info(
f'{self.audit_type} audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
return (
dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
if out_of_sync
else dccommon_consts.SYNC_STATUS_IN_SYNC
)
return sync_status

View File

@@ -20,6 +20,7 @@ from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.utils import log_subcloud_msg
from dcmanager.common import utils
@@ -34,9 +35,9 @@ class KubernetesAuditData(object):
def to_dict(self):
return {
'target': self.target,
'version': self.version,
'state': self.state,
"target": self.target,
"version": self.version,
"state": self.state,
}
@classmethod
@@ -49,9 +50,8 @@ class KubernetesAuditData(object):
class KubernetesAudit(object):
"""Manages tasks related to kubernetes audits."""
def __init__(self, context):
LOG.debug('KubernetesAudit initialization...')
self.context = context
def __init__(self):
LOG.debug("KubernetesAudit initialization...")
self.audit_count = 0
def get_regionone_audit_data(self):
@@ -66,78 +66,116 @@ class KubernetesAudit(object):
region_clients=None,
fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
).keystone_client
endpoint = m_os_ks_client.endpoint_cache.get_endpoint('sysinv')
endpoint = m_os_ks_client.endpoint_cache.get_endpoint("sysinv")
sysinv_client = SysinvClient(
dccommon_consts.DEFAULT_REGION_NAME, m_os_ks_client.session,
endpoint=endpoint)
dccommon_consts.DEFAULT_REGION_NAME,
m_os_ks_client.session,
endpoint=endpoint,
)
except Exception:
LOG.exception('Failed init OS Client, skip kubernetes audit.')
LOG.exception("Failed init OS Client, skip kubernetes audit.")
return None
region_one_data = []
regionone_data = []
regionone_kube_version = None
results_list = sysinv_client.get_kube_versions()
for result in results_list:
region_one_data.append(KubernetesAuditData(result.target,
result.version,
result.state))
LOG.debug("RegionOne kubernetes versions: %s" % region_one_data)
return region_one_data
def subcloud_kubernetes_audit(
self, sysinv_client, subcloud_name, audit_data
):
LOG.info('Triggered kubernetes audit for: %s' % subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
if not audit_data:
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
LOG.info(
f'Kubernetes audit skipped for: {subcloud_name}. There is no audit '
f'data, requesting sync_status update to {sync_status}'
regionone_data.append(
KubernetesAuditData(result.target, result.version, result.state)
)
if result.target and result.state == "active":
regionone_kube_version = result.version
LOG.debug(f"RegionOne kubernetes versions: {regionone_data}")
return regionone_kube_version
return sync_status
@classmethod
def get_subcloud_audit_data(
cls, sysinv_client: SysinvClient, subcloud_name: str = None
):
subcloud_kube_upgrades = None
subcloud_kubernetes_versions = None
skip_audit = 2 * [dccommon_consts.SKIP_AUDIT]
try:
subcloud_kube_upgrades = sysinv_client.get_kube_upgrades()
except Exception:
msg = "Failed to get kubernetes upgrades, skip kubernetes audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
# If there is a kubernetes upgrade operation in the subcloud,
# the subcloud can immediately be flagged as out of sync
if subcloud_kube_upgrades and len(subcloud_kube_upgrades) > 0:
return subcloud_kube_upgrades, None
try:
subcloud_kubernetes_versions = sysinv_client.get_kube_versions()
except Exception:
msg = "Failed to get kubernetes versions, skip kubernetes audit."
log_subcloud_msg(LOG.exception, msg, subcloud_name)
return skip_audit
return None, subcloud_kubernetes_versions
@classmethod
def get_subcloud_sync_status(
cls,
sysinv_client: SysinvClient,
region_one_version: str,
subcloud_name: str = None,
):
# Retrieve kubernetes info for this subcloud
# state - active, partial, available
# active - true / false
# version - any value ex: v1.18.1
# Find the target=true state=active version on system controller
# The audit_data for region one is a dictionary
region_one_version = None
for result in audit_data:
# audit_data will be a dict from passing through RPC, so objectify
result = KubernetesAuditData.from_dict(result)
if result.target and result.state == 'active':
region_one_version = result.version
break
if region_one_version is None:
LOG.info("No active target version found in region one audit data")
return None
# if there is a kubernetes upgrade operation in the subcloud,
# the subcloud can immediately be flagged as out of sync
subcloud_kube_upgrades = sysinv_client.get_kube_upgrades()
if len(subcloud_kube_upgrades) > 0:
# We are out of sync
LOG.debug('Existing Kubernetes upgrade exists for:(%s)'
% subcloud_name)
else:
# We will consider it out of sync even for 'partial' state
# The audit data for subcloud_results is an object not a dictionary
subcloud_results = sysinv_client.get_kube_versions()
for result in subcloud_results:
if result.target and result.state == 'active':
subcloud_version = result.version
if subcloud_version == region_one_version:
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
break
LOG.info(
f'Kubernetes audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
subcloud_kube_upgrades, subcloud_kubernetes_versions = (
cls.get_subcloud_audit_data(sysinv_client, subcloud_name)
)
return sync_status
if dccommon_consts.SKIP_AUDIT in [
subcloud_kube_upgrades,
subcloud_kubernetes_versions,
]:
return None
elif subcloud_kube_upgrades and len(subcloud_kube_upgrades) > 0:
# If there is a kubernetes upgrade operation in the subcloud,
# the subcloud can immediately be flagged as out of sync
msg = "Kubernetes upgrade exists"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
return dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
# We will consider it out of sync even for 'partial' state
for result in subcloud_kubernetes_versions:
if (
result.target
and result.state == "active"
and result.version == region_one_version
):
return dccommon_consts.SYNC_STATUS_IN_SYNC
return dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
def subcloud_kubernetes_audit(
self,
sysinv_client: SysinvClient,
subcloud_name: str,
regionone_audit_data: dict,
):
LOG.info(f"Triggered kubernetes audit for: {subcloud_name}")
if not regionone_audit_data:
LOG.debug(
"No active target version found in region one audit data, "
"exiting kubernetes audit"
)
return dccommon_consts.SYNC_STATUS_IN_SYNC
sync_status = self.get_subcloud_sync_status(
sysinv_client, regionone_audit_data, subcloud_name
)
if sync_status:
LOG.info(
f'Kubernetes audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
)
return sync_status

View File

@@ -8,9 +8,13 @@ from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.keystone_v3 import (
KeystoneClient as ks_client
)
from dccommon.drivers.openstack import sdk_platform
from dccommon.drivers.openstack import software_v1
from dccommon.endpoint_cache import build_subcloud_endpoint
from dccommon.utils import log_subcloud_msg
from dcmanager.common import utils
LOG = logging.getLogger(__name__)
@@ -39,9 +43,8 @@ class SoftwareAuditData(object):
class SoftwareAudit(object):
"""Manages tasks related to software audits."""
def __init__(self, context):
def __init__(self):
LOG.debug("SoftwareAudit initialization...")
self.context = context
self.audit_count = 0
@staticmethod
@@ -99,9 +102,95 @@ class SoftwareAudit(object):
regionone_releases, deployed_release_ids, committed_release_ids
)
@classmethod
def get_subcloud_audit_data(
cls,
software_client: software_v1.SoftwareClient,
subcloud_name: str = None
):
# Retrieve all the releases that are present in this subcloud.
try:
subcloud_releases = software_client.list()
except Exception:
msg = "Cannot retrieve releases, skip software audit."
log_subcloud_msg(LOG.warn, msg, subcloud_name)
return dccommon_consts.SKIP_AUDIT
return subcloud_releases
@classmethod
def get_subcloud_sync_status(
cls,
software_client: software_v1.SoftwareClient,
audit_data: SoftwareAuditData,
subcloud_name: str = None
):
# Retrieve all the releases that are present in this subcloud.
subcloud_releases = cls.get_subcloud_audit_data(software_client)
if subcloud_releases == dccommon_consts.SKIP_AUDIT:
return None
msg = f"Releases: {subcloud_releases}"
log_subcloud_msg(LOG.debug, msg, subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
# audit_data will be a dict due to passing through RPC so objectify it
audit_data = SoftwareAuditData.from_dict(audit_data)
# Check that all releases in this subcloud are in the correct
# state, based on the state of the release in RegionOne. For the
# subcloud.
for release in subcloud_releases:
release_id = release.get("release_id")
if release["state"] == software_v1.DEPLOYED:
if release_id not in audit_data.deployed_release_ids:
if release_id not in audit_data.committed_release_ids:
msg = f"Release {release_id} should not be deployed."
log_subcloud_msg(LOG.debug, msg, subcloud_name)
else:
msg = f"Release {release_id} should be committed."
log_subcloud_msg(LOG.debug, msg, subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
elif release["state"] == software_v1.COMMITTED:
if (
release_id not in audit_data.committed_release_ids
and release_id not in audit_data.deployed_release_ids
):
msg = f"Release {release_id} should not be committed."
log_subcloud_msg(LOG.warn, msg, subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
else:
# In steady state, all releases should either be deployed
# or committed in each subcloud. Release in other
# states mean a sync is required.
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
# Check that all deployed or committed releases in RegionOne are
# present in the subcloud.
for release_id in audit_data.deployed_release_ids:
if not any(
release["release_id"] == release_id for release in subcloud_releases
):
msg = f"Release {release_id} is missing."
log_subcloud_msg(LOG.debug, msg, subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
for release_id in audit_data.committed_release_ids:
if not any(
release["release_id"] == release_id for release in subcloud_releases
):
msg = f"Release {release_id} is missing."
log_subcloud_msg(LOG.debug, msg, subcloud_name)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
return sync_status
def subcloud_software_audit(
self, keystone_client, subcloud_management_ip, subcloud_name,
subcloud_region, audit_data
self,
keystone_client: ks_client,
subcloud_management_ip: str,
subcloud_name: str,
subcloud_region: str,
audit_data: SoftwareAuditData
):
LOG.info(f"Triggered software audit for: {subcloud_name}.")
try:
@@ -123,73 +212,13 @@ class SoftwareAudit(object):
)
return None
# Retrieve all the releases that are present in this subcloud.
try:
subcloud_releases = software_client.list()
LOG.debug(f"Releases for subcloud {subcloud_name}: {subcloud_releases}")
except Exception:
LOG.warn(
f"Cannot retrieve releases for subcloud: {subcloud_name}, "
"skip software audit."
)
return None
sync_status = dccommon_consts.SYNC_STATUS_IN_SYNC
# audit_data will be a dict due to passing through RPC so objectify it
audit_data = SoftwareAuditData.from_dict(audit_data)
# Check that all releases in this subcloud are in the correct
# state, based on the state of the release in RegionOne. For the
# subcloud.
for release in subcloud_releases:
release_id = release.get("release_id")
if release["state"] == software_v1.DEPLOYED:
if release_id not in audit_data.deployed_release_ids:
if release_id not in audit_data.committed_release_ids:
LOG.debug(
f"Release {release_id} should not be deployed"
f" in {subcloud_name}."
)
else:
LOG.debug(
f"Release {release_id} should be committed "
f"in {subcloud_name}."
)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
elif release["state"] == software_v1.COMMITTED:
if (
release_id not in audit_data.committed_release_ids
and release_id not in audit_data.deployed_release_ids
):
LOG.warn(
f"Release {release_id} should not be committed "
f"in {subcloud_name}."
)
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
else:
# In steady state, all releases should either be deployed
# or committed in each subcloud. Release in other
# states mean a sync is required.
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
# Check that all deployed or committed releases in RegionOne are
# present in the subcloud.
for release_id in audit_data.deployed_release_ids:
if not any(
release["release_id"] == release_id for release in subcloud_releases
):
LOG.debug(f"Release {release_id} missing from {subcloud_name}.")
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
for release_id in audit_data.committed_release_ids:
if not any(
release["release_id"] == release_id for release in subcloud_releases
):
LOG.debug(f"Release {release_id} missing from {subcloud_name}.")
sync_status = dccommon_consts.SYNC_STATUS_OUT_OF_SYNC
LOG.info(
f'Software audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
sync_status = self.get_subcloud_sync_status(
software_client, audit_data, subcloud_name
)
return sync_status
if sync_status:
LOG.info(
f'Software audit completed for: {subcloud_name}, requesting '
f'sync_status update to {sync_status}'
)
return sync_status

View File

@@ -95,12 +95,12 @@ class SubcloudAuditManager(manager.Manager):
self.patch_audit_count = 0
# trigger a patch audit on startup
self.patch_audit_time = 0
self.firmware_audit = firmware_audit.FirmwareAudit(self.context)
self.kubernetes_audit = kubernetes_audit.KubernetesAudit(self.context)
self.firmware_audit = firmware_audit.FirmwareAudit()
self.kubernetes_audit = kubernetes_audit.KubernetesAudit()
self.kube_rootca_update_audit = (
kube_rootca_update_audit.KubeRootcaUpdateAudit(self.context)
kube_rootca_update_audit.KubeRootcaUpdateAudit()
)
self.software_audit = software_audit.SoftwareAudit(self.context)
self.software_audit = software_audit.SoftwareAudit()
def _add_missing_endpoints(self):
# Update this flag file based on the most recent new endpoint

View File

@@ -26,6 +26,7 @@ from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import endpoint_cache
from dcmanager.audit import alarm_aggregation
from dcmanager.audit import base_audit
from dcmanager.audit import firmware_audit
from dcmanager.audit import kube_rootca_update_audit
from dcmanager.audit import kubernetes_audit
@@ -40,6 +41,7 @@ from dcmanager.common import manager
from dcmanager.common import scheduler
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.db.sqlalchemy import models
from dcmanager.rpc import client as dcmanager_rpc_client
CONF = cfg.CONF
@@ -70,12 +72,12 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)
# todo(abailey): refactor the design pattern for adding new audits
self.patch_audit = patch_audit.PatchAudit(self.context)
self.firmware_audit = firmware_audit.FirmwareAudit(self.context)
self.kubernetes_audit = kubernetes_audit.KubernetesAudit(self.context)
self.firmware_audit = firmware_audit.FirmwareAudit()
self.kubernetes_audit = kubernetes_audit.KubernetesAudit()
self.kube_rootca_update_audit = (
kube_rootca_update_audit.KubeRootcaUpdateAudit(self.context)
kube_rootca_update_audit.KubeRootcaUpdateAudit()
)
self.software_audit = software_audit.SoftwareAudit(self.context)
self.software_audit = software_audit.SoftwareAudit()
self.pid = os.getpid()
def audit_subclouds(self,
@@ -169,50 +171,6 @@ class SubcloudAuditWorkerManager(manager.Manager):
LOG.info('Ignoring SubcloudNotFound when attempting update'
'audit_fail_count for subcloud: %s' % subcloud.name)
@staticmethod
def _get_subcloud_availability_status(subcloud_name, sysinv_client):
"""For each subcloud, if at least one service is active in each
service of servicegroup-list then declare the subcloud online.
"""
avail_to_set = dccommon_consts.AVAILABILITY_OFFLINE
svc_groups = None
# get a list of service groups in the subcloud
try:
svc_groups = sysinv_client.get_service_groups()
except Exception as e:
LOG.warn('Cannot retrieve service groups for '
'subcloud: %s, %s' % (subcloud_name, e))
if svc_groups:
active_sgs = []
inactive_sgs = []
# Build 2 lists, 1 of active service groups,
# one with non-active.
for sg in svc_groups:
if sg.state != consts.SERVICE_GROUP_STATUS_ACTIVE:
inactive_sgs.append(sg.service_group_name)
else:
active_sgs.append(sg.service_group_name)
# Create a list of service groups that are only present
# in non-active list
inactive_only = [sg for sg in inactive_sgs if
sg not in active_sgs]
# An empty inactive only list and a non-empty active list
# means we're good to go.
if not inactive_only and active_sgs:
avail_to_set = \
dccommon_consts.AVAILABILITY_ONLINE
else:
LOG.info("Subcloud:%s has non-active "
"service groups: %s" %
(subcloud_name, inactive_only))
return avail_to_set
def _audit_subcloud_openstack_app(self, subcloud_name, sysinv_client,
openstack_installed):
openstack_installed_current = False
@@ -245,21 +203,23 @@ class SubcloudAuditWorkerManager(manager.Manager):
endpoint_type_list,
openstack_installed_current)
def _do_audit_subcloud(self,
subcloud,
update_subcloud_state,
do_audit_openstack,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit,
do_kubernetes_audit,
do_kube_rootca_update_audit,
do_software_audit):
def _do_audit_subcloud(
self,
subcloud: models.Subcloud,
update_subcloud_state: bool,
do_audit_openstack: bool,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit: bool,
do_load_audit: bool,
do_firmware_audit: bool,
do_kubernetes_audit: bool,
do_kube_rootca_update_audit: bool,
do_software_audit: bool,
):
audits_done = list()
failures = list()
# Do the actual subcloud audit.
@@ -297,21 +257,35 @@ class SubcloudAuditWorkerManager(manager.Manager):
LOG.debug("PID: %s, done auditing subcloud: %s." %
(self.pid, subcloud.name))
def _audit_subcloud(self,
subcloud,
update_subcloud_state,
do_audit_openstack,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit,
do_kubernetes_audit,
do_kube_rootca_update_audit,
do_software_audit):
@staticmethod
def _should_perform_additional_audit(
subcloud_management_state,
subcloud_avail_status,
first_identity_sync_complete
):
return (
subcloud_management_state == dccommon_consts.MANAGEMENT_MANAGED
and subcloud_avail_status == dccommon_consts.AVAILABILITY_ONLINE
and first_identity_sync_complete
)
def _audit_subcloud(
self,
subcloud: models.Subcloud,
update_subcloud_state: bool,
do_audit_openstack: bool,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
kube_rootca_update_audit_data,
software_audit_data,
do_patch_audit: bool,
do_load_audit: bool,
do_firmware_audit: bool,
do_kubernetes_audit: bool,
do_kube_rootca_update_audit: bool,
do_software_audit: bool,
):
"""Audit a single subcloud."""
avail_status_current = subcloud.availability_status
@@ -346,6 +320,8 @@ class SubcloudAuditWorkerManager(manager.Manager):
subcloud_region, admin_session,
endpoint=keystone_client.endpoint_cache.get_endpoint("fm")
)
# TODO(vgluzrom): Revise and improve the debug and error messages
# as well as the exception causes
except keystone_exceptions.ConnectTimeout:
if avail_status_current == dccommon_consts.AVAILABILITY_OFFLINE:
LOG.debug("Identity or Platform endpoint for %s not "
@@ -395,14 +371,16 @@ class SubcloudAuditWorkerManager(manager.Manager):
if subcloud.prestage_status in consts.STATES_FOR_ONGOING_PRESTAGE:
avail_to_set = dccommon_consts.AVAILABILITY_ONLINE
else:
avail_to_set = self._get_subcloud_availability_status(
subcloud_name, sysinv_client)
avail_to_set, _ = base_audit.get_subcloud_availability_status(
sysinv_client, subcloud_name
)
if avail_to_set == dccommon_consts.AVAILABILITY_OFFLINE:
if audit_fail_count < consts.AVAIL_FAIL_COUNT_MAX:
audit_fail_count = audit_fail_count + 1
if (avail_status_current == dccommon_consts.AVAILABILITY_ONLINE) and \
(audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM):
if (avail_status_current == dccommon_consts.AVAILABILITY_ONLINE) and (
audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM
):
# Do not set offline until we have failed audit
# the requisite number of times
avail_to_set = dccommon_consts.AVAILABILITY_ONLINE
@@ -446,12 +424,17 @@ class SubcloudAuditWorkerManager(manager.Manager):
# If subcloud is managed and online and the identity was synced once,
# audit additional resources
if (subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED and
avail_to_set == dccommon_consts.AVAILABILITY_ONLINE and
subcloud.first_identity_sync_complete):
if self._should_perform_additional_audit(
subcloud.management_state,
avail_to_set,
subcloud.first_identity_sync_complete,
):
# Get alarm summary and store in db,
if fm_client:
self.alarm_aggr.update_alarm_summary(subcloud_name, fm_client)
alarm_updates = self.alarm_aggr.get_alarm_summary(
fm_client, subcloud_name
)
self.alarm_aggr.update_alarm_summary(subcloud_name, alarm_updates)
failmsg = "Audit failure subcloud: %s, endpoint: %s"
@@ -465,10 +448,13 @@ class SubcloudAuditWorkerManager(manager.Manager):
patch_audit_data
)
)
audits_done.append('patch')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_PATCHING)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'patch'))
failures.append('patch')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_PATCHING)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_PATCHING)
# Perform load audit
if do_load_audit and patch_audit_data:
try:
@@ -477,10 +463,13 @@ class SubcloudAuditWorkerManager(manager.Manager):
sysinv_client, subcloud_name, patch_audit_data
)
)
audits_done.append('load')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'load'))
failures.append('load')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_LOAD)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_LOAD)
# Perform firmware audit
if do_firmware_audit:
try:
@@ -489,10 +478,13 @@ class SubcloudAuditWorkerManager(manager.Manager):
sysinv_client, subcloud_name, firmware_audit_data
)
)
audits_done.append('firmware')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_FIRMWARE)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'firmware'))
failures.append('firmware')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_FIRMWARE)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_FIRMWARE)
# Perform kubernetes audit
if do_kubernetes_audit:
try:
@@ -501,24 +493,31 @@ class SubcloudAuditWorkerManager(manager.Manager):
sysinv_client, subcloud_name, kubernetes_audit_data
)
)
audits_done.append('kubernetes')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_KUBERNETES)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'kubernetes'))
failures.append('kubernetes')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_KUBERNETES)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_KUBERNETES)
# Perform kube rootca update audit
if do_kube_rootca_update_audit:
try:
endpoint_data[dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA] = (
self.kube_rootca_update_audit.subcloud_kube_rootca_audit(
sysinv_client, fm_client, subcloud,
kube_rootca_update_audit_data
sysinv_client,
fm_client,
subcloud,
kube_rootca_update_audit_data,
)
)
audits_done.append('kube-rootca-update')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA)
except Exception:
LOG.exception(failmsg % (subcloud.name,
'kube-rootca-update'))
failures.append('kube-rootca-update')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA)
# Audit openstack application in the subcloud
if do_audit_openstack:
# We don't want an exception here to cause our
@@ -539,10 +538,13 @@ class SubcloudAuditWorkerManager(manager.Manager):
subcloud_name, subcloud_region, software_audit_data
)
)
audits_done.append('software')
audits_done.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
except Exception:
LOG.exception(failmsg % (subcloud.name, 'software'))
failures.append('software')
LOG.exception(
failmsg
% (subcloud.name, dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
)
failures.append(dccommon_consts.ENDPOINT_TYPE_SOFTWARE)
if availability_data or (endpoint_data and any(endpoint_data.values())):
# If a value is not None, an update should be sent to the rpc client

View File

@@ -236,17 +236,17 @@ def subcloud_audits_end_audit(context, subcloud_id, audits_done):
subcloud_audits_ref.state_update_requested = False
# todo(abailey): define new constants for these audit strings
# and update subcloud_audit_worker_manager to use them as well
if 'patch' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_PATCHING in audits_done:
subcloud_audits_ref.patch_audit_requested = False
if 'firmware' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_FIRMWARE in audits_done:
subcloud_audits_ref.firmware_audit_requested = False
if 'load' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_LOAD in audits_done:
subcloud_audits_ref.load_audit_requested = False
if 'kube-rootca-update' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA in audits_done:
subcloud_audits_ref.kube_rootca_update_audit_requested = False
if 'kubernetes' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_KUBERNETES in audits_done:
subcloud_audits_ref.kubernetes_audit_requested = False
if 'software' in audits_done:
if dccommon_consts.ENDPOINT_TYPE_SOFTWARE in audits_done:
subcloud_audits_ref.spare_audit_requested = False
subcloud_audits_ref.save(session)
return subcloud_audits_ref

View File

@@ -110,7 +110,10 @@ class TestAlarmAggregation(base.DCManagerTestCase):
fake_openstackdriver = FakeOpenStackDriver('subcloud1')
db_api.subcloud_alarms_create(self.ctx, 'subcloud1', values={})
aam.update_alarm_summary('subcloud1', fake_openstackdriver.fm_client)
alarms_summary = aam.get_alarm_summary(
fake_openstackdriver.fm_client, 'subcloud1'
)
aam.update_alarm_summary('subcloud1', alarms_summary)
alarms = db_api.subcloud_alarms_get(self.ctx, 'subcloud1')
self.assertEqual(self.alarms_to_dict(alarms),
{'critical_alarms': 1,
@@ -122,7 +125,10 @@ class TestAlarmAggregation(base.DCManagerTestCase):
fake_openstackdriver = FakeOpenStackDriver('subcloud2')
db_api.subcloud_alarms_create(self.ctx, 'subcloud2', values={})
aam.update_alarm_summary('subcloud2', fake_openstackdriver.fm_client)
alarms_summary = aam.get_alarm_summary(
fake_openstackdriver.fm_client, 'subcloud2'
)
aam.update_alarm_summary('subcloud2', alarms_summary)
alarms = db_api.subcloud_alarms_get(self.ctx, 'subcloud2')
self.assertEqual(self.alarms_to_dict(alarms),
{'critical_alarms': 0,
@@ -134,7 +140,10 @@ class TestAlarmAggregation(base.DCManagerTestCase):
fake_openstackdriver = FakeOpenStackDriver('subcloud3')
db_api.subcloud_alarms_create(self.ctx, 'subcloud3', values={})
aam.update_alarm_summary('subcloud3', fake_openstackdriver.fm_client)
alarms_summary = aam.get_alarm_summary(
fake_openstackdriver.fm_client, 'subcloud3'
)
aam.update_alarm_summary('subcloud3', alarms_summary)
alarms = db_api.subcloud_alarms_get(self.ctx, 'subcloud3')
self.assertEqual(self.alarms_to_dict(alarms),
{'critical_alarms': 0,
@@ -145,7 +154,7 @@ class TestAlarmAggregation(base.DCManagerTestCase):
)
fake_openstackdriver = FakeOpenStackDriver('subcloud4')
aam.update_alarm_summary('subcloud4', fake_openstackdriver.fm_client)
mock_logging.error.assert_called_with('Failed to update alarms for '
'subcloud4 error: Subcloud with '
'id subcloud4 doesn\'t exist.')
aam.update_alarm_summary('subcloud4', alarms_summary)
mock_logging.error.assert_called_with("Failed to update alarms for "
"subcloud4. Error: Subcloud with "
"name subcloud4 doesn't exist.")

View File

@@ -40,13 +40,21 @@ class PCIDevice(object):
class DeviceImage(object):
def __init__(self, bitstream_type,
bitstream_id, bmc,
retimer_included,
key_signature,
revoke_key_id, applied,
pci_vendor, pci_device,
applied_labels):
def __init__(
self,
uuid,
bitstream_type,
bitstream_id,
bmc,
retimer_included,
key_signature,
revoke_key_id,
applied,
pci_vendor,
pci_device,
applied_labels,
):
self.uuid = uuid
self.bitstream_type = bitstream_type
self.bitstream_id = bitstream_id
self.bmc = bmc
@@ -117,40 +125,49 @@ PCI_DEVICE4 = PCIDevice('06789e01-13b6-2347',
True)
# Device image has been applied
DEVICE_IMAGE1 = DeviceImage('functional',
'0x2383a62a010504',
True,
True,
'',
'',
True,
'1111',
'2222',
[{}])
DEVICE_IMAGE1 = DeviceImage(
"7e794693-2060-4e9e-b0bd-b281b059e8e4",
"functional",
"0x2383a62a010504",
True,
True,
"",
"",
True,
"1111",
"2222",
[{}],
)
# Device image has not been applied
DEVICE_IMAGE2 = DeviceImage('functional',
'0x2383a62a010504',
True,
True,
'',
'',
False,
'1111',
'2222',
[{}])
DEVICE_IMAGE2 = DeviceImage(
"09100124-5ae9-44d8-aefc-a192b8f27360",
"functional",
"0x2383a62a010504",
True,
True,
"",
"",
False,
"1111",
"2222",
[{}],
)
# Device image has been applied
DEVICE_IMAGE3 = DeviceImage('functional',
'0x2383a62a010504',
True,
True,
'',
'',
True,
'1111',
'2222',
[{"key1": "value1"}])
DEVICE_IMAGE3 = DeviceImage(
"ef4c39b1-81e9-42dd-b850-06fc8833b47c",
"functional",
"0x2383a62a010504",
True,
True,
"",
"",
True,
"1111",
"2222",
[{"key1": "value1"}],
)
DEVICE_LABEL1 = DeviceLabels('06789e01-13b6-2347',
'key1',
@@ -183,6 +200,9 @@ class FakeSysinvClientNoEnabledDevices(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -198,6 +218,9 @@ class FakeSysinvClientNoAuditData(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -220,6 +243,9 @@ class FakeSysinvClientImageWithoutLabels(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_image(self, device_image_uuid):
return self.device_image
@@ -252,6 +278,9 @@ class FakeSysinvClientImageNotApplied(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -279,6 +308,9 @@ class FakeSysinvClientImageNotWritten(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -307,6 +339,9 @@ class FakeSysinvClientImageWithLabels(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_image(self, device_image_uuid):
return self.device_image
@@ -338,6 +373,9 @@ class FakeSysinvClientNoMatchingDeviceLabel(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -365,6 +403,9 @@ class FakeSysinvClientNoMatchingDeviceId(object):
def get_host_device_list(self, host_name):
return self.pci_devices
def get_all_hosts_device_list(self):
return self.pci_devices
def get_device_images(self):
return self.device_images
@@ -385,7 +426,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
self.mock_subcloud_audit_manager_context.get_admin_context.\
return_value = self.ctx
self.fm = firmware_audit.FirmwareAudit(self.ctx)
self.fm = firmware_audit.FirmwareAudit()
self.am = subcloud_audit_manager.SubcloudAuditManager()
self.am.firmware_audit = self.fm

View File

@@ -16,8 +16,6 @@
import uuid
import mock
from dccommon import consts as dccommon_consts
from dcmanager.audit import kubernetes_audit
from dcmanager.audit import subcloud_audit_manager
@@ -43,9 +41,6 @@ class FakeKubeVersion(object):
self.applied_patches = []
self.available_patches = []
def to_dict(self):
return dict(self.__dict__)
class FakeKubeUpgrade(object):
def __init__(self):
@@ -72,27 +67,16 @@ class TestKubernetesAudit(base.DCManagerTestCase):
# Set the kube upgrade objects as being empty for all regions
self.kube_sysinv_client().get_kube_upgrades.return_value = []
self.audit = kubernetes_audit.KubernetesAudit(self.ctx)
self.audit = kubernetes_audit.KubernetesAudit()
self.am = subcloud_audit_manager.SubcloudAuditManager()
self.am.kubernetes_audit = self.audit
def _rpc_convert(self, object_list):
# Convert to dict like what would happen calling via RPC
dict_results = []
for result in object_list:
dict_results.append(result.to_dict())
return dict_results
def get_kube_audit_data(self):
(_, _, kubernetes_audit_data, _, _) = \
self.am._get_audit_data(True, True, True, True, True)
# Convert to dict like what would happen calling via RPC
kubernetes_audit_data = self._rpc_convert(kubernetes_audit_data)
return kubernetes_audit_data
@mock.patch.object(subcloud_audit_manager, 'context')
def test_no_kubernetes_audit_data_to_sync(self, mock_context):
mock_context.get_admin_context.return_value = self.ctx
def test_no_kubernetes_audit_data_to_sync(self):
kubernetes_audit_data = self.get_kube_audit_data()
subclouds = {base.SUBCLOUD_1['name']: base.SUBCLOUD_1['region_name'],
@@ -104,9 +88,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
self.assertEqual(response, dccommon_consts.SYNC_STATUS_IN_SYNC)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_older(self, mock_context):
mock_context.get_admin_context.return_value = self.ctx
def test_kubernetes_audit_data_out_of_sync_older(self):
# Set the region one data as being the upgraded version
self.kube_sysinv_client().get_kube_versions.return_value = [
@@ -127,9 +109,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
self.assertEqual(response, dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_newer(self, mock_context):
mock_context.get_admin_context.return_value = self.ctx
def test_kubernetes_audit_data_out_of_sync_newer(self):
# Set the region one data as being the previous version
self.kube_sysinv_client().get_kube_versions.return_value = [
@@ -150,10 +130,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
self.assertEqual(response, dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_in_sync(self,
mock_context):
mock_context.get_admin_context.return_value = self.ctx
def test_kubernetes_audit_data_in_sync(self):
# Set the region one data as being the upgraded version
self.kube_sysinv_client().get_kube_versions.return_value = [
@@ -177,12 +154,9 @@ class TestKubernetesAudit(base.DCManagerTestCase):
self.assertEqual(response, dccommon_consts.SYNC_STATUS_IN_SYNC)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_in_sync_but_existing_upgrade(self,
mock_context):
def test_kubernetes_audit_data_in_sync_but_existing_upgrade(self):
# If a subcloud has an existing upgrade, it is out of sync
# even if the kube versions match
mock_context.get_admin_context.return_value = self.ctx
# mock that there is a kube upgrade (only queried in subclouds)
self.kube_sysinv_client().get_kube_upgrades.return_value = [

View File

@@ -26,7 +26,6 @@ class FakeSubcloudObj(object):
def __init__(self, subcloud_dict):
self.name = subcloud_dict['name']
self.region_name = subcloud_dict['region_name']
self.rehomed = subcloud_dict['rehomed']
self.software_version = subcloud_dict['software_version']
@@ -53,7 +52,7 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
self.mock_subcloud_audit_manager_context.\
get_admin_context.return_value = self.ctx
self.audit = kube_rootca_update_audit.KubeRootcaUpdateAudit(self.ctx)
self.audit = kube_rootca_update_audit.KubeRootcaUpdateAudit()
self.am = SubcloudAuditManager()
self.am.kube_rootca_update_audit = self.audit
@@ -74,7 +73,9 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
subcloud = FakeSubcloudObj(subcloud_dict)
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
@@ -97,7 +98,9 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
True, FakeKubeRootcaData("cert1", "")
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
@@ -119,7 +122,9 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
self.mock_sysinv_client().get_kube_rootca_cert_id.return_value = \
True, FakeKubeRootcaData("cert2", "")
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
@@ -143,7 +148,9 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
self.mock_fm_client().get_alarms_by_ids.return_value = None
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
@@ -168,13 +175,15 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
[FakeAlarm('system.certificate.kubernetes-root-ca'), ]
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
self.assertEqual(response, dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
def test_kube_rootca_update_audit_in_sync_old_release(self):
def test_kube_rootca_update_audit_fail_to_get_audit_data(self):
# Set the region one data
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.return_value = \
True, FakeKubeRootcaData("cert1", "")
@@ -185,81 +194,14 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase):
subcloud = FakeSubcloudObj(subcloud_dict)
# return API cert ID request failed
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.\
return_value = False, None
self.mock_fm_client().get_alarms_by_ids.return_value = None
self.mock_sysinv_client().get_kube_rootca_cert_id.\
return_value = base.FakeException("API cert ID request failed")
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
self.assertEqual(response, dccommon_consts.SYNC_STATUS_IN_SYNC)
def test_kube_rootca_update_audit_out_of_sync_old_release(self):
# Set the region one data
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.return_value = \
True, FakeKubeRootcaData("cert1", "")
kube_rootca_update_audit_data = self.get_rootca_audit_data()
subclouds = [base.SUBCLOUD_3, base.SUBCLOUD_4]
for subcloud_dict in subclouds:
subcloud = FakeSubcloudObj(subcloud_dict)
# return API cert ID request failed
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.\
return_value = False, None
self.mock_fm_client().get_alarms_by_ids.return_value = \
[FakeAlarm('system.certificate.kubernetes-root-ca'), ]
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
kube_rootca_update_audit_data
)
self.assertEqual(response, dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
def test_kube_rootca_update_audit_in_sync_not_rehomed(self):
# Set the region one data
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.return_value = \
True, FakeKubeRootcaData("cert1", "")
kube_rootca_update_audit_data = self.get_rootca_audit_data()
subclouds = [base.SUBCLOUD_5, base.SUBCLOUD_6]
for subcloud_dict in subclouds:
subcloud = FakeSubcloudObj(subcloud_dict)
# return API cert ID request failed
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.\
return_value = False, None
self.mock_fm_client().get_alarms_by_ids.return_value = None
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
kube_rootca_update_audit_data
)
self.assertEqual(response, dccommon_consts.SYNC_STATUS_IN_SYNC)
def test_kube_rootca_update_audit_out_of_sync_not_rehomed(self):
# Set the region one data
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.return_value = \
True, FakeKubeRootcaData("cert1", "")
kube_rootca_update_audit_data = self.get_rootca_audit_data()
subclouds = [base.SUBCLOUD_5, base.SUBCLOUD_6]
for subcloud_dict in subclouds:
subcloud = FakeSubcloudObj(subcloud_dict)
# return API cert ID request failed
self.mock_region_one_sysinv_client().get_kube_rootca_cert_id.\
return_value = False, None
self.mock_fm_client().get_alarms_by_ids.return_value = \
[FakeAlarm('system.certificate.kubernetes-root-ca'), ]
response = self.audit.subcloud_kube_rootca_audit(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud,
kube_rootca_update_audit_data
)
self.assertEqual(response, dccommon_consts.SYNC_STATUS_OUT_OF_SYNC)
self.assertEqual(response, None)

View File

@@ -47,6 +47,7 @@ class FakeAuditWorkerAPI(object):
class FakeAlarmAggregation(object):
def __init__(self):
self.get_alarm_summary = mock.MagicMock()
self.update_alarm_summary = mock.MagicMock()
@@ -494,8 +495,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is called
self.fake_alarm_aggr.get_alarm_summary.assert_called_with(
self.mock_fm_client(), subcloud.name
)
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
subcloud.name, self.mock_fm_client())
subcloud.name, self.fake_alarm_aggr.get_alarm_summary.return_value
)
# Verify patch audit is called
self.fake_patch_audit.subcloud_patch_audit.assert_called_with(
@@ -517,7 +522,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify kube rootca update audit is called
self.fake_kube_rootca_update_audit.subcloud_kube_rootca_audit.\
assert_called_with(
mock.ANY, self.mock_fm_client(), subcloud,
self.mock_sysinv_client(),
self.mock_fm_client(),
subcloud,
kube_rootca_update_audit_data
)
@@ -593,6 +600,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -669,6 +677,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -726,6 +735,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -779,6 +789,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -851,8 +862,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_software_audit=do_software_audit)
# Verify alarm update is called once
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
subcloud.name, self.mock_fm_client())
self.fake_alarm_aggr.get_alarm_summary.assert_called_with(
self.mock_fm_client(), subcloud.name
)
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
subcloud.name, self.fake_alarm_aggr.get_alarm_summary.return_value
)
# Verify patch audit is called once
self.fake_patch_audit.subcloud_patch_audit.assert_called_once_with(
@@ -871,7 +886,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify kube rootca update audit is called once
self.fake_kube_rootca_update_audit.subcloud_kube_rootca_audit.\
assert_called_once_with(mock.ANY, mock.ANY, subcloud, mock.ANY)
assert_called_once_with(
self.mock_sysinv_client(), self.mock_fm_client(), subcloud, mock.ANY
)
# Verify the audit fail count was updated in db
audit_fail_count = 1
@@ -922,6 +939,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
)
# Verify alarm update is called only once
self.fake_alarm_aggr.get_alarm_summary.assert_called_once()
self.fake_alarm_aggr.update_alarm_summary.assert_called_once()
# Verify patch audit is called only once
@@ -1003,6 +1021,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -1182,6 +1201,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.get_alarm_summary.assert_not_called()
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
@@ -1262,8 +1282,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# True)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.mock_fm_client())
self.fake_alarm_aggr.get_alarm_summary.assert_called_with(
self.mock_fm_client(), 'subcloud1'
)
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
'subcloud1', self.fake_alarm_aggr.get_alarm_summary.return_value
)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
@@ -1329,8 +1353,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
subcloud.name, self.mock_fm_client())
self.fake_alarm_aggr.get_alarm_summary.assert_called_with(
self.mock_fm_client(), subcloud.name
)
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
subcloud.name, self.fake_alarm_aggr.get_alarm_summary.return_value
)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
@@ -1396,8 +1424,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.mock_fm_client())
self.fake_alarm_aggr.get_alarm_summary.assert_called_with(
self.mock_fm_client(), 'subcloud1'
)
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
'subcloud1', self.fake_alarm_aggr.get_alarm_summary.return_value
)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()