From a26b4b40501d517eb1aeebc7d0af6cc13623fe94 Mon Sep 17 00:00:00 2001 From: Karla Felix Date: Thu, 16 Feb 2023 10:54:20 -0300 Subject: [PATCH] Check for cert alarms in health-query-upgrade This task will adapt the existing implementation to run a full certificate expiration audit in "health-query-upgrade" and return fail in _check_alarms if any cert alarm exists in the system. Both "expiring soon" and "expired" alarms will block upgrades, but can be skipped with the use of the force flag. This change will also add information about certificate expiration alarms to the line related to existing alarms of the output in "health-query-upgrade". Note: Now that 'keystone_opt_group' is used for both cert_alarm and health.py, the variable 'keystone_authtoken' had to be changed to 'KEYSTONE_AUTHTOKEN' to match the key that is used by the CONF object from health.py, which is configured as uppercase in line 118 of openstack.py. Test Plan: PASS: Run "health-query-upgrade" with one or more 'expiring soon' or 'expired' alarms and verify that a message is shown in 'health-query-upgrade' output saying that there are certificate expiration alarms. PASS: Run 'health-query-upgrade' with no active certificate alarm and verify that no certificate alarms were shown in the output of 'health-query-upgrade'. PASS: Run 'system upgrade-start' with the --force flag with one or more certificate alarms and verify that the upgrade can be started normally. PASS: Add a new certificate with an expiry date of less than 30 days and run 'health-query-upgrade' before the scheduled full audit runs and check if the alarm was created and detected by 'health-query-upgrade'. PASS: Delete the secret from a certificate that is monitored by cert-mon and check if cert-mon was able to reinstall the secret to the filesystem. 
Task: 47478 Story: 2009303 Signed-off-by: Karla Felix Change-Id: Iaba585b6ecd7f63e0ed186f87c7274c4b9778889 --- sysinv/sysinv/sysinv/sysinv/cert_mon/utils.py | 22 ++++++++++------- .../sysinv/sysinv/sysinv/common/constants.py | 3 +++ sysinv/sysinv/sysinv/sysinv/common/health.py | 24 ++++++++++++++----- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/sysinv/sysinv/sysinv/sysinv/cert_mon/utils.py b/sysinv/sysinv/sysinv/sysinv/cert_mon/utils.py index 04cb901af8..af24bf3066 100644 --- a/sysinv/sysinv/sysinv/sysinv/cert_mon/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/cert_mon/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022 Wind River Systems, Inc. +# Copyright (c) 2020-2023 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -354,14 +354,18 @@ def get_system(token, method, api_cmd, api_cmd_headers=None, def get_token(): """Get token for the sysinv user.""" + + keystone_conf = CONF.get('KEYSTONE_AUTHTOKEN') + token = _get_token( - CONF.keystone_authtoken.auth_url + '/v3/auth/tokens', - CONF.keystone_authtoken.project_name, - CONF.keystone_authtoken.username, - CONF.keystone_authtoken.password, - CONF.keystone_authtoken.user_domain_name, - CONF.keystone_authtoken.project_domain_name, - CONF.keystone_authtoken.region_name) + keystone_conf.auth_url + '/v3/auth/tokens', + keystone_conf.project_name, + keystone_conf.username, + keystone_conf.password, + keystone_conf.user_domain_name, + keystone_conf.project_domain_name, + keystone_conf.region_name) + return token @@ -466,7 +470,7 @@ def init_keystone_auth_opts(): help='Authorization url') ] - keystone_opt_group = cfg.OptGroup(name='keystone_authtoken', + keystone_opt_group = cfg.OptGroup(name='KEYSTONE_AUTHTOKEN', title='Keystone options') cfg.CONF.register_opts(keystone_opts, group=keystone_opt_group.name) diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py 
b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 92d4b0b174..3148d32802 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -2241,6 +2241,9 @@ CERT_LOCATION_MAP = { # TODO(): TPM } +# Certificates alarm id +CERT_ALARM_IDS = ['500.200', '500.210'] + CERT_ALARM_ANNOTATION_ALARM = 'starlingx.io/alarm' CERT_ALARM_ANNOTATION_ALARM_BEFORE = 'starlingx.io/alarm-before' CERT_ALARM_ANNOTATION_ALARM_SEVERITY = 'starlingx.io/alarm-severity' diff --git a/sysinv/sysinv/sysinv/sysinv/common/health.py b/sysinv/sysinv/sysinv/sysinv/common/health.py index 6c541bf4a1..8598094821 100755 --- a/sysinv/sysinv/sysinv/sysinv/common/health.py +++ b/sysinv/sysinv/sysinv/sysinv/common/health.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2022 Wind River Systems, Inc. +# Copyright (c) 2018-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -18,6 +18,7 @@ from sysinv.common import kubernetes from sysinv.common import utils from sysinv.common.fm import fmclient from sysinv.common.storage_backend_conf import StorageBackendConfig +from sysinv.cert_alarm.audit import CertAlarmAudit from sysinv.api.controllers.v1 import patch_api from sysinv.api.controllers.v1 import vim_api @@ -37,6 +38,7 @@ class Health(object): self._dbapi = dbapi self._ceph = ceph.CephApiOperator() self._kube_operator = kubernetes.KubeOperator() + self._cert_alarm_manager = CertAlarmAudit() def _check_hosts_provisioned(self, hosts): """Checks that each host is provisioned""" @@ -105,16 +107,24 @@ class Health(object): if alarm_ignore_list is None: alarm_ignore_list = [] + self._cert_alarm_manager.run_full_audit() + alarms = fmclient(context).alarm.list(include_suppress=True) success = True allowed = 0 affecting = 0 + cert_alarm = 0 + # Separate alarms that are mgmt affecting for alarm in alarms: if alarm.alarm_id not in alarm_ignore_list: mgmt_affecting = alarm.mgmt_affecting == "True" - if not mgmt_affecting: + if alarm.alarm_id in 
constants.CERT_ALARM_IDS: + cert_alarm += 1 + if not force: + success = False + elif not mgmt_affecting: allowed += 1 if not force: success = False @@ -122,7 +132,7 @@ class Health(object): affecting += 1 success = False - return success, allowed, affecting + return success, allowed, affecting, cert_alarm def _check_active_is_controller_0(self): """Checks that active controller is controller-0""" @@ -412,15 +422,17 @@ class Health(object): health_ok = health_ok and success - success, allowed, affecting = self._check_alarms( + success, allowed, affecting, cert_alarm = self._check_alarms( context, force=force, alarm_ignore_list=alarm_ignore_list) output += _('No alarms: [%s]\n') \ % (Health.SUCCESS_MSG if success else Health.FAIL_MSG) if not success: - output += _('[%s] alarms found, [%s] of which are management ' - 'affecting\n') % (allowed + affecting, affecting) + output += _('[%s] alarms found, [%s] of which are management affecting ' + 'and [%s] are certificate expiration alarms. ' + 'Use "fm alarm-list" for details') % (allowed + affecting + cert_alarm, + affecting, cert_alarm) health_ok = health_ok and success