Check for cert alarms in health-query-upgrade

This task adapts the existing implementation to run a full certificate
expiration audit in "health-query-upgrade" and to return failure from
_check_alarms if any certificate alarm exists in the system.
Both "expiring soon" and "expired" alarms will block upgrades, but
can be skipped with the use of the force flag. This change will also
add information about certificate expiration alarms to the line
related to existing alarms in the output of "health-query-upgrade".

Note: Now that 'keystone_opt_group' is used for both cert_alarm and
      health.py, the variable 'keystone_authtoken' had to be changed
      to 'KEYSTONE_AUTHTOKEN' to match with the key that is used by
      the CONF object from health.py which is configured as
      uppercase in line 118 of openstack.py.

Test Plan:

PASS: Run "health-query-upgrade" with one or more 'expiring soon' or
      'expired' alarms and verify that a message is shown in
      'health-query-upgrade' output saying that there are certificate
      expiration alarms.
PASS: Run 'health-query-upgrade' with no active certificate alarm and
      verify that no certificate alarms were shown in the output of
      'health-query-upgrade'.
PASS: Run 'system upgrade-start' with the --force flag with one or more
      certificate alarms and verify that the upgrade can be started
      normally.
PASS: Add a new certificate with expiry date of less than 30 days
      and run 'health-query-upgrade' before the scheduled full audit
      runs and check if the alarm was created and detected by
      'health-query-upgrade'.
PASS: Delete secret from a certificate that is monitored by cert-mon
      and check if cert-mon was able to reinstall the secret to the
      filesystem.

Task: 47478
Story: 2009303
Signed-off-by: Karla Felix <karla.karolinenogueirafelix@windriver.com>
Change-Id: Iaba585b6ecd7f63e0ed186f87c7274c4b9778889
This commit is contained in:
Karla Felix 2023-02-16 10:54:20 -03:00
parent 2400ae204e
commit a26b4b4050
3 changed files with 34 additions and 15 deletions

View File

@ -1,4 +1,4 @@
# Copyright (c) 2020-2022 Wind River Systems, Inc.
# Copyright (c) 2020-2023 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -354,14 +354,18 @@ def get_system(token, method, api_cmd, api_cmd_headers=None,
def get_token():
"""Get token for the sysinv user."""
keystone_conf = CONF.get('KEYSTONE_AUTHTOKEN')
token = _get_token(
CONF.keystone_authtoken.auth_url + '/v3/auth/tokens',
CONF.keystone_authtoken.project_name,
CONF.keystone_authtoken.username,
CONF.keystone_authtoken.password,
CONF.keystone_authtoken.user_domain_name,
CONF.keystone_authtoken.project_domain_name,
CONF.keystone_authtoken.region_name)
keystone_conf.auth_url + '/v3/auth/tokens',
keystone_conf.project_name,
keystone_conf.username,
keystone_conf.password,
keystone_conf.user_domain_name,
keystone_conf.project_domain_name,
keystone_conf.region_name)
return token
@ -466,7 +470,7 @@ def init_keystone_auth_opts():
help='Authorization url')
]
keystone_opt_group = cfg.OptGroup(name='keystone_authtoken',
keystone_opt_group = cfg.OptGroup(name='KEYSTONE_AUTHTOKEN',
title='Keystone options')
cfg.CONF.register_opts(keystone_opts, group=keystone_opt_group.name)

View File

@ -2241,6 +2241,9 @@ CERT_LOCATION_MAP = {
# TODO(): TPM
}
# Certificates alarm id
CERT_ALARM_IDS = ['500.200', '500.210']
CERT_ALARM_ANNOTATION_ALARM = 'starlingx.io/alarm'
CERT_ALARM_ANNOTATION_ALARM_BEFORE = 'starlingx.io/alarm-before'
CERT_ALARM_ANNOTATION_ALARM_SEVERITY = 'starlingx.io/alarm-severity'

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018-2022 Wind River Systems, Inc.
# Copyright (c) 2018-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -18,6 +18,7 @@ from sysinv.common import kubernetes
from sysinv.common import utils
from sysinv.common.fm import fmclient
from sysinv.common.storage_backend_conf import StorageBackendConfig
from sysinv.cert_alarm.audit import CertAlarmAudit
from sysinv.api.controllers.v1 import patch_api
from sysinv.api.controllers.v1 import vim_api
@ -37,6 +38,7 @@ class Health(object):
self._dbapi = dbapi
self._ceph = ceph.CephApiOperator()
self._kube_operator = kubernetes.KubeOperator()
self._cert_alarm_manager = CertAlarmAudit()
def _check_hosts_provisioned(self, hosts):
"""Checks that each host is provisioned"""
@ -105,16 +107,24 @@ class Health(object):
if alarm_ignore_list is None:
alarm_ignore_list = []
self._cert_alarm_manager.run_full_audit()
alarms = fmclient(context).alarm.list(include_suppress=True)
success = True
allowed = 0
affecting = 0
cert_alarm = 0
# Separate alarms that are mgmt affecting
for alarm in alarms:
if alarm.alarm_id not in alarm_ignore_list:
mgmt_affecting = alarm.mgmt_affecting == "True"
if not mgmt_affecting:
if alarm.alarm_id in constants.CERT_ALARM_IDS:
cert_alarm += 1
if not force:
success = False
elif not mgmt_affecting:
allowed += 1
if not force:
success = False
@ -122,7 +132,7 @@ class Health(object):
affecting += 1
success = False
return success, allowed, affecting
return success, allowed, affecting, cert_alarm
def _check_active_is_controller_0(self):
"""Checks that active controller is controller-0"""
@ -412,15 +422,17 @@ class Health(object):
health_ok = health_ok and success
success, allowed, affecting = self._check_alarms(
success, allowed, affecting, cert_alarm = self._check_alarms(
context,
force=force,
alarm_ignore_list=alarm_ignore_list)
output += _('No alarms: [%s]\n') \
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
if not success:
output += _('[%s] alarms found, [%s] of which are management '
'affecting\n') % (allowed + affecting, affecting)
output += _('[%s] alarms found, [%s] of which are management affecting '
'and [%s] are certificate expiration alarms. '
'Use "fm alarm-list" for details') % (allowed + affecting + cert_alarm,
affecting, cert_alarm)
health_ok = health_ok and success