Merge "Add pod health status to kube rootca check"

This commit is contained in:
Zuul 2023-11-17 21:11:52 +00:00 committed by Gerrit Code Review
commit 0825055c7a
7 changed files with 85 additions and 16 deletions

View File

@ -1,6 +1,6 @@
# -*- encoding: utf-8 -*-
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -23,9 +23,12 @@ class HealthManager(base.Manager):
resp, body = self.api.json_request('GET', path)
return body
def get_kube_upgrade(self, args=None, relaxed=None):
    """Request the Kubernetes-upgrade health report.

    :param args: optional dict of query options. Only the 'rootca' key is
                 read; when its value is truthy it is appended to the
                 request as a ``rootca`` query parameter. ``None`` is
                 treated like an empty dict so callers that pass no
                 options keep working.
    :param relaxed: when truthy, the '/relaxed' variant of the endpoint
                    is queried.
    :returns: the body returned by the JSON GET request.
    """
    path = '/v1/health/kube-upgrade'
    if relaxed:
        path += '/relaxed'

    # Tolerate a missing options dict instead of failing on None.get().
    rootca = (args or {}).get('rootca')
    if rootca:
        path += f'?rootca={rootca}'

    # Only the response body is of interest to callers.
    _resp, body = self.api.json_request('GET', path)
    return body

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2016 Wind River Systems, Inc.
# Copyright (c) 2016-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -8,6 +8,8 @@
# All Rights Reserved.
#
from cgtsclient.common import utils
def do_health_query(cc, args):
"""Run the Health Check."""
@ -19,6 +21,13 @@ def do_health_query_upgrade(cc, args):
print(cc.health.get_upgrade())
@utils.arg('--rootca',
           action='store_true',
           default=False,
           help='Whether additional RootCA verifications should be executed')
def do_health_query_kube_upgrade(cc, args):
    """Run the Health Check for a Kubernetes Upgrade."""
    # Collect the optional query attributes forwarded to the health API.
    # '--rootca' is a store_true flag defaulting to False, so the value is
    # normally a bool; the None guard only protects against a namespace
    # that was built without the flag.
    attributes = {}
    if args.rootca is not None:
        attributes.update({'rootca': args.rootca})

    # Issue a single query and print the report it returns.
    print(cc.health.get_kube_upgrade(attributes))

View File

@ -1,4 +1,4 @@
# Copyright (c) 2016 Wind River Systems, Inc.
# Copyright (c) 2016-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -33,8 +33,8 @@ class HealthController(rest.RestController):
"Unable to perform health query."))
return output
@wsme_pecan.wsexpose(wtypes.text, wtypes.text, wtypes.text)
def get_one(self, upgrade, relaxed=None):
@wsme_pecan.wsexpose(wtypes.text, wtypes.text, wtypes.text, wtypes.text)
def get_one(self, upgrade, relaxed=None, rootca=None):
"""Validates the health of the system for an upgrade"""
force = False
if relaxed:
@ -51,7 +51,8 @@ class HealthController(rest.RestController):
elif upgrade == 'kube-upgrade':
try:
success, output = pecan.request.rpcapi.get_system_health(
pecan.request.context, kube_upgrade=True, force=force)
pecan.request.context, kube_upgrade=True, force=force,
kube_rootca_update=rootca)
except Exception as e:
LOG.exception(e)
raise wsme.exc.ClientSideError(_(

View File

@ -433,7 +433,8 @@ class KubeRootCAUpdateController(rest.RestController):
LOG.info("Running in lab, ignoring health errors.")
else:
raise wsme.exc.ClientSideError(_(
"System is not healthy. Run system health-query for more details."))
"System is not healthy. Run 'system health-query-kube-upgrade "
"--rootca' for more details."))
def _clear_kubernetes_resources(self, hostnames):
"""Clears secrets and issuers created during the update process

View File

@ -252,6 +252,21 @@ class Health(object):
success = not fail_pod_list
return success, fail_pod_list
def _check_kube_all_pods_are_healthy(self):
    """Check that every kubernetes pod is in an acceptable phase.

    A pod is accepted when its phase is 'Pending', 'Running' or
    'Succeeded'; a pod in any other phase is reported as failed.

    :returns: a tuple ``(success, fail_pod_list)`` where ``success`` is
              True when no pod failed and ``fail_pod_list`` contains a
              ``(name, namespace)`` tuple for each failed pod.
    """
    accepted_phases = ('Pending', 'Running', 'Succeeded')

    # Keep only the pods whose phase is outside the accepted set.
    fail_pod_list = [
        (pod.metadata.name, pod.metadata.namespace)
        for pod in self._kube_operator.kube_get_all_pods()
        if pod.status.phase not in accepted_phases
    ]

    success = not fail_pod_list
    return success, fail_pod_list
def _check_kube_applications(self):
"""Checks that each kubernetes application is in a valid state"""
@ -677,7 +692,8 @@ class Health(object):
def get_system_health_kube_upgrade(self,
context,
force=False,
alarm_ignore_list=None):
alarm_ignore_list=None,
kube_rootca_update=False):
"""
Ensures the system is in a valid state for a kubernetes upgrade
@ -707,6 +723,18 @@ class Health(object):
output += _('Kubernetes applications not in a valid state: %s\n') \
% ', '.join(apps_not_valid)
health_ok = health_ok and success
if kube_rootca_update:
pods_healthy, fail_pod_list = self._check_kube_all_pods_are_healthy()
output += _(
'All kubernetes pods are in a valid state: [%s]\n') \
% (Health.SUCCESS_MSG if pods_healthy else Health.FAIL_MSG)
if not pods_healthy:
formatted_fail_pod_list = ['{} (namespace: {})'.format(name, namespace)
for name, namespace in fail_pod_list]
output += _('Kubernetes pods not in a valid state: %s\n') \
% ', '.join(formatted_fail_pod_list)
health_ok = health_ok and success and \
(pods_healthy if kube_rootca_update else True)
return health_ok, output

View File

@ -13729,7 +13729,8 @@ class ConductorManager(service.PeriodicService):
return health_util.get_system_health_kube_upgrade(
context=context,
force=force,
alarm_ignore_list=alarm_ignore_list)
alarm_ignore_list=alarm_ignore_list,
kube_rootca_update=kube_rootca_update)
else:
return health_util.get_system_health(
context=context,

View File

@ -192,7 +192,9 @@ class TestPostKubeRootCAUpdate(TestKubeRootCAUpdate,
dbbase.ProvisionedControllerHostTestCase):
@mock.patch('sysinv.common.health.Health._check_trident_compatibility', lambda x: True)
def test_create(self):
@mock.patch('sysinv.common.health.Health._check_kube_all_pods_are_healthy')
def test_create(self, mock_pods_healthy):
mock_pods_healthy.return_value = True, []
# Test creation of kubernetes rootca update
create_dict = dbutils.get_test_kube_rootca_update()
result = self.post_json('/kube_rootca_update?force=False', create_dict,
@ -212,13 +214,16 @@ class TestPostKubeRootCAUpdate(TestKubeRootCAUpdate,
self.assertEqual(host_updates[0]['effective_rootca_cert'], 'current_cert_serial')
@mock.patch('sysinv.common.health.Health._check_trident_compatibility', lambda x: True)
def test_create_rootca_update_unhealthy_from_alarms(self):
@mock.patch('sysinv.common.health.Health._check_kube_all_pods_are_healthy')
def test_create_rootca_update_unhealthy_from_alarms(self, mock_pods_healthy):
""" Test creation of kube rootca update while there are alarms"""
# Test creation of kubernetes rootca update when system health check fails
# 1 alarm will return False
self.fake_fm_client.alarm.list.return_value = \
[FAKE_MGMT_ALARM, ]
mock_pods_healthy.return_value = True, []
# Test creation of kubernetes rootca update
create_dict = dbutils.get_test_kube_rootca_update()
result = self.post_json('/kube_rootca_update?force=False', create_dict,
@ -228,8 +233,29 @@ class TestPostKubeRootCAUpdate(TestKubeRootCAUpdate,
# Verify that the rootca update has the expected attributes
self.assertEqual(result.content_type, 'application/json')
self.assertEqual(http_client.BAD_REQUEST, result.status_int)
self.assertIn("System is not healthy. Run system health-query for more details.",
result.json['error_message'])
self.assertIn("System is not healthy. Run 'system health-query-kube-upgrade "
"--rootca' for more details.", result.json['error_message'])
@mock.patch('sysinv.common.health.Health._check_trident_compatibility', lambda x: True)
@mock.patch('sysinv.common.health.Health._check_kube_all_pods_are_healthy')
def test_create_rootca_update_unhealthy_from_pods(self, mock_pods_healthy):
    """Creating a kube rootca update is refused while pods are unhealthy."""
    # Make the pod health check report one failing pod.
    failing_pods = [('Unhealthy-pod-name', 'Unhealthy-pod-namespace')]
    mock_pods_healthy.return_value = (False, failing_pods)

    # Attempt to create the kubernetes rootca update.
    payload = dbutils.get_test_kube_rootca_update()
    response = self.post_json('/kube_rootca_update?force=False', payload,
                              headers=self.headers,
                              expect_errors=True)

    # The request must be rejected with the health-check error message.
    expected_message = ("System is not healthy. Run 'system health-query-kube-upgrade "
                        "--rootca' for more details.")
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(http_client.BAD_REQUEST, response.status_int)
    self.assertIn(expected_message, response.json['error_message'])
def test_create_rootca_update_exists(self):
# Test creation of rootca update when a kubernetes rootca update already exists