Add kubernetes health checks

Add kubernetes health checks when a kubernetes upgrade is started,
to ensure the kubernetes cluster is healthy. Also add a new
"system health-query-kube-upgrade" command to run the health checks
without starting a kubernetes upgrade.
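
For reference, a minimal sketch of how the new check is reached from the
python client; the "cc" handle here is assumed to be an already-constructed
cgtsclient client (the same object the CLI shell functions receive):

    # Direct query, equivalent to "system health-query-kube-upgrade";
    # maps to GET /v1/health/kube-upgrade (see HealthManager below).
    print(cc.health.get_kube_upgrade())
    # The same report is also produced implicitly when a kubernetes upgrade
    # is started (POST /kube_upgrade), via get_system_health(kube_upgrade=True).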

Change-Id: Ib3a491e25a6cb523cfd9f08e74879e5c0a25d4f7
Story: 2006781
Task: 37583
Depends-On: https://review.opendev.org/#/c/700197/
Signed-off-by: Bart Wensley <barton.wensley@windriver.com>
Bart Wensley 2019-12-20 10:50:33 -06:00
parent d09c66a421
commit b5c78afe28
14 changed files with 951 additions and 44 deletions


@@ -20,3 +20,8 @@ class HealthManager(base.Manager):
path = '/v1/health/upgrade'
resp, body = self.api.json_request('GET', path)
return body
def get_kube_upgrade(self):
path = '/v1/health/kube-upgrade'
resp, body = self.api.json_request('GET', path)
return body


@@ -17,3 +17,8 @@ def do_health_query(cc, args):
def do_health_query_upgrade(cc, args):
"""Run the Health Check for an Upgrade."""
print(cc.health.get_upgrade())
def do_health_query_kube_upgrade(cc, args):
"""Run the Health Check for a Kubernetes Upgrade."""
print(cc.health.get_kube_upgrade())


@@ -36,11 +36,24 @@ class HealthController(rest.RestController):
@wsme_pecan.wsexpose(wtypes.text, wtypes.text)
def get_one(self, upgrade):
"""Validates the health of the system for an upgrade"""
try:
success, output = pecan.request.rpcapi.get_system_health(
pecan.request.context, upgrade=True)
except Exception as e:
LOG.exception(e)
if upgrade == 'upgrade':
try:
success, output = pecan.request.rpcapi.get_system_health(
pecan.request.context, upgrade=True)
except Exception as e:
LOG.exception(e)
raise wsme.exc.ClientSideError(_(
"Unable to perform health upgrade query."))
return output
elif upgrade == 'kube-upgrade':
try:
success, output = pecan.request.rpcapi.get_system_health(
pecan.request.context, kube_upgrade=True)
except Exception as e:
LOG.exception(e)
raise wsme.exc.ClientSideError(_(
"Unable to perform kubernetes health upgrade query."))
return output
else:
raise wsme.exc.ClientSideError(_(
"Unable to perform health upgrade query."))
return output
"Unsupported upgrade type %s." % upgrade))


@@ -170,6 +170,16 @@ class KubeUpgradeController(rest.RestController):
force = body.get('force', False) is True
# There must not be a platform upgrade in progress
try:
pecan.request.dbapi.software_upgrade_get_one()
except exception.NotFound:
pass
else:
raise wsme.exc.ClientSideError(_(
"A kubernetes upgrade cannot be done while a platform upgrade "
"is in progress"))
# There must not already be a kubernetes upgrade in progress
try:
pecan.request.dbapi.kube_upgrade_get_one()
@@ -214,9 +224,9 @@ class KubeUpgradeController(rest.RestController):
# TODO: check that all installed applications support new k8s version
# TODO: check that tiller/armada support new k8s version
# The system must be healthy from the platform perspective
# The system must be healthy
success, output = pecan.request.rpcapi.get_system_health(
pecan.request.context, force=force)
pecan.request.context, force=force, kube_upgrade=True)
if not success:
LOG.info("Health query failure during kubernetes upgrade start: %s"
% output)
@@ -225,9 +235,7 @@ class KubeUpgradeController(rest.RestController):
else:
raise wsme.exc.ClientSideError(_(
"System is not in a valid state for kubernetes upgrade. "
"Run system health-query-upgrade for more details."))
# TODO: kubernetes related health checks...
"Run system health-query for more details."))
# Create upgrade record.
create_values = {'from_version': current_kube_version,
@@ -327,6 +335,15 @@ class KubeUpgradeController(rest.RestController):
"Kubernetes upgrade must be in %s state to complete" %
kubernetes.KUBE_UPGRADING_KUBELETS))
# Make sure no hosts are in a transitory or failed state
kube_host_upgrades = \
pecan.request.dbapi.kube_host_upgrade_get_list()
for kube_host_upgrade in kube_host_upgrades:
if kube_host_upgrade.status is not None:
raise wsme.exc.ClientSideError(_(
"At least one host has not completed the kubernetes "
"upgrade"))
# Make sure the target version is active
version_states = self._kube_operator.kube_get_version_states()
if version_states.get(kube_upgrade_obj.to_version, None) != \


@@ -198,6 +198,16 @@ class UpgradeController(rest.RestController):
"upgrade-start rejected: An upgrade can only be started "
"when %s is active." % constants.CONTROLLER_0_HOSTNAME))
# There must not be a kubernetes upgrade in progress
try:
pecan.request.dbapi.kube_upgrade_get_one()
except exception.NotFound:
pass
else:
raise wsme.exc.ClientSideError(_(
"upgrade-start rejected: A platform upgrade cannot be done "
"while a kubernetes upgrade is in progress."))
# There must not already be an upgrade in progress
try:
pecan.request.dbapi.software_upgrade_get_one()


@@ -14,6 +14,7 @@ from oslo_log import log
from sysinv._i18n import _
from sysinv.common import ceph
from sysinv.common import constants
from sysinv.common import kubernetes
from sysinv.common import utils
from sysinv.common.fm import fmclient
from sysinv.common.storage_backend_conf import StorageBackendConfig
@@ -33,6 +34,7 @@ class Health(object):
def __init__(self, dbapi):
self._dbapi = dbapi
self._ceph = ceph.CephApiOperator()
self._kube_operator = kubernetes.KubeOperator()
def _check_hosts_provisioned(self, hosts):
"""Checks that each host is provisioned"""
@@ -213,15 +215,64 @@ class Health(object):
return True
def _check_kube_nodes_ready(self):
"""Checks that each kubernetes node is ready"""
fail_node_list = []
nodes = self._kube_operator.kube_get_nodes()
for node in nodes:
for condition in node.status.conditions:
if condition.type == "Ready" and condition.status != "True":
# This node is not ready
fail_node_list.append(node.metadata.name)
success = not fail_node_list
return success, fail_node_list
def _check_kube_control_plane_pods(self):
"""Checks that each kubernetes control plane pod is ready"""
fail_pod_list = []
pod_ready_status = self._kube_operator.\
kube_get_control_plane_pod_ready_status()
for pod_name, ready_status in pod_ready_status.items():
if ready_status != "True":
# This pod is not ready
fail_pod_list.append(pod_name)
success = not fail_pod_list
return success, fail_pod_list
def _check_kube_applications(self):
"""Checks that each kubernetes application is in a valid state"""
fail_app_list = []
apps = self._dbapi.kube_app_get_all()
for app in apps:
# The following states are valid during kubernetes upgrade
if app.status not in [constants.APP_UPLOAD_SUCCESS,
constants.APP_APPLY_SUCCESS,
constants.APP_INACTIVE_STATE]:
fail_app_list.append(app.name)
success = not fail_app_list
return success, fail_app_list
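
For illustration, a sketch of how this check behaves, assuming health_util is
a Health(dbapi) instance; the accepted states are the three constants listed
above, and the rejected example is the in-progress state used in the tests
further down:

    # Accepted while a kubernetes upgrade runs: uploaded, applied or inactive
    # applications (constants.APP_UPLOAD_SUCCESS, APP_APPLY_SUCCESS,
    # APP_INACTIVE_STATE). Anything mid-operation or failed is rejected,
    # e.g. constants.APP_APPLY_IN_PROGRESS.
    success, failed_apps = health_util._check_kube_applications()
    if not success:
        # failed_apps lists the offending application names
        print("apps blocking the kubernetes upgrade: %s" % ', '.join(failed_apps))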
def get_system_health(self, context, force=False):
"""Returns the general health of the system"""
# Checks the following:
# All hosts are provisioned
# All hosts are patch current
# All hosts are unlocked/enabled
# All hosts having matching configs
# No management affecting alarms
# For ceph systems: The storage cluster is healthy
"""Returns the general health of the system
Checks the following:
- All hosts are provisioned
- All hosts are patch current
- All hosts are unlocked/enabled
- All hosts having matching configs
- No management affecting alarms
- For ceph systems: The storage cluster is healthy
- All kubernetes nodes are ready
- All kubernetes control plane pods are ready
"""
hosts = self._dbapi.ihost_get_list()
output = _('System Health:\n')
@@ -289,6 +340,24 @@ class Health(object):
health_ok = health_ok and success
success, error_nodes = self._check_kube_nodes_ready()
output += _('All kubernetes nodes are ready: [%s]\n') \
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
if not success:
output += _('Kubernetes nodes not ready: %s\n') \
% ', '.join(error_nodes)
health_ok = health_ok and success
success, error_nodes = self._check_kube_control_plane_pods()
output += _('All kubernetes control plane pods are ready: [%s]\n') \
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
if not success:
output += _('Kubernetes control plane pods not ready: %s\n') \
% ', '.join(error_nodes)
health_ok = health_ok and success
return health_ok, output
def get_system_health_upgrade(self, context, force=False):
@@ -358,3 +427,24 @@ class Health(object):
health_ok = health_ok and success
return health_ok, output
def get_system_health_kube_upgrade(self, context, force=False):
"""Ensures the system is in a valid state for a kubernetes upgrade
Does a general health check then does the following:
- All kubernetes applications are in a stable state
"""
health_ok, output = self.get_system_health(context, force)
success, apps_not_valid = self._check_kube_applications()
output += _(
'All kubernetes applications are in a valid state: [%s]\n') \
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
if not success:
output += _('Kubernetes applications not in a valid state: %s\n') \
% ', '.join(apps_not_valid)
health_ok = health_ok and success
return health_ok, output
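
Assuming every check passes, the (health_ok, output) pair returned here looks
roughly as follows; the pre-existing platform checks are abbreviated and the
"[OK]" rendering of SUCCESS_MSG is an assumption (the tests below only pin
down the "[Fail]" form):

    health_ok, output = health_util.get_system_health_kube_upgrade(context)
    # health_ok == True, and output ends with lines such as:
    #   All kubernetes nodes are ready: [OK]
    #   All kubernetes control plane pods are ready: [OK]
    #   All kubernetes applications are in a valid state: [OK]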


@@ -387,6 +387,42 @@ class KubeOperator(object):
% (namespace, e))
raise
def kube_get_control_plane_pod_ready_status(self):
"""Returns the ready status of the control plane pods."""
c = self._get_kubernetesclient_core()
# First get a list of master nodes
master_nodes = list()
api_response = c.list_node(
label_selector="node-role.kubernetes.io/master")
for node in api_response.items:
master_nodes.append(node.metadata.name)
# Populate status dictionary
ready_status = dict()
for node_name in master_nodes:
for component in [KUBE_APISERVER,
KUBE_CONTROLLER_MANAGER,
KUBE_SCHEDULER]:
# Control plane pods are named by component and node.
# E.g. kube-apiserver-controller-0
pod_name = component + '-' + node_name
ready_status[pod_name] = None
# Retrieve the control plane pods
api_response = c.list_pod_for_all_namespaces(
label_selector="component in (%s,%s,%s)" % (
KUBE_APISERVER, KUBE_CONTROLLER_MANAGER, KUBE_SCHEDULER)
)
pods = api_response.items
for pod in pods:
if pod.status.conditions is not None:
for condition in pod.status.conditions:
if condition.type == "Ready":
ready_status[pod.metadata.name] = condition.status
return ready_status
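
An illustrative return value for a healthy two-controller system; the pod
names follow the component-plus-node convention built above:

    {
        'kube-apiserver-controller-0': 'True',
        'kube-controller-manager-controller-0': 'True',
        'kube-scheduler-controller-0': 'True',
        'kube-apiserver-controller-1': 'True',
        'kube-controller-manager-controller-1': 'True',
        'kube-scheduler-controller-1': 'True',
    }
    # A pod that exists but is not Ready maps to 'False'; a pod that is
    # expected for a master node but absent from the listing stays None.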
def kube_get_control_plane_versions(self):
"""Returns the lowest control plane component version on each
master node."""


@@ -9166,19 +9166,25 @@ class ConductorManager(service.PeriodicService):
return
def get_system_health(self, context, force=False, upgrade=False):
def get_system_health(self, context, force=False, upgrade=False,
kube_upgrade=False):
"""
Performs a system health check.
:param context: request context.
:param force: set to true to ignore minor and warning alarms
:param upgrade: set to true to perform an upgrade health check
:param kube_upgrade: set to true to perform a kubernetes upgrade health
check
"""
health_util = health.Health(self.dbapi)
if upgrade is True:
return health_util.get_system_health_upgrade(context=context,
force=force)
elif kube_upgrade is True:
return health_util.get_system_health_kube_upgrade(context=context,
force=force)
else:
return health_util.get_system_health(context=context,
force=force)


@@ -1285,17 +1285,21 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
return self.cast(context, self.make_msg('complete_simplex_backup',
success=success))
def get_system_health(self, context, force=False, upgrade=False):
def get_system_health(self, context, force=False, upgrade=False,
kube_upgrade=False):
"""
Performs a system health check.
:param context: request context.
:param force: set to true to ignore minor and warning alarms
:param upgrade: set to true to perform an upgrade health check
:param kube_upgrade: set to true to perform a kubernetes upgrade health
check
"""
return self.call(context,
self.make_msg('get_system_health',
force=force, upgrade=upgrade))
force=force, upgrade=upgrade,
kube_upgrade=kube_upgrade))
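
A minimal sketch of how API-side code exercises the new flag over RPC,
mirroring the kube-upgrade start path shown earlier (pecan.request.rpcapi is
the handle the API controllers already hold):

    success, output = pecan.request.rpcapi.get_system_health(
        pecan.request.context, force=False, kube_upgrade=True)
    if not success:
        # output carries the per-check report assembled in health.py
        LOG.info("kubernetes upgrade health check failed: %s" % output)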
def reserve_ip_for_first_storage_node(self, context):
"""


@@ -11,6 +11,7 @@ Tests for the API /kube_upgrade/ methods.
import mock
from six.moves import http_client
from sysinv.common import constants
from sysinv.common import kubernetes
from sysinv.tests.api import base
@@ -56,16 +57,17 @@ class FakeConductorAPI(object):
def __init__(self):
self.kube_download_images = mock.MagicMock()
self.kube_upgrade_networking = mock.MagicMock()
self.get_system_health_return = (True, "System is super healthy")
self.get_system_health_return = (
True, "System is super healthy")
def get_system_health(self, context, force=False):
def get_system_health(self, context, force=False, kube_upgrade=False):
if force:
return True, "System is healthy because I was forced to say that"
else:
return self.get_system_health_return
class TestKubeUpgrade(base.FunctionalTest, dbbase.BaseSystemTestCase):
class TestKubeUpgrade(base.FunctionalTest, dbbase.BaseHostTestCase):
def setUp(self):
super(TestKubeUpgrade, self).setUp()
@@ -132,6 +134,14 @@ class TestKubeUpgrade(base.FunctionalTest, dbbase.BaseSystemTestCase):
self.mocked_kube_get_version_states.start()
self.addCleanup(self.mocked_kube_get_version_states.stop)
def _create_controller_0(self, subfunction=None, numa_nodes=1, **kw):
return self._create_test_host(
personality=constants.CONTROLLER,
subfunction=subfunction,
numa_nodes=numa_nodes,
unit=0,
**kw)
class TestListKubeUpgrade(TestKubeUpgrade):
@@ -183,6 +193,24 @@ class TestPostKubeUpgrade(TestKubeUpgrade, dbbase.ControllerHostTestCase):
self.host.id)
self.assertEqual('v1.43.1', kube_host_upgrade.target_version)
def test_create_platform_upgrade_exists(self):
# Test creation of upgrade when platform upgrade in progress
dbutils.create_test_load(software_version=dbutils.SW_VERSION_NEW,
compatible_version=dbutils.SW_VERSION,
state=constants.IMPORTED_LOAD_STATE)
dbutils.create_test_upgrade()
create_dict = dbutils.post_get_test_kube_upgrade(to_version='v1.43.2')
result = self.post_json('/kube_upgrade', create_dict,
headers={'User-Agent': 'sysinv-test'},
expect_errors=True)
# Verify the failure
self.assertEqual(result.content_type, 'application/json')
self.assertEqual(http_client.BAD_REQUEST, result.status_int)
self.assertIn("upgrade cannot be done while a platform upgrade",
result.json['error_message'])
def test_create_upgrade_exists(self):
# Test creation of upgrade when upgrade already exists
dbutils.create_test_kube_upgrade(
@@ -546,6 +574,43 @@ class TestPatch(TestKubeUpgrade):
self.assertEqual(result['to_version'], 'v1.43.2')
self.assertEqual(result['state'], new_state)
def test_update_state_complete_incomplete_host(self):
# Test updating the state of an upgrade to complete when a host has
# not completed its upgrade
self.kube_get_version_states_result = {'v1.42.1': 'available',
'v1.42.2': 'available',
'v1.43.1': 'available',
'v1.43.2': 'active',
'v1.43.3': 'available'}
# Create host
self._create_controller_0()
# Create the upgrade
dbutils.create_test_kube_upgrade(
from_version='v1.43.1',
to_version='v1.43.2',
state=kubernetes.KUBE_UPGRADING_KUBELETS)
# Mark the kube host upgrade as failed
values = {'status': kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED}
self.dbapi.kube_host_upgrade_update(1, values)
# Update state
new_state = kubernetes.KUBE_UPGRADE_COMPLETE
result = self.patch_json('/kube_upgrade',
[{'path': '/state',
'value': new_state,
'op': 'replace'}],
headers={'User-Agent': 'sysinv-test'},
expect_errors=True)
# Verify the failure
self.assertEqual(result.content_type, 'application/json')
self.assertEqual(http_client.BAD_REQUEST, result.status_int)
self.assertIn("At least one host has not completed",
result.json['error_message'])
def test_update_state_no_upgrade(self):
# Test updating the state when an upgrade doesn't exist


@@ -0,0 +1,93 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Tests for the API /upgrade/ methods.
"""
import mock
from six.moves import http_client
from sysinv.common import constants
from sysinv.common import kubernetes
from sysinv.tests.api import base
from sysinv.tests.db import base as dbbase
from sysinv.tests.db import utils as dbutils
class FakeConductorAPI(object):
def __init__(self):
self.start_upgrade = mock.MagicMock()
self.get_system_health_return = (True, "System is super healthy")
def get_system_health(self, context, force=False, upgrade=False):
if force:
return True, "System is healthy because I was forced to say that"
else:
return self.get_system_health_return
class TestUpgrade(base.FunctionalTest, dbbase.BaseSystemTestCase):
def setUp(self):
super(TestUpgrade, self).setUp()
# Mock the Conductor API
self.fake_conductor_api = FakeConductorAPI()
p = mock.patch('sysinv.conductor.rpcapi.ConductorAPI')
self.mock_conductor_api = p.start()
self.mock_conductor_api.return_value = self.fake_conductor_api
self.addCleanup(p.stop)
# Behave as if the API is running on controller-0
p = mock.patch('socket.gethostname')
self.mock_socket_gethostname = p.start()
self.mock_socket_gethostname.return_value = 'controller-0'
self.addCleanup(p.stop)
class TestPostUpgrade(TestUpgrade, dbbase.ControllerHostTestCase):
def test_create(self):
# Create the to load
dbutils.create_test_load(software_version=dbutils.SW_VERSION_NEW,
compatible_version=dbutils.SW_VERSION,
state=constants.IMPORTED_LOAD_STATE)
# Test creation of upgrade
create_dict = dbutils.get_test_upgrade()
result = self.post_json('/upgrade', create_dict,
headers={'User-Agent': 'sysinv-test'})
# Verify that the upgrade was started
self.fake_conductor_api.start_upgrade.assert_called_once()
# Verify that the upgrade has the expected attributes
self.assertEqual(result.json['from_release'], dbutils.SW_VERSION)
self.assertEqual(result.json['to_release'], dbutils.SW_VERSION_NEW)
self.assertEqual(result.json['state'], constants.UPGRADE_STARTING)
def test_create_kube_upgrade_exists(self):
# Test creation of upgrade when a kubernetes upgrade exists
dbutils.create_test_kube_upgrade(
from_version='v1.42.1',
to_version='v1.42.2',
state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
)
# Test creation of upgrade
create_dict = dbutils.get_test_upgrade()
result = self.post_json('/upgrade', create_dict,
headers={'User-Agent': 'sysinv-test'},
expect_errors=True)
# Verify the failure
self.assertEqual(result.content_type, 'application/json')
self.assertEqual(http_client.BAD_REQUEST, result.status_int)
self.assertIn("cannot be done while a kubernetes upgrade",
result.json['error_message'])


@@ -0,0 +1,377 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Tests for the health utilities.
"""
import kubernetes
import mock
import uuid
from sysinv.common import constants
from sysinv.common import health
from sysinv.openstack.common import context
from sysinv.tests.db import base as dbbase
from sysinv.tests.db import utils as dbutils
class TestHealth(dbbase.BaseHostTestCase):
def setup_result(self):
self.patch_current_result = {
'data': [
{'hostname': 'controller-0',
'patch_current': True,
},
{'hostname': 'controller-1',
'patch_current': True,
}
]
}
self.multi_node_result = [
kubernetes.client.V1Node(
api_version="v1",
kind="Node",
metadata=kubernetes.client.V1ObjectMeta(
name="controller-0",
namespace="test-namespace-1"),
status=kubernetes.client.V1NodeStatus(
conditions=[
kubernetes.client.V1NodeCondition(
status="False",
type="NetworkUnavailable"),
kubernetes.client.V1NodeCondition(
status="False",
type="MemoryPressure"),
kubernetes.client.V1NodeCondition(
status="False",
type="DiskPressure"),
kubernetes.client.V1NodeCondition(
status="False",
type="PIDPressure"),
kubernetes.client.V1NodeCondition(
status="True",
type="Ready"),
],
node_info=kubernetes.client.V1NodeSystemInfo(
architecture="fake-architecture",
boot_id="fake-boot-id",
container_runtime_version="fake-cr-version",
kernel_version="fake-kernel-version",
kube_proxy_version="fake-proxy-version",
kubelet_version="v1.42.4",
machine_id="fake-machine-id",
operating_system="fake-os",
os_image="fake-os-image",
system_uuid="fake-system-uuid"))
),
kubernetes.client.V1Node(
api_version="v1",
kind="Node",
metadata=kubernetes.client.V1ObjectMeta(
name="controller-1",
namespace="test-namespace-1"),
status=kubernetes.client.V1NodeStatus(
conditions=[
kubernetes.client.V1NodeCondition(
status="False",
type="NetworkUnavailable"),
kubernetes.client.V1NodeCondition(
status="False",
type="MemoryPressure"),
kubernetes.client.V1NodeCondition(
status="False",
type="DiskPressure"),
kubernetes.client.V1NodeCondition(
status="False",
type="PIDPressure"),
kubernetes.client.V1NodeCondition(
status="True",
type="Ready"),
],
node_info=kubernetes.client.V1NodeSystemInfo(
architecture="fake-architecture",
boot_id="fake-boot-id",
container_runtime_version="fake-cr-version",
kernel_version="fake-kernel-version",
kube_proxy_version="fake-proxy-version",
kubelet_version="v1.42.3",
machine_id="fake-machine-id",
operating_system="fake-os",
os_image="fake-os-image",
system_uuid="fake-system-uuid"))
),
]
self.cp_pod_ready_status_result = {
'kube-apiserver-controller-0': 'True',
'kube-controller-manager-controller-0': 'True',
'kube-scheduler-controller-0': 'True',
'kube-apiserver-controller-1': 'True',
'kube-controller-manager-controller-1': 'True',
'kube-scheduler-controller-1': 'True',
}
def setUp(self):
super(TestHealth, self).setUp()
# Mock the patching API
self.mock_patch_query_hosts_result = None
def mock_patch_query_hosts(token, timeout, region_name):
return self.mock_patch_query_hosts_result
self.mocked_patch_query_hosts = mock.patch(
'sysinv.api.controllers.v1.patch_api.patch_query_hosts',
mock_patch_query_hosts)
self.mocked_patch_query_hosts.start()
self.addCleanup(self.mocked_patch_query_hosts.stop)
# Mock the KubeOperator
self.kube_get_nodes_result = None
def mock_kube_get_nodes(obj):
return self.kube_get_nodes_result
self.mocked_kube_get_nodes = mock.patch(
'sysinv.common.kubernetes.KubeOperator.kube_get_nodes',
mock_kube_get_nodes)
self.mocked_kube_get_nodes.start()
self.addCleanup(self.mocked_kube_get_nodes.stop)
self.kube_get_control_plane_pod_ready_status_result = None
def mock_kube_get_control_plane_pod_ready_status(obj):
return self.kube_get_control_plane_pod_ready_status_result
self.mocked_kube_get_control_plane_pod_ready_status = mock.patch(
'sysinv.common.kubernetes.KubeOperator.'
'kube_get_control_plane_pod_ready_status',
mock_kube_get_control_plane_pod_ready_status)
self.mocked_kube_get_control_plane_pod_ready_status.start()
self.addCleanup(
self.mocked_kube_get_control_plane_pod_ready_status.stop)
# Mock the fm API
p = mock.patch('sysinv.common.health.fmclient')
self.mock_fm_client_alarm_list = p.start()
self.addCleanup(p.stop)
# Set up objects for testing
self.context = context.get_admin_context()
self.health = health.Health(self.dbapi)
# Set up results
self.setup_result()
def tearDown(self):
super(TestHealth, self).tearDown()
pass
def test_get_system_health(self):
# Create controller-0
config_uuid = str(uuid.uuid4())
self._create_test_host(personality=constants.CONTROLLER,
unit=0,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create controller-1
self._create_test_host(personality=constants.CONTROLLER,
unit=1,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Set up the mocked results
self.mock_patch_query_hosts_result = self.patch_current_result
self.kube_get_nodes_result = self.multi_node_result
self.kube_get_control_plane_pod_ready_status_result = \
self.cp_pod_ready_status_result
# Check system health
health_ok, output = self.health.get_system_health(self.context)
assert health_ok is True, "output: %s" % output
def test_get_system_health_k8s_node_not_ready(self):
# Create controller-0
config_uuid = str(uuid.uuid4())
self._create_test_host(personality=constants.CONTROLLER,
unit=0,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create controller-1
self._create_test_host(personality=constants.CONTROLLER,
unit=1,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Set up the mocked results
self.mock_patch_query_hosts_result = self.patch_current_result
self.kube_get_nodes_result = self.multi_node_result
# Mark controller-0 as not ready
self.kube_get_nodes_result[0].status.conditions[4].status = "False"
self.kube_get_control_plane_pod_ready_status_result = \
self.cp_pod_ready_status_result
# Check system health
health_ok, output = self.health.get_system_health(self.context)
assert health_ok is False, "output: %s" % output
assert "Kubernetes nodes not ready: controller-0" in output, \
"get_system_health output: %s" % output
def test_get_system_health_k8s_cp_pod_not_ready(self):
# Create controller-0
config_uuid = str(uuid.uuid4())
self._create_test_host(personality=constants.CONTROLLER,
unit=0,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create controller-1
self._create_test_host(personality=constants.CONTROLLER,
unit=1,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Set up the mocked results
self.mock_patch_query_hosts_result = self.patch_current_result
self.kube_get_nodes_result = self.multi_node_result
self.kube_get_control_plane_pod_ready_status_result = \
self.cp_pod_ready_status_result
# Mark a cp pod as not ready
self.kube_get_control_plane_pod_ready_status_result[
'kube-controller-manager-controller-1'] = 'False'
# Check system health
health_ok, output = self.health.get_system_health(self.context)
assert health_ok is False, "get_system_health output: %s" % output
assert "kubernetes control plane pods are ready: [Fail]" in output, \
"output: %s" % output
assert "not ready: kube-controller-manager-controller-1" in output, \
"output: %s" % output
def test_get_system_health_kube_upgrade(self):
# Create controller-0
config_uuid = str(uuid.uuid4())
self._create_test_host(personality=constants.CONTROLLER,
unit=0,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create controller-1
self._create_test_host(personality=constants.CONTROLLER,
unit=1,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create kubernetes apps
dbutils.create_test_app(name='test-app-1',
status=constants.APP_APPLY_SUCCESS)
dbutils.create_test_app(name='test-app-2',
status=constants.APP_APPLY_SUCCESS)
dbutils.create_test_app(name='test-app-3',
status=constants.APP_UPLOAD_SUCCESS)
# Set up the mocked results
self.mock_patch_query_hosts_result = self.patch_current_result
self.kube_get_nodes_result = self.multi_node_result
self.kube_get_control_plane_pod_ready_status_result = \
self.cp_pod_ready_status_result
# Check system health
health_ok, output = self.health.get_system_health_kube_upgrade(
self.context)
assert health_ok is True, "output: %s" % output
def test_get_system_health_kube_upgrade_k8s_app_invalid_state(self):
# Create controller-0
config_uuid = str(uuid.uuid4())
self._create_test_host(personality=constants.CONTROLLER,
unit=0,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create controller-1
self._create_test_host(personality=constants.CONTROLLER,
unit=1,
config_status=None,
config_applied=config_uuid,
config_target=config_uuid,
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
# Create kubernetes apps
dbutils.create_test_app(name='test-app-1',
status=constants.APP_APPLY_SUCCESS)
dbutils.create_test_app(name='test-app-2',
status=constants.APP_APPLY_IN_PROGRESS)
dbutils.create_test_app(name='test-app-3',
status=constants.APP_UPLOAD_SUCCESS)
# Set up the mocked results
self.mock_patch_query_hosts_result = self.patch_current_result
self.kube_get_nodes_result = self.multi_node_result
self.kube_get_control_plane_pod_ready_status_result = \
self.cp_pod_ready_status_result
# Check system health
health_ok, output = self.health.get_system_health_kube_upgrade(
self.context)
assert health_ok is False, "output: %s" % output
assert "applications are in a valid state: [Fail]" in output, \
"output: %s" % output
assert "applications not in a valid state: test-app-2" in output, \
"output: %s" % output


@@ -54,6 +54,23 @@ FAKE_KUBE_VERSIONS = [
},
]
FAKE_POD_STATUS = kubernetes.client.V1PodStatus(
conditions=[
kubernetes.client.V1PodCondition(
status="True",
type="Initialized"),
kubernetes.client.V1PodCondition(
status="True",
type="Ready"),
kubernetes.client.V1PodCondition(
status="True",
type="ContainersReady"),
kubernetes.client.V1PodCondition(
status="True",
type="PodScheduled"),
],
)
def mock_get_kube_versions():
return FAKE_KUBE_VERSIONS
@@ -193,6 +210,7 @@ class TestKubeOperator(base.TestCase):
metadata=kubernetes.client.V1ObjectMeta(
name="kube-apiserver-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -213,6 +231,7 @@
metadata=kubernetes.client.V1ObjectMeta(
name="kube-controller-manager-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -233,6 +252,7 @@
metadata=kubernetes.client.V1ObjectMeta(
name="kube-scheduler-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -253,6 +273,7 @@
metadata=kubernetes.client.V1ObjectMeta(
name="kube-apiserver-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -273,6 +294,7 @@
metadata=kubernetes.client.V1ObjectMeta(
name="kube-controller-manager-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -293,6 +315,7 @@
metadata=kubernetes.client.V1ObjectMeta(
name="kube-scheduler-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
@@ -398,10 +421,106 @@ class TestKubeOperator(base.TestCase):
),
}
self.single_node_result = kubernetes.client.V1NodeList(
self.cp_pods_list_result = kubernetes.client.V1PodList(
api_version="v1",
items=[
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-apiserver-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-apiserver",
image="test-image-1:v1.42.1"),
],
),
),
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-controller-manager-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-controller-manager",
image="test-image-2:v1.42.1"),
],
),
),
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-scheduler-test-node-1",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-scheduler",
image="test-image-3:v1.42.1"),
],
),
),
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-apiserver-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-apiserver",
image="test-image-1:v1.42.1"),
],
),
),
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-controller-manager-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-controller-manager",
image="test-image-2:v1.42.1"),
],
),
),
kubernetes.client.V1Pod(
api_version="v1",
kind="Pod",
metadata=kubernetes.client.V1ObjectMeta(
name="kube-scheduler-test-node-2",
namespace="kube-system"),
status=FAKE_POD_STATUS,
spec=kubernetes.client.V1PodSpec(
containers=[
kubernetes.client.V1Container(
name="kube-scheduler",
image="test-image-3:v1.42.1"),
],
),
),
],
)
self.single_node_result = kubernetes.client.V1NodeList(
api_version="v1",
items=[
kubernetes.client.V1Node(
api_version="v1",
kind="Node",
metadata=kubernetes.client.V1ObjectMeta(
@@ -426,7 +545,7 @@ class TestKubeOperator(base.TestCase):
self.multi_node_result = kubernetes.client.V1NodeList(
api_version="v1",
items=[
kubernetes.client.V1Pod(
kubernetes.client.V1Node(
api_version="v1",
kind="Node",
metadata=kubernetes.client.V1ObjectMeta(
@@ -445,7 +564,7 @@ class TestKubeOperator(base.TestCase):
os_image="fake-os-image",
system_uuid="fake-system-uuid"))
),
kubernetes.client.V1Pod(
kubernetes.client.V1Node(
api_version="v1",
kind="Node",
metadata=kubernetes.client.V1ObjectMeta(
@@ -470,13 +589,13 @@ class TestKubeOperator(base.TestCase):
self.config_map_result = kubernetes.client.V1ConfigMap(
api_version="v1",
data={"ClusterConfiguration":
"apiServer:\n"
" certSANs:\n"
" - 127.0.0.1\n"
" - 192.168.206.2\n"
"apiVersion: kubeadm.k8s.io/v1beta2\n"
"kubernetesVersion: v1.42.4\n"
"kind: ClusterStatus\n"
"apiServer:\n"
" certSANs:\n"
" - 127.0.0.1\n"
" - 192.168.206.2\n"
"apiVersion: kubeadm.k8s.io/v1beta2\n"
"kubernetesVersion: v1.42.4\n"
"kind: ClusterStatus\n"
},
metadata=kubernetes.client.V1ObjectMeta(
name="kubeadm-config",
@@ -486,12 +605,12 @@ class TestKubeOperator(base.TestCase):
self.config_map_result_no_version = kubernetes.client.V1ConfigMap(
api_version="v1",
data={"ClusterConfiguration":
"apiServer:\n"
" certSANs:\n"
" - 127.0.0.1\n"
" - 192.168.206.2\n"
"apiVersion: kubeadm.k8s.io/v1beta2\n"
"kind: ClusterStatus\n"
"apiServer:\n"
" certSANs:\n"
" - 127.0.0.1\n"
" - 192.168.206.2\n"
"apiVersion: kubeadm.k8s.io/v1beta2\n"
"kind: ClusterStatus\n"
},
metadata=kubernetes.client.V1ObjectMeta(
name="kubeadm-config",
@@ -513,6 +632,15 @@ class TestKubeOperator(base.TestCase):
mock_list_namespaced_pod)
self.mocked_list_namespaced_pod.start()
self.list_pod_for_all_namespaces_result = None
def mock_list_pod_for_all_namespaces(obj, label_selector=""):
return self.list_pod_for_all_namespaces_result
self.mocked_list_pod_for_all_namespaces = mock.patch(
'kubernetes.client.CoreV1Api.list_pod_for_all_namespaces',
mock_list_pod_for_all_namespaces)
self.mocked_list_pod_for_all_namespaces.start()
self.list_node_result = None
def mock_list_node(obj, label_selector=""):
@@ -537,7 +665,9 @@ class TestKubeOperator(base.TestCase):
super(TestKubeOperator, self).tearDown()
self.mocked_list_namespaced_pod.stop()
self.mocked_list_pod_for_all_namespaces.stop()
self.mocked_list_node.stop()
self.mocked_read_namespaced_config_map.stop()
def test_kube_get_image_by_pod_name(self):
@@ -563,6 +693,47 @@ class TestKubeOperator(base.TestCase):
'test-pod-1', 'test-namespace-1', 'test-container-1')
assert result == "test-image-1:imageversion-1"
def test_kube_get_control_plane_pod_ready_status(self):
self.list_pod_for_all_namespaces_result = self.cp_pods_list_result
self.list_node_result = self.multi_node_result
result = self.kube_operator.kube_get_control_plane_pod_ready_status()
assert result == {'kube-apiserver-test-node-1': 'True',
'kube-controller-manager-test-node-1': 'True',
'kube-scheduler-test-node-1': 'True',
'kube-apiserver-test-node-2': 'True',
'kube-controller-manager-test-node-2': 'True',
'kube-scheduler-test-node-2': 'True'}
def test_kube_get_control_plane_pod_ready_status_single_node(self):
self.list_pod_for_all_namespaces_result = self.cp_pods_list_result
del self.cp_pods_list_result.items[5]
del self.cp_pods_list_result.items[4]
del self.cp_pods_list_result.items[3]
self.list_node_result = self.single_node_result
result = self.kube_operator.kube_get_control_plane_pod_ready_status()
assert result == {'kube-apiserver-test-node-1': 'True',
'kube-controller-manager-test-node-1': 'True',
'kube-scheduler-test-node-1': 'True'}
def test_kube_get_control_plane_pod_ready_status_missing_pods(self):
self.list_pod_for_all_namespaces_result = self.cp_pods_list_result
del self.cp_pods_list_result.items[5]
del self.cp_pods_list_result.items[1]
self.list_node_result = self.multi_node_result
result = self.kube_operator.kube_get_control_plane_pod_ready_status()
assert result == {'kube-apiserver-test-node-1': 'True',
'kube-controller-manager-test-node-1': None,
'kube-scheduler-test-node-1': 'True',
'kube-apiserver-test-node-2': 'True',
'kube-controller-manager-test-node-2': 'True',
'kube-scheduler-test-node-2': None}
def test_kube_get_control_plane_versions(self):
self.list_namespaced_pod_result = self.cp_pods_result


@@ -77,6 +77,7 @@ properties = {
int_uninitialized = 999
SW_VERSION = '0.0'
SW_VERSION_NEW = '1.0'
def get_test_node(**kw):
@@ -222,9 +223,10 @@ def create_test_isystem(**kw):
def get_test_load(**kw):
load = {
"software_version": SW_VERSION,
"compatible_version": "N/A",
"software_version": kw.get("software_version", SW_VERSION),
"compatible_version": kw.get("compatible_version", "N/A"),
"required_patches": "N/A",
"state": kw.get("state", constants.ACTIVE_LOAD_STATE),
}
return load
@@ -235,6 +237,19 @@ def create_test_load(**kw):
return dbapi.load_create(load)
def get_test_upgrade(**kw):
upgrade = {'from_load': kw.get('from_load', 1),
'to_load': kw.get('to_load', 2),
'state': kw.get('state', constants.UPGRADE_STARTING)}
return upgrade
def create_test_upgrade(**kw):
upgrade = get_test_upgrade(**kw)
dbapi = db_api.get_instance()
return dbapi.software_upgrade_create(upgrade)
def post_get_test_kube_upgrade(**kw):
upgrade = get_test_kube_upgrade(**kw)
del upgrade['id']