From 45621b7019e8eacee6c6620cdc0bd8efe650dcf3 Mon Sep 17 00:00:00 2001 From: Joao Soubihe Date: Wed, 24 Mar 2021 15:17:15 -0400 Subject: [PATCH] kubernetes rootca update start Modifications for new k8s-rootca-update-start API - As the first step of this feature, add an API that starts the kube-rootca-update procedure in the system and registers it in the db. - Add constants describing the possible states presented during the execution of the kube-rootca-update procedure - Add a check to the kube_upgrade API to ensure that no root CA update is in progress before an upgrade starts - Enhance health checking to include kube-rootca-update specific checking - Tox unit tests Story: 2008675 Task: 42405 Depends-on: https://review.opendev.org/c/starlingx/fault/+/788943 Change-Id: I59ebf977303c13f9c9c97276bb379ef125bdcfc7 Signed-off-by: Joao Soubihe --- .../sysinv/api/controllers/v1/__init__.py | 12 ++ .../api/controllers/v1/kube_rootca_update.py | 170 +++++++++++++++ .../sysinv/sysinv/sysinv/common/kubernetes.py | 23 +++ .../sysinv/sysinv/sysinv/conductor/manager.py | 5 +- .../sysinv/sysinv/sysinv/conductor/rpcapi.py | 4 +- .../tests/api/test_kube_rootca_update.py | 194 ++++++++++++++++++ 6 files changed, 406 insertions(+), 2 deletions(-) create mode 100644 sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_rootca_update.py create mode 100644 sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_rootca_update.py diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/__init__.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/__init__.py index b477aab19d..5d90fb23a7 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/__init__.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/__init__.py @@ -43,6 +43,7 @@ from sysinv.api.controllers.v1 import host from sysinv.api.controllers.v1 import kube_app from sysinv.api.controllers.v1 import kube_cluster from sysinv.api.controllers.v1 import kube_host_upgrade +from sysinv.api.controllers.v1 import kube_rootca_update 
from sysinv.api.controllers.v1 import kube_upgrade from sysinv.api.controllers.v1 import kube_version from sysinv.api.controllers.v1 import label @@ -262,6 +263,9 @@ class V1(base.APIBase): kube_upgrade = [link.Link] "Links to the kube_upgrade resource" + kube_rootca_update = [link.Link] + "Links to the kube_rootca_update resource" + kube_host_upgrades = [link.Link] "Links to the kube_host_upgrade resource" @@ -818,6 +822,13 @@ class V1(base.APIBase): 'kube_upgrade', '', bookmark=True)] + v1.kube_rootca_update = [link.Link.make_link('self', pecan.request.host_url, + 'kube_rootca_update', ''), + link.Link.make_link('bookmark', + pecan.request.host_url, + 'kube_rootca_update', '', + bookmark=True)] + v1.kube_host_upgrades = [link.Link.make_link('self', pecan.request.host_url, 'kube_host_upgrades', ''), @@ -925,6 +936,7 @@ class Controller(rest.RestController): kube_clusters = kube_cluster.KubeClusterController() kube_versions = kube_version.KubeVersionController() kube_upgrade = kube_upgrade.KubeUpgradeController() + kube_rootca_update = kube_rootca_update.KubeRootCAUpdateController() kube_host_upgrades = kube_host_upgrade.KubeHostUpgradeController() device_images = device_image.DeviceImageController() device_image_state = device_image_state.DeviceImageStateController() diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_rootca_update.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_rootca_update.py new file mode 100644 index 0000000000..d03f5355ee --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_rootca_update.py @@ -0,0 +1,170 @@ +# +# Copyright (c) 2019 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + + +import os +import pecan +import six +import wsme +import wsmeext.pecan as wsme_pecan + +from fm_api import fm_api +from fm_api import constants as fm_constants +from oslo_log import log +from pecan import rest +from sysinv import objects +from sysinv.api.controllers.v1 import base +from sysinv.api.controllers.v1 import link +from sysinv.api.controllers.v1 import types +from sysinv.api.controllers.v1 import utils +from sysinv.common import constants +from sysinv.common import exception +from sysinv.common import kubernetes +from sysinv.common import utils as cutils +from wsme import types as wtypes + + +LOG = log.getLogger(__name__) +LOCK_NAME = 'KubeRootCAUpdateController' + + +class KubeRootCAUpdate(base.APIBase): + """API representation of a Kubernetes RootCA Update.""" + + id = int + "Unique ID for this entry" + + uuid = types.uuid + "Unique UUID for this entry" + + from_rootca_cert = wtypes.text + "The from certificate for the kubernetes rootCA update" + + to_rootca_cert = wtypes.text + "The to certificate for the kubernetes rootCA update" + + state = wtypes.text + "Kubernetes rootCA update state" + + capabilities = {wtypes.text: utils.ValidTypes(wtypes.text, + six.integer_types)} + "Additional properties to be used in kube_rootca_update operations" + + links = [link.Link] + "A list containing a self link and associated kubernetes rootca update links" + + def __init__(self, **kwargs): + self.fields = objects.kube_rootca_update.fields.keys() + for k in self.fields: + if not hasattr(self, k): + continue + setattr(self, k, kwargs.get(k, wtypes.Unset)) + + @classmethod + def convert_with_links(cls, rpc_kube_rootca_update, expand=True): + kube_rootca_update = KubeRootCAUpdate(**rpc_kube_rootca_update.as_dict()) + if not expand: + kube_rootca_update.unset_fields_except(['uuid', 'from_rootca_cert', + 'to_rootca_cert', 'state']) + + kube_rootca_update.links = [ + link.Link.make_link('self', pecan.request.host_url, + 
'kube_rootca_update', kube_rootca_update.uuid), + link.Link.make_link('bookmark', + pecan.request.host_url, + 'kube_rootca_update', kube_rootca_update.uuid, + bookmark=True) + ] + return kube_rootca_update + + +class KubeRootCAUpdateController(rest.RestController): + """REST controller for kubernetes rootCA updates.""" + + def __init__(self): + self.fm_api = fm_api.FaultAPIs() + + @cutils.synchronized(LOCK_NAME) + @wsme_pecan.wsexpose(KubeRootCAUpdate, body=six.text_type) + def post(self, body): + """Create a new Kubernetes RootCA Update and start update.""" + + force = body.get('force', False) is True + alarm_ignore_list = body.get('alarm_ignore_list') + + try: + pecan.request.dbapi.kube_rootca_update_get_one() + except exception.NotFound: + pass + else: + raise wsme.exc.ClientSideError(( + "A kubernetes rootca update is already in progress")) + + # There must not be a platform upgrade in progress + try: + pecan.request.dbapi.software_upgrade_get_one() + except exception.NotFound: + pass + else: + raise wsme.exc.ClientSideError(( + "A kubernetes rootca update cannot be done while a platform upgrade is in progress")) + + # There must not be a kubernetes upgrade in progress + try: + pecan.request.dbapi.kube_upgrade_get_one() + except exception.NotFound: + pass + else: + raise wsme.exc.ClientSideError(( + "A kubernetes rootca update cannot be done while a kube upgrade " + "is in progress")) + + # The system must be healthy + healthy, output = pecan.request.rpcapi.get_system_health( + pecan.request.context, + force=force, + kube_rootca_update=True, + alarm_ignore_list=alarm_ignore_list) + if not healthy: + LOG.info("Health query failure during kubernetes rootca update start: %s" + % output) + if os.path.exists(constants.SYSINV_RUNNING_IN_LAB): + LOG.info("Running in lab, ignoring health errors.") + else: + raise wsme.exc.ClientSideError(( + "System is not in a valid state for kubernetes rootca update. 
" + "Run system health-query for more details.")) + + create_obj = {'state': kubernetes.KUBE_ROOTCA_UPDATE_STARTED} + new_update = pecan.request.dbapi.kube_rootca_update_create(create_obj) + + entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, + constants.CONTROLLER_HOSTNAME) + fault = fm_api.Fault( + alarm_id=fm_constants.FM_ALARM_ID_KUBE_ROOTCA_UPDATE_IN_PROGRESS, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST, + entity_instance_id=entity_instance_id, + severity=fm_constants.FM_ALARM_SEVERITY_MINOR, + reason_text="Kubernetes rootca update in progress", + # environmental + alarm_type=fm_constants.FM_ALARM_TYPE_5, + # unspecified-reason + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_65, + proposed_repair_action="Wait for kubernetes rootca procedure to complete", + service_affecting=False) + self.fm_api.set_fault(fault) + LOG.info("Started kubernetes rootca update") + + return KubeRootCAUpdate.convert_with_links(new_update) + + @wsme_pecan.wsexpose(KubeRootCAUpdate, types.uuid) + def get_one(self, uuid): + """Retrieve information about the given kubernetes rootca update.""" + + rpc_kube_rootca_update = objects.kube_rootca_update.get_by_uuid( + pecan.request.context, uuid) + return KubeRootCAUpdate.convert_with_links(rpc_kube_rootca_update) diff --git a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py index a3cfa21be6..c4edf73b62 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py +++ b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py @@ -76,6 +76,29 @@ KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED = 'upgrading-control-plane-failed' KUBE_HOST_UPGRADING_KUBELET = 'upgrading-kubelet' KUBE_HOST_UPGRADING_KUBELET_FAILED = 'upgrading-kubelet-failed' +# Kubernetes rootca update states +KUBE_ROOTCA_UPDATE_STARTED = 'update-started' +KUBE_ROOTCA_UPDATE_CERT_UPLOADED = 'update-new-rootca-cert-uploaded' +KUBE_ROOTCA_UPDATE_CERT_GENERATED = 
'update-new-rootca-cert-generated' +KUBE_ROOTCA_UPDATE_UPDATING_PODS_TRUSTBOTHCAS = 'updating-pods-trustBothCAs' +KUBE_ROOTCA_UPDATE_UPDATED_PODS_TRUSTBOTHCAS = 'updated-pods-trustBothCAs' +KUBE_ROOTCA_UPDATE_UPDATING_PODS_TRUSTBOTHCAS_FAILED = 'updating-pods-trustBothCAs-failed' +KUBE_ROOTCA_UPDATE_UPDATING_PODS_TRUSTNEWCA = 'updating-pods-trustNewCA' +KUBE_ROOTCA_UPDATE_UPDATED_PODS_TRUSTNEWCA = 'updated-pods-trustNewCA' +KUBE_ROOTCA_UPDATE_UPDATING_PODS_TRUSTNEWCA_FAILED = 'updating-pods-trustNewCA-failed' +KUBE_ROOTCA_UPDATE_COMPLETED = 'update-completed' + +# Kubernetes rootca host update states +KUBE_ROOTCA_UPDATING_HOST_TRUSTBOTHCAS = 'updating-host-trustBothCAs' +KUBE_ROOTCA_UPDATED_HOST_TRUSTBOTHCAS = 'updated-host-trustBothCAs' +KUBE_ROOTCA_UPDATING_HOST_TRUSTBOTHCAS_FAILED = 'updating-host-trustBothCAs-failed' +KUBE_ROOTCA_UPDATING_HOST_UPDATECERTS = 'updating-host-updateCerts' +KUBE_ROOTCA_UPDATED_HOST_UPDATECERTS = 'updated-host-updateCerts' +KUBE_ROOTCA_UPDATING_HOST_UPDATECERTS_FAILED = 'updating-host-updateCerts-failed' +KUBE_ROOTCA_UPDATING_HOST_TRUSTNEWCA = 'updating-host-trustNewCA' +KUBE_ROOTCA_UPDATED_HOST_TRUSTNEWCA = 'updated-host-trustNewCA' +KUBE_ROOTCA_UPDATING_HOST_TRUSTNEWCA_FAILED = 'updating-host-trustNewCA-failed' + # Kubernetes constants MANIFEST_APPLY_TIMEOUT = 60 * 15 MANIFEST_APPLY_INTERVAL = 10 diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 36971377b3..b9a640166b 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -10792,6 +10792,7 @@ class ConductorManager(service.PeriodicService): def get_system_health(self, context, force=False, upgrade=False, kube_upgrade=False, + kube_rootca_update=False, alarm_ignore_list=None): """ Performs a system health check. 
@@ -10801,6 +10802,8 @@ class ConductorManager(service.PeriodicService): :param upgrade: set to true to perform an upgrade health check :param kube_upgrade: set to true to perform a kubernetes upgrade health check + :param kube_rootca_update: set to true to perform a kubernetes root CA + update health check :param alarm_ignore_list: list of alarm ids to ignore when performing a health check """ @@ -10811,7 +10814,7 @@ class ConductorManager(service.PeriodicService): context=context, force=force, alarm_ignore_list=alarm_ignore_list) - elif kube_upgrade is True: + elif kube_upgrade is True or kube_rootca_update is True: return health_util.get_system_health_kube_upgrade( context=context, force=force, diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index a181790f8f..a71c30ad00 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -1396,7 +1396,8 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): success=success)) def get_system_health(self, context, force=False, upgrade=False, - kube_upgrade=False, alarm_ignore_list=None): + kube_upgrade=False, kube_rootca_update=False, + alarm_ignore_list=None): """ Performs a system health check. @@ -1412,6 +1413,7 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): self.make_msg('get_system_health', force=force, upgrade=upgrade, kube_upgrade=kube_upgrade, + kube_rootca_update=kube_rootca_update, alarm_ignore_list=alarm_ignore_list)) def reserve_ip_for_first_storage_node(self, context): diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_rootca_update.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_rootca_update.py new file mode 100644 index 0000000000..c29e180d34 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kube_rootca_update.py @@ -0,0 +1,194 @@ +""" +Tests for the API /kube_rootca_update/ methods. 
+""" + +import mock +from six.moves import http_client + +from sysinv.common import constants +from sysinv.common import health +from sysinv.common import kubernetes +from sysinv.conductor.manager import ConductorManager + +from sysinv.tests.api import base +from sysinv.tests.db import utils as dbutils +from sysinv.tests.db import base as dbbase + + +class FakeAlarm(object): + def __init__(self, alarm_id, mgmt_affecting): + self.alarm_id = alarm_id + self.mgmt_affecting = mgmt_affecting + + +FAKE_MGMT_ALARM = FakeAlarm('900.401', "True") +# FAKE_NON_MGMT_AFFECTING_ALARM = FakeAlarm('900.400', "False") + + +class FakeFmClient(object): + def __init__(self): + self.alarm = mock.MagicMock() + + +class FakeConductorAPI(object): + + def __init__(self): + self.service = ConductorManager('test-host', 'test-topic') + + def get_system_health(self, context, force=False, upgrade=False, + kube_upgrade=False, kube_rootca_update=False, + alarm_ignore_list=None): + return self.service.get_system_health( + context, + force=force, + upgrade=upgrade, + kube_upgrade=kube_upgrade, + kube_rootca_update=kube_rootca_update, + alarm_ignore_list=alarm_ignore_list) + + +class TestKubeRootCAUpdate(base.FunctionalTest): + + def setUp(self): + super(TestKubeRootCAUpdate, self).setUp() + + # Mock the Conductor API + self.fake_conductor_api = FakeConductorAPI() + # rather than start the fake_conductor_api.service, we stage its dbapi + self.fake_conductor_api.service.dbapi = self.dbapi + p = mock.patch('sysinv.conductor.rpcapi.ConductorAPI') + self.mock_conductor_api = p.start() + self.mock_conductor_api.return_value = self.fake_conductor_api + self.addCleanup(p.stop) + + self.setup_health_mocked_calls() + + def setup_health_mocked_calls(self): + """Mock away the API calls invoked from the health check. + + These calls can be altered by unit tests to test the behaviour + of systems in different states of health. 
+ """ + + # patch_query_hosts + p = mock.patch('sysinv.api.controllers.v1.patch_api.patch_query_hosts') + self.mock_patch_query_hosts = p.start() + self.mock_patch_query_hosts.return_value = self._patch_current() + self.addCleanup(p.stop) + + # _check_alarms calls fmclient alarms.list + self.fake_fm_client = FakeFmClient() + p = mock.patch('sysinv.common.health.fmclient') + self.mock_fm_client = p.start() + self.mock_fm_client.return_value = self.fake_fm_client + self.addCleanup(p.stop) + + # _check_kube_nodes_ready + # returns (Success Boolean, List of failed nodes []) + p = mock.patch.object(health.Health, '_check_kube_nodes_ready') + self.mock_check_kube_nodes_ready = p.start() + self.mock_check_kube_nodes_ready.return_value = (True, []) + self.addCleanup(p.stop) + + # _check_kube_control_plane_pods + # returns (Success Boolean, List of failed pods []) + p = mock.patch.object(health.Health, '_check_kube_control_plane_pods') + self.mock_check_kube_control_plane_pods = p.start() + self.mock_check_kube_control_plane_pods.return_value = (True, []) + self.addCleanup(p.stop) + + def _patch_current(self, bool_val=True): + return { + 'data': [ + {'hostname': 'controller-0', + 'patch_current': bool_val, + }, + ] + } + + +class TestPostKubeRootUpdate(TestKubeRootCAUpdate, + dbbase.ProvisionedControllerHostTestCase): + + def test_create(self): + # Test creation of kubernetes rootca update + create_dict = dbutils.get_test_kube_rootca_update() + result = self.post_json('/kube_rootca_update', create_dict, + headers={'User-Agent': 'sysinv-test'}) + + # Verify that the kubernetes rootca update has the expected attributes + self.assertEqual(result.json['state'], + kubernetes.KUBE_ROOTCA_UPDATE_STARTED) + + def test_create_rootca_update_unhealthy_from_alarms(self): + """ Test creation of kube rootca update while there are alarms""" + # Test creation of kubernetes rootca update when system health check fails + # 1 alarm will return False + 
self.fake_fm_client.alarm.list.return_value = \ + [FAKE_MGMT_ALARM, ] + + # Test creation of kubernetes rootca update + create_dict = dbutils.get_test_kube_rootca_update() + result = self.post_json('/kube_rootca_update', create_dict, + headers={'User-Agent': 'sysinv-test'}, + expect_errors=True) + + # Verify that the rootca update has the expected attributes + self.assertEqual(result.content_type, 'application/json') + self.assertEqual(http_client.BAD_REQUEST, result.status_int) + # The error should contain the following: + # System is not in a valid state for kubernetes rootca update. + # Run system health-query-kube-rootca-update for more details. + self.assertIn("System is not in a valid state", + result.json['error_message']) + + def test_create_rootca_update_exists(self): + # Test creation of rootca update when a kubernetes rootca update already exists + dbutils.create_test_kube_rootca_update() + create_dict = dbutils.post_get_test_kube_rootca_update(state=kubernetes.KUBE_ROOTCA_UPDATE_STARTED) + result = self.post_json('/kube_rootca_update', create_dict, + headers={'User-Agent': 'sysinv-test'}, + expect_errors=True) + + # Verify the failure + self.assertEqual(result.content_type, 'application/json') + self.assertEqual(http_client.BAD_REQUEST, result.status_int) + self.assertIn("A kubernetes rootca update is already in progress", + result.json['error_message']) + + def test_create_kube_upgrade_exists(self): + # Test creation of rootca update when kubernetes upgrade in progress + dbutils.create_test_kube_upgrade( + from_version='v1.42.1', + to_version='v1.42.2', + state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, + ) + create_dict = dbutils.post_get_test_kube_rootca_update() + result = self.post_json('/kube_rootca_update', create_dict, + headers={'User-Agent': 'sysinv-test'}, + expect_errors=True) + + # Verify the failure + self.assertEqual(result.content_type, 'application/json') + self.assertEqual(http_client.BAD_REQUEST, result.status_int) + 
self.assertIn("rootca update cannot be done while a kube upgrade " + "is in progress", + result.json['error_message']) + + def test_create_platform_upgrade_exists(self): + # Test creation of rootca update when platform upgrade in progress + dbutils.create_test_load(software_version=dbutils.SW_VERSION_NEW, + compatible_version=dbutils.SW_VERSION, + state=constants.IMPORTED_LOAD_STATE) + dbutils.create_test_upgrade() + + create_dict = dbutils.post_get_test_kube_rootca_update() + result = self.post_json('/kube_rootca_update', create_dict, + headers={'User-Agent': 'sysinv-test'}, + expect_errors=True) + + # Verify the failure + self.assertEqual(result.content_type, 'application/json') + self.assertEqual(http_client.BAD_REQUEST, result.status_int) + self.assertIn("rootca update cannot be done while a platform upgrade", + result.json['error_message'])